
    jC2                     &   d dl mZ d dlZd dlmZ d dlmZ ddlmZ ddl	m
Z
mZmZmZmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ d Zd"dZd"dZd"dZd"dZd"dZd"dZ d"dZ!d"dZ"	 	 d#dZ#	 	 	 	 	 	 	 	 d$dZ$eeeeee e!e"ee e!e"dZ%d%d!Z&dS )&    )partialN)attempt_download_asset)
torch_load   )MaskDecoder)FpnNeckHieraImageEncoderImageEncoderViTMemoryEncoderPromptEncoder)MemoryAttentionMemoryAttentionLayer)	SAM2ModelSAMModel)TinyViT)TwoWayTransformerc                    || S t          |          }t          |d          5 }t          |          }ddd           n# 1 swxY w Y   d|v r#t          |d         t                    r|d         }|                     |           | S )z*Load checkpoint into model from file path.Nrbmodel)r   openr   
isinstancedictload_state_dict)r   
checkpointf
state_dicts       a/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/models/sam/build.py_load_checkpointr      s    '
33J	j$		 #1]]
# # # # # # # # # # # # # # # *Jw,?!F!F(
	*%%%Ls   A  AAc                 .    t          dddg d|           S )z_Build and return a Segment Anything Model (SAM) h-size model with specified encoder parameters.                      encoder_embed_dimencoder_depthencoder_num_headsencoder_global_attn_indexesr   
_build_samr   s    r   build_sam_vit_hr1   '   ,    $3OO       c                 .    t          dddg d|           S )z_Build and return a Segment Anything Model (SAM) l-size model with specified encoder parameters.      r#   )         r'   r)   r.   r0   s    r   build_sam_vit_lr:   2   r2   r3   c                 .    t          dddg d|           S )z_Build and return a Segment Anything Model (SAM) b-size model with specified encoder parameters.      )   r7      r8   r)   r.   r0   s    r   build_sam_vit_br@   =   s,    $1MM   r3   c                 8    t          g dg dg ddd|           S )z_Build and return a Mobile Segment Anything Model (Mobile-SAM) for efficient image segmentation.)@         i@  )r>   r>      r>   )r>      r7   
   NT)r*   r+   r,   r-   
mobile_samr   r.   r0   s    r   build_mobile_samrI   H   s9    ---"ll'--$(   r3   c           	      >    t          dg ddg dg dg d|           S )zjBuild and return a Segment Anything Model 2 (SAM2) tiny-size model with specified architecture parameters.`   )r   r>   r%   r>   r   )r7   r%   	   r?   rF      r%   r<   i     rK   r*   encoder_stagesr,   encoder_global_att_blocksencoder_window_specencoder_backbone_channel_listr   _build_sam2r0   s    r   build_sam2_trX   T   s@    #||"+)))MM&9&9&9   r3   c           	      >    t          dg ddg dg dg d|           S )zeBuild and return a small-size Segment Anything Model 2 (SAM2) with specified architecture parameters.rK   )r   r>   r8   r>   r   )r%   rG      rM   rO   rQ   rV   r0   s    r   build_sam2_sr[   a   s@    $}}"-++)MM&9&9&9   r3   c           
      D    t          dg ddg dg dddgg d|           S )	zjBuild and return a Segment Anything Model 2 (SAM2) base-size model with specified architecture parameters.p   )r>      r#   r^   r>   )r=   r#      rM   rN   )i  i     r]   )r*   rR   r,   rS   rT   encoder_window_spatial_sizerU   r   rV   r0   s    r   build_sam2_brb   n   sG    $}}".,,)MM%'H&:&:&:	 	 	 	r3   c           	      >    t          dg ddg dg dg d|           S )zeBuild and return a large-size Segment Anything Model 2 (SAM2) with specified architecture parameters.   r>   rE   $   rF   r>   )r'   !   +   r?   rF   r#   r?   i  i@  i   rd   rQ   rV   r0   s    r   build_sam2_lrk   |   s@    $}}".,,)MM&;&;&;   r3   Fc                    d}d}d}||z  }	|rt          ddd| ||g ddddd	ddd
          n8t          || |dt          t          j        j        d          ||dd|d|          }
t          |
t          ||	|	f||fd          t          dt          d|dd          |dd          g dg d          }|t          ||          }|                                 |S )aO  Build a Segment Anything Model (SAM) with specified encoder parameters.

    Args:
        encoder_embed_dim (int | list[int]): Embedding dimension for the encoder.
        encoder_depth (int | list[int]): Depth of the encoder.
        encoder_num_heads (int | list[int]): Number of attention heads in the encoder.
        encoder_global_attn_indexes (list[int] | None): Indexes for global attention in the encoder.
        checkpoint (str | None, optional): Path to the model checkpoint file.
        mobile_sam (bool, optional): Whether to build a Mobile-SAM model.

    Returns:
        (SAMModel): A Segment Anything Model instance with the specified architecture.

    Examples:
        >>> sam = _build_sam(768, 12, 12, [2, 5, 8, 11])
        >>> sam = _build_sam([64, 128, 160, 320], [2, 2, 6, 2], [2, 4, 5, 10], None, mobile_sam=True)
       r5   r#   r^   i  )r%   r%   rN   r%   g      @g        Fg?)img_sizein_chansnum_classes
embed_dimsdepths	num_headswindow_sizes	mlp_ratio	drop_ratedrop_path_rateuse_checkpointmbconv_expand_ratiolocal_conv_sizelayer_lr_decayrF   gư>)epsTrN   )depth	embed_dimrn   ru   
norm_layerrs   
patch_sizeqkv_biasuse_rel_posglobal_attn_indexeswindow_size	out_chans)r~   image_embedding_sizeinput_image_sizemask_in_chansr>   i   r?   )r}   embedding_dimmlp_dimrs   )num_multimask_outputstransformertransformer_dimiou_head_depthiou_head_hidden_dim)g33333^@gR]@gRY@)g(\2M@g(\L@g     L@)image_encoderprompt_encodermask_decoder
pixel_mean	pixel_std)r   r   r   torchnn	LayerNormr   r   r   r   r   eval)r*   r+   r,   r-   r   rH   prompt_embed_dim
image_sizevit_patch_sizer   r   sams               r   r/   r/      s|   2 JN%7$ !	
( '&  #	
 	
 	
 	
" 'ux1t<<<'% ;&
 
 
% B #$&"68L!M(*5	
 
 
 !"#).	   - #
 
 
 -,,)))+  C. sJ//HHJJJJr3   r!   re   r>   r$   rj   r%   r%   ri   c           
         t          t          | |||||          t          d|ddgd          d          }t          dd	d
t	                                }	t          d          }
|duod|v }t          d5i d|d|	d|
dddddddddd	dd	dd	dd	dd	d d	d!d	d"d	d#d	d$d	d%d	d&d	d'd	d(d)d*dd+d	d,d-d.|d/|d0|d1t          d	d2d34          }|t          ||          }|	                                 |S )6a`  Build and return a Segment Anything Model 2 (SAM2) with specified architecture parameters.

    Args:
        encoder_embed_dim (int, optional): Embedding dimension for the encoder.
        encoder_stages (list[int], optional): Number of blocks in each stage of the encoder.
        encoder_num_heads (int, optional): Number of attention heads in the encoder.
        encoder_global_att_blocks (list[int], optional): Indices of global attention blocks in the encoder.
        encoder_backbone_channel_list (list[int], optional): Channel dimensions for each level of the encoder backbone.
        encoder_window_spatial_size (list[int], optional): Spatial size of the window for position embeddings.
        encoder_window_spec (list[int], optional): Window specifications for each stage of the encoder.
        checkpoint (str | None, optional): Path to the checkpoint file for loading pre-trained weights.

    Returns:
        (SAM2Model): A configured and initialized SAM2 model.

    Examples:
        >>> sam2_model = _build_sam2(encoder_embed_dim=96, encoder_stages=[1, 2, 7, 2])
        >>> sam2_model.eval()
    )r~   rs   stagesglobal_att_blocks!window_pos_embed_bkg_spatial_sizewindow_specrm   r>   r^   nearest)d_modelbackbone_channel_listfpn_top_down_levelsfpn_interp_modelr   )trunkneckscalpTrF   )r   pos_enc_at_input
num_layerslayerrB   )out_dimNzsam2.1r   memory_attentionmemory_encodernum_maskmemr%   r   r5   sigmoid_scale_for_mem_encg      4@sigmoid_bias_for_mem_encg      $$use_mask_input_as_output_without_samdirectly_add_no_mem_embeduse_high_res_features_in_sammultimask_output_in_samiou_prediction_use_sigmoiduse_obj_ptrs_in_encoderadd_tpos_enc_to_obj_ptrs"only_obj_ptrs_in_the_past_for_evalpred_obj_scorespred_obj_scores_mlpfixed_no_obj_ptrmultimask_output_for_trackinguse_multimask_token_for_obj_ptrmultimask_min_pt_numr   multimask_max_pt_numuse_mlp_for_obj_ptr_projcompile_image_encoderFno_obj_embed_spatialproj_tpos_enc_in_obj_ptrsuse_signed_tpos_enc_to_obj_ptrssam_mask_decoder_extra_argsg?g\(\?)dynamic_multimask_via_stability!dynamic_multimask_stability_delta"dynamic_multimask_stability_thresh )
r
   r	   r   r   r   r   r   r   r   r   )r*   rR   r,   rS   rU   ra   rT   r   r   r   r   	is_sam2_1sam2s                r   rW   rW      sP   : !''!7.I+
 
 
 "?!"A&	
 
 
   M" 'sTVW_s_u_uvvv"2...N$&A8z+AI ! ! !#m!))! &~! A	!
 4! #'$! "'! .2T! #'$! &*T! !%! $(4! !%! "&! ,04!  !!" !D#!$ %!& '+d'!( )-)!* Q+!, Q-!. "&/!0 $e1!2 'Y3!4 #,)5!6 )2	7!8 %),0.2/3%
 %
 %
 %
9!DF j11IIKKKKr3   )zsam_h.ptzsam_l.ptsam_b.ptzmobile_sam.ptz	sam2_t.ptz	sam2_s.ptz	sam2_b.ptz	sam2_l.ptzsam2.1_t.ptzsam2.1_s.ptzsam2.1_b.ptzsam2.1_l.ptr   c                 *   d}t          |           } t                                          D ]1}|                     |          rt                              |          }2|s+t          |  dt                                                      ||           S )a  Build and return a Segment Anything Model (SAM) based on the provided checkpoint.

    Args:
        ckpt (str | Path, optional): Path to the checkpoint file or name of a pre-defined SAM model.

    Returns:
        (SAMModel | SAM2Model): A configured and initialized SAM or SAM2 model instance.

    Raises:
        FileNotFoundError: If the provided checkpoint is not a supported SAM model.

    Examples:
        >>> sam_model = build_sam("sam_b.pt")
        >>> sam_model = build_sam("path/to/custom_checkpoint.pt")

    Notes:
        Supported pre-defined models include:
        - SAM: 'sam_h.pt', 'sam_l.pt', 'sam_b.pt', 'mobile_sam.pt'
        - SAM2: 'sam2_t.pt', 'sam2_s.pt', 'sam2_b.pt', 'sam2_l.pt'
    Nz7 is not a supported SAM model. Available models are: 
 )strsam_model_mapkeysendswithgetFileNotFoundError)ckptmodel_builderks      r   	build_samr   O  s    * Mt99D!! 1 1== 	1)--a00M y4 w wanasasauau w wxxx=r3   )N)NF)r!   re   r>   r$   rj   r   ri   N)r   )'	functoolsr   r   ultralytics.utils.downloadsr   ultralytics.utils.patchesr   modules.decodersr   modules.encodersr   r	   r
   r   r   r   modules.memory_attentionr   r   modules.samr   r   modules.tiny_encoderr   modules.transformerr   r   r1   r:   r@   rI   rX   r[   rb   rk   r/   rW   r   r   r   r3   r   <module>r      s/          > > > > > > 0 0 0 0 0 0 ) ) ) ) ) ) i i i i i i i i i i i i i i i i K K K K K K K K , , , , , , , , ) ) ) ) ) ) 2 2 2 2 2 2           	 	 	 	
 
 
 

 
 
 
   
 
 
 
$ X X X Xx  -"7 &%X X X Xx  %       r3   