
    j                        d Z ddlmZ ddlZddlZddlZddlmZmZ  G d dej        j	                  Z
 G d dej	                  Z G d	 d
ej	                  Zd ZdddZdS )zVarious utility models.    )annotationsN)Tensornnc                  F     e Zd ZdZ	 	 	 d fd	Zed             Zd Z xZS )	DotProductScoringz^A module that computes dot-product scores between query features and pooled prompt embeddings.NT      (@c                   t                                                       || _        t          |t          j        j                  s|J || _        t          j                            ||          | _	        t          j                            ||          | _
        t          dt          j        |          z            | _        || _        | j        r	|| _        dS dS )z(Initialize the DotProductScoring module.N      ?)super__init__d_proj
isinstancetorchr   Module
prompt_mlpLinearprompt_projhs_projfloatnpsqrtscaleclamp_logitsclamp_max_val)selfd_modelr   r   r   r   	__class__s         k/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/models/sam/sam3/model_misc.pyr   zDotProductScoring.__init__   s     	*eho66L*:L:LL$ 8??7F;;xw773011
( 	/!.D	/ 	/    c                    |                      | j                                      dd          d         }t          j        t          j        |d          d          }| |z                      d          |z  }|S )z;Mean-pool the prompt embeddings over the valid tokens only.   r   ).Ndimr
   )min)todtypepermuter   clampsum)promptprompt_maskis_valid	num_validpooled_prompts        r   mean_pool_textz DotProductScoring.mean_pool_text'   sy     !L$$V\22::1a@@KK	( : : :DDD	(*//A/66Br   c                8   |                                 dk    r0|                                 dk    r|                                 dk    sJ | j        -|                     |                    |j                            }|                     ||          }|                     |          }|                     |          }t          j        ||	                    d                    }|| j
        z  }| j        r"|                    | j         | j                   |S )z1Compute dot-product scores between hs and prompt.         N)r$   max)r#   r   r%   r&   r/   r   r   r   matmul	unsqueezer   r   clamp_r   )r   hsr*   r+   r.   proj_pooled_promptproj_hsscoress           r   forwardzDotProductScoring.forward2   s   
 vvxx1}}!2!2{7H7HA7M7M7MM ?&__VYYrx%8%899F ++FK@@ "--m<<,,r"" g'9'C'CB'G'GHH$*  	KMMt11t7IMJJJr   )NTr   )	__name__
__module____qualname____doc__r   staticmethodr/   r=   __classcell__r   s   @r   r   r      sx        hh / / / / / /(   \      r   r   c                  2     e Zd ZdZ	 	 dd fdZddZ xZS )
LayerScalez;LayerScale module for per-channel scaling of layer outputs.h㈵>Fr#   intinit_valuesfloat | TensorinplaceboolreturnNonec                    t                                                       || _        t          j        |t          j        |          z            | _        dS )z!Initialize the LayerScale module.N)r   r   rK   r   	Parameterr   onesgamma)r   r#   rI   rK   r   s       r   r   zLayerScale.__init__R   sD     	\+
3"?@@


r   xr   c                X    | j         r|                    | j                  n	|| j        z  S )z%Apply LayerScale to the input tensor.)rK   mul_rR   )r   rS   s     r   r=   zLayerScale.forward]   s(    %)\Eqvvdj!!!q4:~Er   )rG   F)r#   rH   rI   rJ   rK   rL   rM   rN   )rS   r   rM   r   )r>   r?   r@   rA   r   r=   rC   rD   s   @r   rF   rF   O   ss        EE
 '+		A 	A 	A 	A 	A 	A 	AF F F F F F F Fr   rF   c                  0     e Zd ZdZ	 	 dd	 fdZd Z xZS )
TransformerWrapperzEA wrapper for the transformer consisting of an encoder and a decoder.noneTr   rH   c                    t                                                       || _        || _        ||j        nd| _        || _        |dv sJ d| d            || _        |                                  || _        dS )z"Initialize the TransformerWrapper.N)rX   zunknown param z of two_stage_type)	r   r   encoderdecodernum_queriespos_enc_at_input_dectwo_stage_type_reset_parametersr   )r   rZ   r[   r   r^   r]   r   s         r   r   zTransformerWrapper.__init__e   s     	292E7..4$8! )))+^N+^+^+^))),   r   c                    |                                  D ]H\  }}|                                dk    r+d|vr'd|vr#d|vrt          j                            |           IdS )z'Initialize the parameters of the model.r!   	box_embedquery_embedreference_pointsN)named_parametersr#   r   initxavier_uniform_)r   nps      r   r_   z$TransformerWrapper._reset_parameters{   st    ))++ 	/ 	/DAquuww{{a''M,B,BGYabGbGbG++A...	/ 	/r   )rX   T)r   rH   )r>   r?   r@   rA   r   r_   rC   rD   s   @r   rW   rW   b   s_        OO !      ,/ / / / / / /r   rW   c                2   | j         \  }}}t          j        | dddddf          d          }t          j        | dddddf          d          }|                                |z  }|                                |z  }t          j        ||gd          }|S )z:Compute the valid ratio of height and width from the mask.Nr   r!   r4   )shaper   r)   r   stack)	mask_HWvalid_Hvalid_Wvalid_ratio_hvalid_ratio_wvalid_ratios	            r   get_valid_ratioru      s    jGAq!iaaaAg**GiaaaAAAg**GMMOOa'MMMOOa'M+}m<bAAKr      
pos_tensortorch.Tensor	num_featsrH   c           	     x   |dz  dk    sJ |dz  }dt           j        z  }t          j        || j        | j                  }ddt          j        |dd          z  |z  z  }| dddddf         |z  }| dddddf         |z  }|dddddf         |z  }|dddddf         |z  }t          j        |dddddddf                                         |dddddddf         	                                fd	
          
                    d          }t          j        |dddddddf                                         |dddddddf         	                                fd	
          
                    d          }|                     d          dk    rt          j        ||fd
          }n|                     d          dk    rA| dddddf         |z  }	|	dddddf         |z  }
t          j        |
dddddddf                                         |
dddddddf         	                                fd	
          
                    d          }
| ddddd	f         |z  }|dddddf         |z  }t          j        |dddddddf                                         |dddddddf         	                                fd	
          
                    d          }t          j        |||
|fd
          }n%t          d|                     d                     |S )a  Generate sinusoidal position embeddings for 2D or 4D coordinate tensors.

    This function creates sinusoidal embeddings using sine and cosine functions at different frequencies, similar to the
    positional encoding used in Transformer models. It supports both 2D position tensors (x, y) and 4D tensors (x, y, w,
    h) for bounding box coordinates.

    Args:
        pos_tensor (torch.Tensor): Input position tensor of shape (n_query, bs, 2) for 2D coordinates or (n_query, bs,
            4) for 4D coordinates (bounding boxes).
        num_feats (int): Number of feature dimensions for the output embedding. Must be even. Defaults to 256.

    Returns:
        (torch.Tensor): Sinusoidal position embeddings of shape (n_query, bs, num_feats) for 2D input or (n_query, bs,
            num_feats * 2) for 4D input.

    Raises:
        AssertionError: If num_feats is not even.
        ValueError: If pos_tensor.size(-1) is not 2 or 4.

    Examples:
        >>> pos_2d = torch.rand(100, 8, 2)  # 100 queries, batch size 8, 2D coordinates
        >>> embeddings_2d = gen_sineembed_for_position(pos_2d, num_feats=256)
        >>> embeddings_2d.shape
        torch.Size([100, 8, 256])
        >>> pos_4d = torch.rand(50, 4, 4)  # 50 queries, batch size 4, 4D coordinates
        >>> embeddings_4d = gen_sineembed_for_position(pos_4d, num_feats=128)
        >>> embeddings_4d.shape
        torch.Size([50, 4, 256])
    r3   r   )r&   devicei'  floor)rounding_modeNr!   r2   r"   r4   r1   zUnknown pos_tensor shape(-1):)mathpir   aranger&   r{   divrk   sincosflattensizecat
ValueError)rw   ry   r   dim_tx_embedy_embedpos_xpos_yposw_embedpos_wh_embedpos_hs                r   gen_sineembed_for_positionr      s   < q=AQI KEL**::CTUUUEa59UAWEEEFRSEAAAq!E)GAAAq!E)GAAAqqq$J%'EAAAqqq$J%'EKqqq!!!QTTz*..00%111add
2C2G2G2I2IJPQRRRZZ[\]]EKqqq!!!QTTz*..00%111add
2C2G2G2I2IJPQRRRZZ[\]]EraiA...				!	!QQQ1W%-111d
#e+U111aaaA:.2244eAAAqqq!$Q$J6G6K6K6M6MNTUVVV^^_`aaQQQ1W%-111d
#e+U111aaaA:.2244eAAAqqq!$Q$J6G6K6K6M6MNTUVVV^^_`aaiue4!<<<N9L9LNNOOOJr   )rv   )rw   rx   ry   rH   )rA   
__future__r   r~   numpyr   r   r   r   r   r   rF   rW   ru   r    r   r   <module>r      s  
   " " " " " "              < < < < < < < <~F F F F F F F F&/ / / / / / / /B  9 9 9 9 9 9 9r   