
    Wj                         U d dl Z d dlmc mZ d dl mZ d dlmZ g Ze	e
         ed<   e j        j         G d d                      ZdS )    N)Tensor)2_scripted_functional_optimizer_deprecation_warning__all__c                       e Zd Z	 	 	 	 	 	 	 ddee         dedeeef         d	ed
edededefdZdeedz           fdZ	dS )_FunctionalAdamaxMbP?g?g+?:0yE>        Fparamslrbetasepsweight_decayforeachmaximize_allow_empty_param_listc	                    t          d           d|k    st          d|           d|k    st          d|           d|d         cxk    rdk     sn t          d|d                    d|d	         cxk    rdk     sn t          d
|d	                    d|k    st          d|           |||d         |d	         |d| _        || _        || _        t
          j                            t          t
          j	        t          t          t
          j	        f         f         i           | _        t          |          dk    r|st          d          d|i| _        d S )N   )
stacklevelr   zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r   r   beta1beta2r   z%optimizer got an empty parameter listr   )r   
ValueErrordefaultsr   r   torchjitannotatedictr   strstatelenparam_group)	selfr   r   r   r   r   r   r   r   s	            n/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/distributed/optim/functional_adamax.py__init__z_FunctionalAdamax.__init__   s    	;aHHHHbyy;r;;<<<czz<s<<===eAh$$$$$$$$M58MMNNNeAh$$$$$$$$M58MMNNNl""JLJJKKK 1X1X(
 
  Y''U\4U\@Q;R-R(SUWXX
v;;!$;DEEE %f-    	gradientsNc                    | j         d         }g }g }g }g }g }t          |          t          |          k    r6t          ddt          |           dz   dt          |           z             d}t          | j         d         |          D ]%\  }	}
|
|t	          j        |	          z  }|                    |	           |                    |
           |	| j        vrti | j        |	<   | j        |	         }t	          j        d          |d<   t	          j	        |	t          j
        	          |d
<   t	          j	        |	t          j
        	          |d<   | j        |	         }|                    |d
                    |                    |d                    |                    |d                    't	          j                    5  t          j        |||||| j        d         | j        d         | j        d         | j        d         | j        d         | j        | j        |           d d d            d S # 1 swxY w Y   d S )Nr   zEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: Fr   step)memory_formatexp_avgexp_infr   r   r   r   r   )r   r   r   r   r   r   r   has_complex)r#   r"   r   zipr   
is_complexappendr!   tensor
zeros_likepreserve_formatno_gradFadamaxr   r   r   )r$   r(   r   params_with_gradgradsexp_avgsexp_infsstate_stepsr.   paramgradientr!   s               r%   r*   z_FunctionalAdamax.stepB   s   !(+$&v;;#i..((W3CKK33347s9~~778   "4#3H#=yII 	2 	2OE8#u/666 ''...X&&&
**(*DJu% Ju-E$)L$5$5E&M','7U-B( ( (E)$ (-'7U-B( ( (E)$ 
5)i 0111i 0111""5=111]__ 	 	H M%(mG,mG,=&!]>:'   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A#H55H9<H9)r   r	   r
   r   FFF)
__name__
__module____qualname__listr   floattupleboolr&   r*    r'   r%   r   r      s        
 %1!(-'. '.V'. '. UE\"	'.
 '. '. '. '. "&'. '. '. '.R8d6D=1 8 8 8 8 8 8r'   r   )r   torch.optim._functionaloptim_functionalr6   r   ,torch.distributed.optim._deprecation_warningr   r   rB   r    __annotations__r   scriptr   rF   r'   r%   <module>rM      s     # # # # # # # # #           
 c    b b b b b b b b b br'   