
    /jR                         U d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	  G d de          Z
 G d d	e          Z G d
 de          Z G d de          Zdaeej        dz           dz  ed<   dej        fdZdS )    N)chain_get_device_index)Function)commc                   :    e Zd Zed             Zed             ZdS )	Broadcastc                   	 t          d |D                       st          d          d |D             }|| _        t          |          dk    rdS t          |          | _        |d                                         | _        d |D             | _        t          j	        || j                  }|D ];}t          | j                  D ]$\  }}|rt          j        ||                   ||<   %<g }t          | j        dd                    D ](\  	}|s!|                    	fd|D                        ) | j        |  t!          t#          j        |                    S )	Nc              3   6   K   | ]}|j         j        d k    V  dS cpuNdevicetype.0is     a/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>   +      ::a18=E)::::::    z2Broadcast function not implemented for CPU tensorsc                 .    g | ]}t          |d           S Tr   r   xs     r   
<listcomp>z%Broadcast.forward.<locals>.<listcomp>   #    GGGa(D11GGGr   r    c                 6    g | ]}|                                 S r   
is_complex)r   inps     r   r   z%Broadcast.forward.<locals>.<listcomp>   s"    ???CNN,,???r      c              3   (   K   | ]}|         V  d S Nr   )r   outputidxs     r   r   z$Broadcast.forward.<locals>.<genexpr>"   s'      *M*M66#;*M*M*M*M*M*Mr   )allAssertionErrortarget_gpuslen
num_inputs
get_deviceinput_devicecomplex_maskr   broadcast_coalesced	enumeratetorchview_as_complexneeds_input_gradextendmark_non_differentiabletupler   from_iterable)
ctxr*   inputsoutputsdevice_outputsr   r!   non_differentiablesinput_requires_gradr'   s
            @r   forwardzBroadcast.forward   s   ::6::::: 	W !UVVVGG;GGG%v;;!2V!!9//11?????*63?CC% 	Q 	QN!*3+;!<!< Q Q: Q(-(=nQ>O(P(PN1%Q !(1#2Fqrr2J(K(K 	N 	N$C$& N#***M*M*M*MW*M*M*MMMM##%899U(11222r   c                 F    t          j        | j        | j        g|R  }d|z   S )Nr%   )ReduceAddCoalescedapplyr.   r,   )r9   grad_outputsgradss      r   backwardzBroadcast.backward&   s7    "(cn
/;
 
 
 r   N__name__
__module____qualname__staticmethodr?   rE   r   r   r   r	   r	   
   sH        3 3 \34   \  r   r	   c                   :    e Zd Zed             Zed             ZdS )rA   c                    fdt          dt                              D             | _        fdt                    D             }|| _        t	          d D                       fdt          dt                              D             }t          j        ||          }t	          d t          ||          D                       }|S )Nc                 D    g | ]}|                                          S r   r-   r   r   rD   s     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>2   s6     
 
 
&'E!H!!
 
 
r   r   c                 D    g | ]}|                                          S r   r    rO   s     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>6   s)    III!a++--IIIr   c              3   j   K   | ].}|                                 rt          j        |          n|V  /d S r%   )r!   r2   view_as_realr   gs     r   r   z-ReduceAddCoalesced.forward.<locals>.<genexpr>9   sP        
  
?@Q\\^^:Eq!!! 
  
  
  
  
  
r   c                 *    g | ]}||z            S r   r   )r   r   grads_convertedr,   s     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>=   s7     
 
 
 AJ./
 
 
r   c              3   L   K   | ]\  }}|rt          j        |          n|V   d S r%   r2   r3   )r   rr!   s      r   r   z-ReduceAddCoalesced.forward.<locals>.<genexpr>C   sO       
 
: )39E!!$$$
 
 
 
 
 
r   )ranger+   r*   r/   r7   r   reduce_add_coalescedzip)r9   destinationr,   rD   r/   grads_resultsrV   s     ``   @r   r?   zReduceAddCoalesced.forward0   s)   
 
 
 
+0CJJ
+K+K
 
 
 JIIIuZ7H7HIII'  
  
DI 
  
  
 
 

 
 
 
 
1c/22J??
 
 
 +FK@@ 
 
!$Wl!;!;
 
 
 
 

 r   c                 6    dt          j        | j        g|R  z   S )NNN)r	   rB   r*   )r9   rC   s     r   rE   zReduceAddCoalesced.backwardJ   s*    
 OCO;l;;;< 	<r   NrF   r   r   r   rA   rA   /   sH          \2 < < \< < <r   rA   c                   :    e Zd Zed             Zed             ZdS )Gatherc                     t          d |D                       st          d          |dk    rd _        nt          |d          }| _        | _        t          d |D                        _        t          d |D                       r=|dk    r7t          d |D                       }t          j        d	d
           d _	        nd _	        t           fd|D                        _
        t          |          dk    o|d                                         }t          j        | j         j                  }|rt          j        |          }|S )Nc              3   6   K   | ]}|j         j        d k    V  dS r   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>U   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   >   K   | ]}|                                 V  d S r%   rN   r   s     r   r   z!Gather.forward.<locals>.<genexpr>]   s*      >>!q||~~>>>>>>r   c              3   F   K   | ]}|                                 d k    V  dS r   N)dimr   ts     r   r   z!Gather.forward.<locals>.<genexpr>^   s.      ,,quuww!|,,,,,,r   r   c              3   @   K   | ]}|                     d           V  dS )r#   N)viewrj   s     r   r   z!Gather.forward.<locals>.<genexpr>_   s,      55166!99555555r   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc              3   L   K   | ]}|                     j                  V  d S r%   )sizeri   )r   r   r9   s     r   r   z!Gather.forward.<locals>.<genexpr>i   s/      @@Asw@@@@@@r   )r(   r)   target_devicer   ri   r7   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr+   r!   r   gatherr2   r3   )r9   rr   ri   r:   r!   r&   s   `     r   r?   zGather.forwardS   s|   ::6::::: 	T !RSSSE!! %C-mTBBM -C>>v>>>>>,,V,,,,, 
	*55f55555FM' 	    %)C!!$)C!@@@@@@@@@[[1_?)=)=)?)?
VSWc.?@@ 	3*622Fr   c                     t                               | j        | j        | j        |          }| j        rt          d |D                       }d|z   S )Nc              3   &   K   | ]}|d          V  dS rh   r   rS   s     r   r   z"Gather.backward.<locals>.<genexpr>z   s&      #B#BQAaD#B#B#B#B#B#Br   ra   )ScatterrB   rs   rw   ri   rv   r7   )r9   grad_outputscattered_gradss      r   rE   zGather.backwardt   sY    !--NCOSWk
 
   	C##B#B/#B#B#BBBOo--r   NrF   r   r   r   rc   rc   R   sI          \@ . . \. . .r   rc   c                   :    e Zd Zed             Zed             ZdS )r{   c                    d |D             }|| _         |j        j        dk    r|                                nd| _        d }t
          j                                        r| j        dk    rd |D             }|                                }t          j
        |||| j         |          }|rt          d |D                       }|t          |          D ]\  }}	t
          j                            ||                   5  t
          j                                        }
|
                    ||                    |	                    |
           d d d            n# 1 swxY w Y   |S )Nc                 .    g | ]}t          |d           S r   r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>   r   r   r   c                 P    g | ]#}t          t          j        |                    $S r   )_get_streamr2   r   )r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>   s*    SSSV{5<#7#788SSSr   c              3   >   K   | ]}t          j        |          V  d S r%   rX   )r   os     r   r   z"Scatter.forward.<locals>.<genexpr>   s-      FFE1!44FFFFFFr   )ri   r   r   r-   r.   r2   acceleratoris_availabler!   r   scatterr7   r1   device_indexcurrent_streamwait_streamrecord_stream)r9   r*   chunk_sizesri   inputstreamsr!   r;   r   r&   main_streams              r   r?   zScatter.forward   s   GG;GGG161Be1K1K5++---QS))++ 	T0@B0F0FSS{SSSG%%''
,uk;QQ 	GFFgFFFFFG &w// 6 6	6&33KNCC 6 6"'"3"B"B"D"DK++GAJ777((5556 6 6 6 6 6 6 6 6 6 6 6 6 6 6 s   =AEE	E	c                 D    d d d t          j        | j        | j        g|R  fS r%   )rc   rB   r.   ri   )r9   r|   s     r   rE   zScatter.backward   s*    T4c.>!V+!V!V!VVVr   NrF   r   r   r   r{   r{   ~   sM          \2 W W \W W Wr   r{   _streamsr   c                    | j         dk    st          j                                        sdS t          j                                        j         | j         k    r;t          dt          j                                        j          d| j                    t          "dgt          j                                        z  at          | j                 &t          j	        | j                  t          | j        <   t          | j                 S )zBGet a background stream for copying between CPU and target device.r   Nz"Expected current accelerator type z to match device type )
r   r2   r   r   current_acceleratorr)   r   device_countindexStream)r   s    r   r   r      s     {e5#4#A#A#C#Ct,,..3v{BB21B1V1V1X1X1] 2 2$*K2 2
 
 	
 6E-::<<<%!&fl!;!;FL!!r   )rt   	itertoolsr   r2   torch._utilsr   torch.autogradr   torch.nn.parallelr   r	   rA   rc   r{   r   listr   __annotations__r   r   r   r   r   <module>r      s_           * * * * * * # # # # # # " " " " " "" " " " " " " "J <  <  <  <  <  <  <  <F). ). ). ). ).X ). ). ).XW W W W Wh W W WB .2$u|d"
#d
* 1 1 1" " " " " " "r   