
    Wj@                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlmZ d dlmZ d d	lmZ d
dl m!Z!m"Z"m#Z# d
dl$m%Z% d
dl&m'Z'm(Z(m)Z) erd dl*m+Z+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1  ed          Z2 ed          Z3 ej4        e5          Z6dGdZ7edHd            Z8e#dId            Z9	 dJdKd$Z:dLd&Z;e#dMd'            Z< G d( d)ej=                  Z>e#dNd+            Z?e#dOd,            Z@dPd/ZAejB        jC        ZCeCjD        eCjE        eCjF        eCjG        eCjH        eCjI        eCjJ        eCjK        eCjL        eCjM        eCjN        eCjO        eCjP        eCjQ        jR        eCjQ        jS        eCjT        eCjU        eCjV        eCjW        eCjX        eCjY        eCjZ        hZ[ ee[          Z[e#dMd0            Z\dQd4Z]dRd6Z^d a_d7e`d8<   dSd;ZadTdCZb	 dUdVdFZcdS )W    )annotationsN)contextmanager)partial)AnyTYPE_CHECKING)	ParamSpecTypeVar)SymInt)get_decompositions)bind_symbols   )aot_function
aot_modulemake_boxed_compiler)strip_overloads)default_partition
draw_graph#min_cut_rematerialization_partition)Callable	GeneratorSequence)Node)IntLikeType_P_Rfx_gfx.GraphModulereturnc                    | j                             dt          j        j        j                  D ]}t          j        j        j        |_        |                                  | S )Ncall_functionoptarget)	graph
find_nodestorchopsaten_to_copytor#   	recompile)r   nodes     _/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/_functorch/compilers.py_canonicalizer.   /   s[    
%%59>#: &   ( ( in'NNK    Generator[None, None, None]c               #     K   t           j                            d          } 	 d V  t           j                            |            d S # t           j                            |            w xY w)NF)r&   _C_jit_set_autocast_mode)old_jit_autocast_flags    r-   _disable_jit_autocastr5   8   sh       "H;;EBB? 	''(=>>>>>''(=>>>>s   A !A)inpsSequence[Any]torch.jit.ScriptModulec                   t                      5  t          |            | j                            dt          j        j        j                  D ]V}t          |j	                  dk    r<t          |j
                  dk    r$d|j
        v rt          j        j        j        |_        W| j        j        D ]P}i }|j
                                        D ]+\  }}t          |t          j                  r|j        }|||<   ,||_
        Q| j                                         |                                  t          j                            |           }t          j                            |j                   t          j                            |                                          }t          j                            |          }t7          d |D                       s ||  ddd           n# 1 swxY w Y   |S )a  
    Compiles the :attr:`fx_g` with Torchscript compiler.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fx_g(fx.GraphModule): The input Fx graph module to be compiled.

    Returns:
        Torch scripted model.
    r    r!   r   dtypec              3  T   K   | ]#}t          |t          j        j                  V  $d S N)
isinstancer&   _subclasses
FakeTensor).0ts     r-   	<genexpr>zts_compile.<locals>.<genexpr>n   s2      MM1:a!2!=>>MMMMMMr/   N)r5   r   r$   r%   r&   r'   r(   r)   lenargskwargsr*   r#   nodesitemsr=   devicetypelintr+   jitscriptr2   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r   r6   r,   
new_kwargskvfs          r-   
ts_compilerV   C   s    
	 	   J))uy~'> * 
 
 	0 	0D 49~~""s4;'7'71'<'<DKAWAW#in/J$ 	% 	%DJ))++ " "1a.. A !
1$DKK
IT"" 	**17333IQVVXX&&I,,Q//MMMMMMM 	AtHH;              < Hs   GG--G14G1T_r   namestr
clear_metaboolc                R    t          | j                   t          | ||           | S )N)rZ   )printcoder   )r   rW   rX   rZ   s       r-   _draw_graph_compiler_   s   s.     
$)tTj1111Kr/   5Callable[[fx.GraphModule, list[Any]], fx.GraphModule]c                H    t          t          t          |                     S )NrX   )r   r   r_   rb   s    r-   draw_graph_compilerc   {   s      w':FFFGGGr/   c                    | S )z
    Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
    and can be used to check accuracy.

    .. warning::
        This API is experimental and likely to change.

     r   rW   s     r-   noprg      s	     Kr/   c                  4     e Zd Zdddd fdZd fdZ xZS )DebugInterpreterNTinitial_envenable_io_processingrD   r   rk   dict[Node, Any] | Nonerl   r[   r   c               f    t          | j        g|R  | _         t                      j        |||dS )Nrj   )r   modulesymbol_mappingsuperrun)selfrk   rl   rD   	__class__s       r-   rr   zDebugInterpreter.run   sT     +K
 
 
 

 uww{{AU
 
 
 	
r/   nr   c                b   
 d fddfd	dfd
d
fd}t                                          |          }d|j        v rt          j        |j        d                   \  }}t          j        |          \  }}t          |          t          |          k    r.t          t          |           dt          |                     t          t          t          |                    ||          D ]2\  }}	t          |	t          j                  s! |||	 fd           3|S )Nnir   r   intc                    t          | t                    s| S t          j        | j        j                            j                            }|j        st          d|           t          |          S )Nzexpected r to be a number, got )r=   r
   sympyexpandr,   exprxreplacerp   	is_numberAssertionErrorrx   )rw   rrs   s     r-   subst_symintz/DebugInterpreter.run_node.<locals>.subst_symint   sk    b&)) 	RW\2243FGGHHA; L$%Jq%J%JKKKq66Mr/   nistuple[IntLikeType, ...]tuple[int, ...]c                :    t          fd| D                       S )Nc              3  .   K   | ]} |          V  d S r<   re   )r@   rw   r   s     r-   rB   zHDebugInterpreter.run_node.<locals>.subst_symint_tuple.<locals>.<genexpr>   s-      88bb))888888r/   )tuple)r   r   s    r-   subst_symint_tuplez5DebugInterpreter.run_node.<locals>.subst_symint_tuple   s&    8888C888888r/   atorch.Tensorbr[   c                ,    |                                            dk    rqt          | j                  D ]\} |                     |                    |                    |          k    r% |                     |                    dk    r dS ]dS )Nr   r   FT)numelrangendimstridesize)r   r   idxr   s      r-   check_significant_stridesz<DebugInterpreter.run_node.<locals>.check_significant_strides   s    |AGGII&&** == % %C$QXXc]]33qxx}}DD(L5599$uu4r/   nvrvdescCallable[[], str]Nonec           
        t          |          st          dt          |                     | j        |j        k    r)t           |             d| j         d|j                    |                                           |                                k    rat           |             d|                                  d |                                            d|                                            | |          }|sat           |             d|                                  d |                                            d|                                           d S )Nz"expected desc to be callable, got z:  != z aka )callabler   rI   r:   r   r   )r   r   r   same_stridesr   r   s       r-   checkz(DebugInterpreter.run_node.<locals>.check   s|   D>> X$%V$t**%V%VWWWx28##$%J%J"(%J%J%J%JKKK!!"'')),,		99$tvv____1C1CBGGII1N1N__TVT[T[T]T]__   54R<<L $tvveeee3E3Ebiikk3R3ReeXZXaXaXcXcee   r/   valr   c                     d  dj          S )Nzoutput z where )rp   )irs   s   r-   <lambda>z+DebugInterpreter.run_node.<locals>.<lambda>   s    &O&O&O$:M&O&O r/   )rw   r   r   rx   )r   r   r   r   )r   r   r   r   r   r[   )r   r   r   r   r   r   r   r   )rq   run_nodemetapytreetree_flattenrC   r   zipr   r=   r&   Tensor)rs   ru   r   r   n_vals_n_specr_vals_r_specr   r   r   r   r   r   rt   s   `         @@@@r-   r   zDebugInterpreter.run_node   s   	 	 	 	 	 		9 	9 	9 	9 	9 	9	 	 	 	 	 		 	 	 	 	 	 	 GGQAF??$1!&-@@OFG$1!44OFG 6{{c&kk))$F%F%FV%F%FGGG s6{{!3!3VVDD Q Q	2r!"el33 b"OOOOOPPPPr/   )rD   r   rk   rm   rl   r[   r   r   )ru   r   r   r   )__name__
__module____qualname__rr   r   __classcell__)rt   s   @r-   ri   ri      sm         /3%)	
 
 
 
 
 
 
 
5 5 5 5 5 5 5 5 5 5r/   ri   DCallable[[DebugInterpreter, Any, dict[Node, Any] | None, bool], Any]c                *    t          |           j        S )z
    Returns a (slow) interpreter over the FX graph module that also checks
    various debugging properties (e.g., that tracing strides matched real
    strides.)
    )ri   rr   rf   s     r-   	debug_nopr      s     D!!%%r/   c                    t          |            t          j                            |           }t          j                            |                                          }|S r<   )r   r&   rK   rL   rN   rO   )r   rW   rU   s      r-   simple_ts_compiler      sG    D	A	""AHr/   rU   Callable[..., Any]c                ,    t          | t                    S r<   )r   r   )rU   s    r-   nnc_jitr      s    ,---r/   c                .    t          | j                   | S r<   )r]   r^   rf   s     r-   print_compiler     s    	$)Kr/   fnCallable[_P, _R] | nn.ModulerE   c                    t           t           t          t          d}|                    |           t	          | t
          j        j                  rt          | fi |S t          | fi |S )a~  
    Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
    memory efficient fusion. It uses the
    :func:`min_cut_rematerialization_partition` partitioner to perform efficient
    recomputation. It uses NVFuser to compile the generated forward and backward
    graphs.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
            that takes one or more arguments. Must return one or more Tensors.
        **kwargs: Any other overrides you want to make to the settings

    Returns:
        Returns a ``Callable``  or ``nn.Module`` that retains the eager behavior
        of the original :attr:`fn`, but whose forward and backward graphs have
        gone through recomputation optimizations, and the graphs have been
        compiled with nvfuser.

    fw_compilerbw_compilerpartition_fndecompositions)
rV   r   default_decompositionsupdater=   r&   nnModuler   r   )r   rE   configs      r-   memory_efficient_fusionr     sr    6 "!;0	 F MM&"eho&& *"'''''B))&)))r/   Sequence[torch.Tensor]c                    |                      d           t          dd |D              d           ddlm}   |                                            |  t          | |          S )NfooaQ  
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = c                *    g | ]}|j         |j        fS re   )shaper:   )r@   r   s     r-   
<listcomp>z!debug_compile.<locals>.<listcomp>G  s!    ***!'17	***r/   a?  
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
r   )FxModule)	to_folderr]   r   r   cudarV   )r   r6   r   s      r-   debug_compiler   9  s     	NN5		 	+*T***	 	 	  ( HHJJOOtdD!!!r/   rx   graph_indexinput_data_pathlist[torch.Tensor]c                6   g }t          | d          5 }t          j        |          }g }|D ]}t          |          dk    r|} |t	          j                              }n|\  }}}}	}
|	t
          j        t
          j        t
          j        t
          j	        t
          j        t
          j
        t          t          hv rt          j        dd||	|
          }nt          j        ||	|
          }|                    |           	 ddd           n# 1 swxY w Y   |S )zZ
    Return a random input for the given inputs meta generated from _save_fx_default.
    rbr   r   )r:   rH   N)openpickleloadrC   randomr&   rx   int32int64r[   uint8floatrandintrandappend)r   inputsrU   inputs_metar   rI   input_r   _strider:   rH   s              r-   
get_inputsr   [  sR    "$F	ot	$	$ "k!nn 	" 	"D4yyA~~fmoo..6:3eWeVIKKJIK	 	 	 #]1aeFSSSFF"ZU6JJJFMM&!!!!'	"" " " " " " " " " " " " " " ". Ms   C.DDDcurrent_namefolder_namedump_example_inputgmtorch.fx.GraphModuleexample_inputs	nn.Modulec                    	
 ddl m} d	fd	d 	fd
d
fd}d
fd}d
fd} ||||||t                    S )aO  
    The forward, backward, and joint computation graph will be stored in
    {folder_name}/{current_name}/{current_name}_forward_{graph_index},
    {folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
    {folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
    The input shape of the graphs will be stored in the .input files.
    These files can be loaded with pickle,
    and is a list of format (type, shape, stride, dtype, device).
    In the case of type = int or float, it is just (type,).
    For joint graph input, it is a nested list [[],[]]
    where the two inner lists have the same format.
    If dump_example_input is True, example_inputs will be stored in .pt file.
    Since each function might produce multiple graphs,
    the graph_index is used to distinguish difference graphs
    r   )aot_module_simplifiedrD   r   r   	list[Any]c                   g }t          |           dk    rEt          | d         t                    r*| | d                   z  }| | d                   z  }|S | D ]}t          |          t          u st          |          t
          u r$|                    t          |          f           R|                    t          |          |j        |                                |j	        |j
        f           |S )Nr   r   )rC   r=   r   rI   rx   r   r   r   r   r:   rH   )rD   
input_metaargget_input_metas      r-   r   z(_save_fx_default.<locals>.get_input_meta  s    
t99q==ZQ77=..a111J..a111J 	 	CCyyC499#5#5!!499,////!!#YY	3::<<CJO    r/   
gm_to_saver   	type_namerY   r   c                6   t          | j        j                  dk    r/t                              t          j        d|t                     d S t          j        |           }|j        	                    t          j        j                                                   |                                  	|          }t          j         d d           |                     d d d| dt           	           t#           d d d| dt           d d| dt           dd          5 }t%          j        ||           d d d            n# 1 swxY w Y   r9t          j        | d d d| dt           d d| dt           d	           d S d S )
Nr   z!No nodes in graph {%s}_{%s}_{%s}./T)exist_okrW   z.inputwbz.pt)rC   r$   rF   logloggingWARNINGr   copydeepcopyset_codegenr&   fxCodeGenr+   osmakedirsr   r   r   dumpsave)
r   rD   r   r   r   rU   r   r   r   r   s
         r-   graph_saver_helperz,_save_fx_default.<locals>.graph_saver_helper  s    z%&&!++GG3   F]:&&
UX^3355666
#^D))

{33\33dCCCC
TT\TTLTT9TT{TT	
 	
 	
   D  D\  D  DL  D  D9  D  D{  D  DUa  D  Ddm  D  Dp{  D  D  D
 
 	' K
A&&&		' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	'
  	J  E  E  E  E  E  Ey  E  E;  E  EYe  E  Ehq  E  Et  E  E  E    	 	s   1EEEr   r   r   c                "     | |d           | S )Nforwardre   r   r   r  s     r-   graph_saver_forwardz-_save_fx_default.<locals>.graph_saver_forward  s     	2~y999	r/   c                6     | |d           t           dz  a | S )Nbackwardr   )r   r
  s     r-   graph_saver_backwardz._save_fx_default.<locals>.graph_saver_backward  s)     	2~z:::q	r/   
joint_args%tuple[fx.GraphModule, fx.GraphModule]c                >     | |d           t          | |          S )Njoint)r   )r   r  r  s     r-   graph_saver_jointz+_save_fx_default.<locals>.graph_saver_joint  s*     	2z7333 Z000r/   r   )rD   r   r   r   )r   r   rD   r   r   rY   r   r   )r   r   r   r   r   r   )r   r   r  r   r   r  )functorch.compiler   r   )r   r   r   r   r   r   r  r  r  r   r  s   ```      @@r-   _save_fx_defaultr  z  s    , 877777     ! ! ! ! ! ! ! ! !F          1 1 1 1 1 1 ! 
'(&-   r/   F Callable[[bool, nn.Module], Any]c                4    da t          t          | ||          S )as  
    Dump the forward, backward, and joint computation graph.
    Example Usage:
    save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
    optimize_ctx = torchdynamo.optimize(
        save_fx_func
    )
    with torch.enable_grad():
        with optimize_ctx:
            result = forward_and_backward_pass(model, example_inputs)
    r   )r   r   r  )r   r   r   s      r-   graph_dumper_aotr    s     K#\;@RSSSr/   )r   r   r   r   )r   r0   )r   r   r6   r7   r   r8   )T)
r   r   rW   r   rX   rY   rZ   r[   r   r   )rX   rY   r   r`   )r   r   rW   r   r   r   )r   r   rW   r   r   r   )r   r   rW   r   r   r8   )rU   r   r   r   )r   r   rE   r   r   r   )r   r   r6   r   r   r8   )r   rY   r   r   )r   rY   r   rY   r   r[   r   r   r   r   r   r   )F)r   rY   r   rY   r   r[   r   r  )d
__future__r   r   r   r  r   r   
contextlibr   	functoolsr   typingr   r   typing_extensionsr   r	   rz   r&   torch.fxr  torch.nnr   torch.utils._pytreeutils_pytreer   r
   torch._decompr   %torch.fx.experimental.symbolic_shapesr   aot_autogradr   r   r   compile_utilsr   partitionersr   r   r   collections.abcr   r   r   torch.fx.noder   torch.typesr   r   r   	getLoggerr   r   r.   r5   rV   r_   rc   rg   Interpreterri   r   r   r   r'   r(   detachgelu_backwardleaky_relu_backwardsigmoid_backwardthreshold_backwardhardtanh_backwardhardsigmoid_backwardhardswish_backwardtanh_backwardsilu_backwardelu_backwardcudnn_batch_normcudnn_batch_norm_backwardmasked_fillScalarr   elu
leaky_reluhardtanh	hardswishhardsigmoidconj_physicalis_same_sizer   r   r   r   r   __annotations__r   r  r  re   r/   r-   <module>rD     s   " " " " " " "   				   % % % % % %       % % % % % % % % 0 0 0 0 0 0 0 0               $ $ $ $ $ $ $ $ $       , , , , , , > > > > > > G G G G G G G G G G * * * * * *           (==========""""""'''''' Yt__WT]]g!!
    ? ? ? ? , , , ,` AE    H H H H 	 	 	 	E E E E Er~ E E EP & & & &    . . . . y~K"HOMN- 4 ,+,BCC     
$* $* $* $*N" " " ">        >f f f fV EJT T T T T T Tr/   