
    WjH                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZmZ d dlmZ d dlm Z m!Z! g d	Z"d
e#de$e#e#f         fdZ%dee&         dej'        de(e#ef         fdZ)dej'        de(e#ef         dej        j*        fdZ+d,dej        j*        dej        j*        fdZ,dej*        dej*        fdZ-dej*        de.ej'                 de.ej'                 de.ej'                 fdZ/ej0        ej1        ej2        ej3        ej4        ej5        ej6        ej7        ej8        ej9        ej7        ej:        ej;        gZ<ej=        ej>        gZ?ej0        ej@        ej1        ejA        ej2        d iZBde.ej'                 de(e#ej*        f         fdZCde.ej'                 de(e#ej*        f         de(ej*        ej*        f         fdZD G d  d!          ZEd-d$ZFd%eEdeGfd&ZH G d' d(          ZIdejJ        fdej        j*        d)ee(e#ef                  d*e&ejJ                 dej        j*        fd+ZKdS ).    N)defaultdict)Iterable)Enum)AnycastOptional)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 P    |                      dd          ^ }}|r|d         nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      g/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/fx/experimental/optimization.py_parent_namer$   %   s3    
 MM#q))MVT&6!99B,,    patternnodemodulesc                 j   t          |j                  dk    rdS |j        d         |f}t          | |          D ]x\  }}t          |t          j                  s dS |j        dk    r dS t          |j        t                    s dS |j        |vr dS t          ||j                           |ur dS ydS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r&   r'   r(   nodesexpected_typecurrent_nodes         r#   r   r   /   s     49~~u"&)A,!5E'*7E':': 
 
#|,00 	55?m++55,-s33 	55g--55+,--]BB55 C4r%   
new_modulec                     t          | j        t                    s$t          dt	          | j                             t          | j                  \  }}||| j        <   t          ||         ||           d S )NExpected str target, got )r.   r   r2   AssertionErrorr3   r$   setattr)r'   r(   r7   parent_namer"   s        r#   r   r   C   sw     dk3'' NLdk9J9JLLMMM$T[11K%GDKGK $
33333r%   Fmodelc                    t           j        t           j        ft           j        t           j        ft           j        t           j        ft           j        t           j        fg}|st          j	        |           } |rt          | t          j        j                  st          j        |           }n| }t          |                                          }t          j	        |j                  }|D ]}|j        D ]}t'          |||          rt)          |j        d         j                  dk    r8||j        d         j                 }	||j                 }
|
j        se|d         t           j        t           j        t           j        fv rt3          |	|
          }nt5          |	|
          }t7          |j        d         ||           |                    |j        d                    |                    |           t          j        ||          S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr.   torchr/   GraphModulesymbolic_tracedictnamed_modulesgraphr4   r   r+   r,   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r=   inplaceno_tracepatternsfx_modelr(   	new_graphr&   r'   first_layerbnfused_layers               r#   r   r   M   s    
BN#	BN#	BN#	BN#	H  %e$$ :eUX-ABB $U++8))++,,Ghn--I + +O 	+ 	+D%gtW== +ty|)**Q..%dil&9:T[)- 1:")RY	!BBB"3K"D"DKK"5k2"F"FK#DIaL';GGG**49Q<888$$T***	+  >(I...r%   c                     t          j        |           } G d dt          j         j                  } ||                                          S )z5
    Removes all dropout layers from the module.
    c                   P     e Zd Zdedeedf         deeef         def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r,   .kwargsr   c                    t          | j        |         t          j                  r:t	          |          dk    rt          dt	          |                     |d         S t                                          |||          S )Nr   z Expected 1 arg for Dropout, got r   )r.   
submodulesr?   Dropoutr+   r:   superr*   )selfr   r,   r^   	__class__s       r#   r*   z2remove_dropout.<locals>.DropoutRemover.call_module|   ss     $/&12:>> At99>>()WCPTII)W)WXXXAwww**64@@@r%   )__name__
__module____qualname__r
   tupler	   rL   r2   r   r*   __classcell__)rd   s   @r#   DropoutRemoverr]   {   s}        	A 	A(-hm(<	AFJ3PS8n	A	A 	A 	A 	A 	A 	A 	A 	A 	A 	Ar%   rj   )r/   rK   rI   Transformer	transform)r=   rV   rj   s      r#   r   r   u   si      ''H	A 	A 	A 	A 	A- 	A 	A 	A >(##--///r%   orig_moduler4   inputsoutputsc                 P   t          j                    }i |D ]!}|                    |j                  }||<   "|D ] }|                    |fd          }||<   !|                    fd|D                        |                                 t          j        | |          S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |          S N )xenvs    r#   <lambda>z"extract_subgraph.<locals>.<lambda>   s    s1v r%   c                      g | ]
}|         S rs   rs   ).0outputru   s     r#   
<listcomp>z$extract_subgraph.<locals>.<listcomp>   s    888fc&k888r%   )r/   Graphplaceholderr"   	node_copyry   lintrJ   )	rm   r4   rn   ro   rW   inputnew_noder'   ru   s	           @r#   r   r      s     

I"$C  ((44E

  &&t-=-=-=-=>>D		8888888999NN>+y111r%   c                 *    t          j        |           S rr   )	th_mkldnnMkldnnBatchNorm)a_s     r#   rv   rv      s    !:1!=!= r%   c                    i }| D ]}|j         dk    rt          |j        t                    s$t	          dt          |j                             ||j                 }t          |          t          v rt          t          |                   |t          j                  }t          |t          j
                  st	          dt          |                     t          j        |          ||<   t          |||           |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r*   r9   zExpected nn.Module, got )r1   r.   r   r2   r:   r3   
mkldnn_maprI   floatr?   ModulerG   rH   r   )r4   r(   old_modulesr'   
cur_moduler7   s         r#   r   r      s     /1K ? ?7m##dk3// V$%TdkARAR%T%TUUU -JJ:--'Z(8(89*ekRR
!*bi88 X()VDDTDT)V)VWWW*.-
*C*CJ'#D':>>>r%   r   c                     | D ]s}|j         dk    rft          |j        t                    s$t	          dt          |j                             ||j                 }||v rt          ||||                    tdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r*   r9   N)r1   r.   r   r2   r:   r3   r   )r4   r(   r   r'   r   s        r#   r   r      s      L L7m##dk3// V$%TdkARAR%T%TUUU -J[((#D';z3JKKKL Lr%   c                   $    e Zd Zdej        fdZdS )r   fx_graphc                 >    || _         g | _        g | _        g | _        d S rr   )r   r4   start_nodes	end_nodes)rc   r   s     r#   __init__zMklSubgraph.__init__   s#     $&
*,(*r%   N)re   rf   rg   r/   r{   r   rs   r%   r#   r   r      s1        + + + + + + +r%   r   
   r   c                 H     dddt           dt          f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrN   r   c                    | j         }	:| j        j        	| j        j        t	          	                                         d |D             t          t          t          j	                 d | j
        D                       }t          	| j        ||          
fd} |fd          }t          j        j        t                                                                |fd          }||k     S )Nc                 @    g | ]}t          j        |j                  S rs   )rI   randnshaperx   r'   s     r#   rz   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s$    IIITTZ00IIIr%   c                 (    g | ]}|j         d          S )r   )r,   r   s     r#   rz   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s    *T*T*TD49Q<*T*T*Tr%   c                     t                    D ]} |              t          j                    }t                    D ]} |              t          j                    |z
  S rr   )rangetime)fr   beginiterswarmups      r#   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark  sc    6]]  IKKE5\\  9;;&&r%   c                  6    d  d  D              D             S )Nc                 6    g | ]}|                                 S rs   )to_denserx   is     r#   rz   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>
  s-       !"

  r%   c                 6    g | ]}|                                 S rs   )	to_mkldnnr   s     r#   rz   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s     1W1W1WA!++--1W1W1Wr%   rs   sample_inputs	submodules   r#   rv   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>
  s7      &/i1W1W1W1W1W&X   r%   c                         S rr   rs   r   s   r#   rv   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s    		=(A r%   )r   r   owning_moduler   r   	propagater   listr/   r0   r   r   r4   r   rN   rL   rM   )rN   input_nodesoutput_argsr   mkl_timeno_mkl_timer   r   example_inputsrV   r   r   r   s         @@r#   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s:   '~3H.4Kh)).999II[III4=*T*TEO*T*T*TUU$Xu{KUU		' 	' 	' 	' 	' 	' 9    
 
 	O!((**++		
 	
 	
  i A A A A ABB+%%r%   )r   bool)r   r   r   r   rV   r   s   ``` @@r#   r   r      s\     HK &  &  &  &  &  &  &  &  &  &  &  &D r%   rN   c                 2    t          | j                  dk    S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r+   r4   )rN   s    r#   r   r     s     u{ar%   c                   @    e Zd Zd ZdefdZdedefdZdedefdZd	S )
r   c                 2    d g|z  | _         dg|z  | _        d S )Nr   r!   size)rc   ns     r#   r   zUnionFind.__init__%  s     ,06A: !sQw			r%   vc                 .    || j         |<   d| j        |<   d S )Nr   r   )rc   r   s     r#   make_setzUnionFind.make_set)  s    A	!r%   r   c                     | j         |         }||k    r|S |t          d          |                     |          | j         |<   t          t          | j         |                   S )NzParent is None)r!   r:   findr   int)rc   r   pars      r#   r   zUnionFind.find-  sZ    k!n88H; !12223ACQ(((r%   r   bc                     |                      |          |                      |          }}||k    r|S | j        |         | j        |         k     r||}}|| j        |<   | j        |xx         | j        |         z  cc<   d S rr   )r   r   r!   )rc   r   r   s      r#   joinzUnionFind.join6  sz    yy||TYYq\\166H9Q<$)A,&&aqAA	!	!$r%   N)re   rf   rg   r   r   r   r   r   rs   r%   r#   r   r   $  s        ' ' '#    )c )c ) ) ) )%c %c % % % % % %r%   r   pass_configtracerc                    dddt           id}|i }|                    |           |d         rt          |           } |d         rt          |           } |d         du r| S t	          |d         t
                    st          d	          d|d         vrt          d
          |d         d         } |            }|                    t          j	        |                     t          j        |j                   t          |                                           } G d dt                    }t          j                  D ]}|j        }	|j        dk    r||j                 }
t)          |
          t*          v r{|j        }	t/          |
                                d          }|P|j        t4          j        k    rt9          d          |j        t5          j        d          k    rt9          d          n6|j        dk    r+|j        t*          v r|j        }	n|j        t<          v r|j        }	|	|j        k    r|	|j        k    r tA          d |j!        D                       s!"                    |          5  t          j#        |j!        fd          }ddd           n# 1 swxY w Y   tI          tJ          t          j&        j'                 |          |_!        (                    |          5  )                    dd|f          }|*                    |           |f|_!        ddd           n# 1 swxY w Y   tW          t          j                  |          }|_,        j        D ]}|j        dk    r|j        dk    r|j!        d         }t          |j-                  }|D ]B}|j        dk    r5|j        dk    r*|*                    |           .                    |           Ct_          |j-                  dk    r.                    |           t_          j                  }ta          |          fdtc          j                  D ]$\  }}|j        dk    r(|j        dk    r||_2        3                    |           9|j        dk    rL|j        dk    rA |j!        d                   t9          d           |j!        d                   |_4        fd|j5        D             }t_          |          dk    rtA          d |D                       rt9          d          tm          |          }|d         |_7        |dd         D ]}8                    |d         |           &ts          fd          }j        D ]}tu          |d          r8|;                    |j7                           j        <                    |           tu          |d           r8|;                    |j2                           j=        <                    |           tu          |d!          r8|;                    |j4                           j>        <                    |           |?                                D ]l} ||          s_|j=        |j>        z   D ]9}|j!        d         }|*                    |           .                    |           :t          |j        ||           md}j        D ]}|j        dk    s|j        dk    r|dz  }t          jB        t                    D                    d"|           E                                 t          j        |           }|S )#a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZdS )*optimize_for_inference.<locals>.MklSupportr   r      N)re   rf   rg   NOYESUNKNOWNrs   r%   r#   
MklSupportr   l  s        r%   r   r*   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   ,   K   | ]}|j         d k    V  dS )r   N)r   )rx   args     r#   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>  s)      II3:3IIIIIIr%   c                 2                         d| f          S )Nr   )call_method)r   r   s    r#   rv   z(optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)P)P r%   r   r   r   r   c                     t          | d          r                    | j                  S t          | d          r                    | j                  S d S )Ncolorstart_color)hasattrr   r   r   )r   ufs    r#   	get_colorz)optimize_for_inference.<locals>.get_color  sT    1g 	$7717###1m$$ 	*771=)))tr%   z!Expected color for to_dense inputc                 p    g | ]2}t          |t          j                  r |          ' |          3S rr   )r.   r/   r0   )rx   r   r   s     r#   rz   z*optimize_for_inference.<locals>.<listcomp>  sQ       a)) 9Q<<+ 	! ,++r%   c              3      K   | ]}|d u V  	d S rr   rs   r   s     r#   r   z)optimize_for_inference.<locals>.<genexpr>  s&      1119111111r%   zFound None in cur_colorsr   c                  "    t                     S rr   )r   )r   s   r#   rv   z(optimize_for_inference.<locals>.<lambda>  s    H@U@U r%   r   r   	end_colorzmkldnn conversions: %s)Fr   updater   r   r.   rL   RuntimeErrortracerG   rH   r/   rJ   rootrM   r   r   r4   r   r1   r   r3   mkldnn_supportedr   next
parametersdtyperI   r   r:   devicemkldnn_supported_unknownr   anyr,   inserting_beforemap_argr   rh   r'   r	   inserting_aftercreate_noderQ   r   r   rO   rR   r+   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerre   infor~   )r=   r   r   default_pass_configr   
cur_tracerr(   r   r'   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderO   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrN   prvmkldnn_conversionsresultr   r   r   s                              @@@r#   r   r   @  sk   & #."? 
 {+++>* U+, &u%%34==)*BCTJJ JHIII-.FGGGQRRR+,DEkRJe 4 455HN:?H---$()<)<)>)>$?$?G    T    X^$$ "' "'$-7m## -JJ#333",.#'
(=(=(?(?#F#F #/'-<<,G   (.%,u2E2EEE,-PQQQW''{...",. 888","4jm++*"444IItyIIIII **400   jIPPPP               
 U27#34kBBDI))$// ' '"..}j4'RR**7333 $w' ' ' ' ' ' ' ' ' ' ' ' ' ' ' $D$8$8'BBK&H  	* 	*7m##z(A(Ay|H$$E . .7m++{0J0J..x888''---4:!####D)))HN##I	9		B    $ #8>22 4 47m##{(B(B&DKK    W%%$+*C*Cy1&&.$%HIII&Yty|44DNN   -  J :!##11j11111 A$%?@@@
++J#ADJ)!""~ 4 4
1{33334 -88U8U8U8U,V,VM J J4!! 	B"''$*--.4;;DAAA4'' 	N"''$"2334@GGMMM4%% 	J"''$.112<CCDIII %%'' = =  '' 	=)EO; * *il**3///##D))))%+w<<< $ $;+%%
)B)B!#h$$%=?QRRRMMOOO^E8,,FMs$   >J((J,	/J,	76L99L=	 L=	)FF)r   r   )LrG   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   r   rI   torch.fxr/   torch.nnr?   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder	   r
   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r2   rh   r$   r3   r0   rL   r   r   r   r   r   r   r   rB   rF   rC   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rs   r%   r#   <module>r-     s|       # # # # # # $ $ $ $ $ $       & & & & & & & & & &                       & & & & & & & & & * * * * * * * * 0 0 0 0 0 0 H H H H H H H H   - -sCx - - - -d^#%759#s(^   (4
'4 cN48=4 4 4 4%/ %/ %/58? %/ %/ %/ %/P0") 0	 0 0 0 0(22=2 M2 "']	2 2 2 2. IINGLL	J	O	MFL & %L(,7 Iy%Iy%N==
T"'] T#ry.5I    ,L=L#ry.!L bi*+L L L L$+ + + + + + + +. . . .b +  $        % % % % % % % %< -1 ir r8?r$sCx.)r Or X_	r r r r r rr%   