
    ajb                        d dl mZ d dlmZmZ d dlZd dlZd dlZd dlm	Z	 de
dee
         fdZdej        j        fd	Zd
e
deeee         f         fdZd Zd ZddddefdZed             ZdefdZdefdZdefdZdS )    )contextmanager)AnycastN)Timerfilenamereturnc                    d}d}d g }t          |           5 }|                                                    |          }t          |          D ]\  }}|dk    r|                    |          }|dk    r(|d |         }	||dz
                                           d         fd|	                    d          D             }
|                    d	                    |
                     	 d d d            n# 1 swxY w Y   |S )
Nz<GRAPH_EXPORT>z</GRAPH_EXPORT>r      c                 >    g | ]}|t                    d          S N)len).0xpfxs     `/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/utils/jit/log_extract.py
<listcomp>zextract_ir.<locals>.<listcomp>   s'    GGGaQs3xxyy\GGG    T)keepends )openreadsplit	enumeratefind
splitlinesappendjoin)r   BEGINENDgraphsf
split_strsi	split_strend_locslinesr   s              @r   
extract_irr)   	   sQ   E
C
CF	h *1VVXX^^E**
%j11 		* 		*LAyAvvnnS))G"}}(7(#AQU#..004CGGGG1<<<+F+FGGGEMM"''%..))))		** * * * * * * * * * * * * * * Ms   CC==DDinp_typec                 Z   |                                  }|                                 }|                                 }|                                 }|t	          d          |t	          d          |t	          d          |t	          d          t          j        ||||          S )NzFmake_tensor_from_type: 'size' is None (inp_type.sizes() returned None)zJmake_tensor_from_type: 'stride' is None (inp_type.strides() returned None)zImake_tensor_from_type: 'device' is None (inp_type.device() returned None)zGmake_tensor_from_type: 'dtype' is None (inp_type.dtype() returned None))sizestridedevicedtype)sizesstridesr.   r/   AssertionErrortorchempty_strided)r*   r,   r-   r.   r/   s        r   make_tensor_from_typer5      s    >>DF__FNNE|efff~ijjj~hiii}fgggDeTTTTr   irc                    t           j                            | d          }|                                 g }|                                D ]}t          |                                t           j        j                  r)|                    t          j
        dd                     ]t          |                                t           j        j                  r)|                    t          j        dd                     t          |                                t           j        j                  rUt          t           j        j        |                                          }|                    t          |                     =t          |                                t           j        j                  r.|                    t          j        dd          dk               t#          d|                                           t           j                            d|          }t           j                            |j                   ||fS )	NT)parse_tensor_constantsg?d   r   r   z,A default value is not implemented for type forward)r3   _Cparse_irmakeMultiOutputIntoTupleinputs
isinstancetype	FloatTyper   randomuniformIntTyperandint
TensorTyper   r5   BoolTypeNotImplementedError_create_function_from_graph!_jit_pass_erase_shape_informationgraph)r6   rK   r>   inp
tensorTypefuncs         r   load_graph_and_inputsrO   -   s   Hb>>E	""$$$F||~~ c cchhjj%("455 
	cMM&.S112222

EH$455 	cMM&.C001111

EH$788 	ceh1388::>>JMM/
;;<<<<

EH$566 	cMM&.A..!34444%&aUXU]U]U_U_&a&abbb8//	5AAD	H..tz:::&>r   c                 f    t          d| |d          }|                                }|j        dz  S )Nzfn(*inputs))fnr>   )stmtglobals  )r   blocked_autorangemedian)rQ   r>   	test_runsttimess        r   	time_cudarZ   B   s:    =*G*GHHHA!!E<$r   c                     t          j                    }t          |          D ]} | |  t          j                    }||z
  |z  dz  S )NrT   )timeperf_counterrange)rQ   r>   rW   r'   _es         r   time_cpura   G   sV    A9  
FAEY%%r   
      )warmup_runsrW   c                (   t          |           \  }}t          |          D ]} ||  d }|D ].}t          |t          j                  r|j        j        dk    } n/|t          d          |rt          |||          nt          |||          }|S )NcpuzNo tensor found in inputs)
rO   r^   r?   r3   Tensorr.   r@   r2   ra   rZ   )	r6   r>   rd   rW   rK   r_   is_cpuinputouts	            r   run_testrk   N   s    $R((HE1;  vF  eU\** 	\&%/FE	 ~899906
_(5&)
,
,
,IeVU^<_<_CJr   c               /      K   t           j                            d          }	 d V  t           j                            |           d S # t           j                            |           w xY w)NF)r3   r;   _get_graph_executor_optimize)argskwargsold_optimizes      r   no_fuserrq   ^   sa      888??L<--l;;;;;--l;;;;s   A !A)c                 r    t                      5  t          | |          cd d d            S # 1 swxY w Y   d S r   )rq   rk   r6   r>   s     r   run_baseline_no_fusionrt   f   s}    	 $ $F##$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   ,00c                    	 |rdgndg}t           j                            |          }t           j                            d          5  t	          | |          cd d d            t           j                            |           S # 1 swxY w Y   	 t           j                            |           d S # t           j                            |           w xY w)N)DYNAMICrb   )STATICrb   fuser1)r3   jitset_fusion_strategyfuserrk   )r6   r>   dynamicstrat	old_strats        r   run_nncr   k   s   1%,B!!>2BI11%88	Y__X&& 	( 	(B''	( 	( 	( 	( 	( 	( 	( 		%%i0000	( 	( 	( 	( 	( 	( 	( 	( 	( 		%%i00000	%%i0000s0   AB1 BB1 BB1 BB1 1!Cc                     t           j                            d          5  t          | |          cd d d            S # 1 swxY w Y   d S )Nfuser2)r3   ry   r{   rk   rs   s     r   run_nvfuserr   t   s    		"	" $ $F##$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   =AA)
contextlibr   typingr   r   rB   r3   r\   torch.utils.benchmarkr   strlistr)   r;   rF   r5   tuplerO   rZ   ra   floatrk   rq   rt   r   r    r   r   <module>r      s   % % % % % %            ' ' ' ' ' ' c    *UEH$7 U U U Uc eCcN&;    *  
& & & )+b   U      < < <$% $ $ $ $
1E 1 1 1 1$u $ $ $ $ $ $r   