
    Wj\9                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ d dlmZ d ad ad ag aded<   g Zded<   d a d a!ej         G d d                      Z"g a#ded<   d a$d a%d a&d a'ded<   d a(d a)dSdZ*e G d d                      Z+dTdZ, G d d          Z-i Z.ded <   e G d! d"                      Z/e/0                    d#g d$           e/0                    d%g d&           e/0                    d'g d(           e/0                    d)g d*           e/0                    d+g d,           dUd/Z1dVd1Z2dWd4Z3dXd5Z4dYd7Z5dVd8Z6dZd:Z7d[d=Z8d\d?Z9d]dBZ:dSdCZ;d^dEZ<e	d_dG            Z=d`dJZ>dadKZ?e/0                    dLg dM           dbdRZ@dS )c    )annotationsN)	dataclass)	lru_cache)OptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)Callable)Config)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  (    e Zd ZU ded<   dZded<   dS )CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        \/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/_inductor/metrics.pyr   r   +   s0                r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsr   num_auto_chunkingreturnNonec                     da dadat                                           t
                                           dadat                                           da	da
dadadadad S )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countcodegen_mix_order_reductionr   r   r   r   resetr-   A   s     %&"%++--- !01- "#r   c                  P    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   d	S )
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r"   r#   r&   r'   r$   r)   N)r   r   r   __doc__r   r   r   r   r/   r/   _   sf          
  ''''222222r   r/   	list[str]c                 H    d t          j        t                    D             S )Nc                    g | ]	}|j         
S r   name).0fields     r   
<listcomp>z%get_metric_fields.<locals>.<listcomp>o   s    LLL5EJLLLr   )dataclassesfieldsr/   r   r   r   get_metric_fieldsr;   n   s"    LLK$67J$K$KLLLLr   c                  :    e Zd ZdZd
dZddZedd            Zd	S )CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    r   r    c                n    i | _         t                      D ]}t                      |         | j         |<   d S N)cached_metricsr;   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__y   sB     ')) 	< 	<F*1))F*;D''	< 	<r   r/   c                    i }t                      D ]'}t                      |         | j        |         z
  ||<   (t          di |S )Nr   )r;   rA   r@   r/   )rB   delta_metricsrC   s      r   
get_deltaszCachedMetricsHelper.get_deltas~   sU    ')) 	T 	TF$+IIf$58KF8S$SM&!!"33]333r   deltac                |    t                      D ],}t                      |xx         t          | |          z  cc<   -d S r?   )r;   rA   getattr)rH   rC   s     r   apply_deltasz CachedMetricsHelper.apply_deltas   sN    ')) 	8 	8FIIf!7!77	8 	8r   Nr   r    )r   r/   )rH   r/   r   r    )r   r   r   r0   rD   rG   staticmethodrK   r   r   r   r=   r=   r   sf         < < < <
4 4 4 4 8 8 8 \8 8 8r   r=   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  j    e Zd ZU ded<   ded<   dZded<   ddZddZddZddZe	dd            Z
dS )MetricTablestr
table_namer1   column_namesr   r   num_rows_addedrow_fn4Callable[[], dict[str, Optional[Union[str, float]]]]r   r    c                   | j         t                      vrd S  |            t          | j                  t                    k    s.J t          | j                   dt                                 t	          | j                  t	                                                    k    s@J t	          | j                   dt	                                                                 t                      }|gfd| j        D             z   }t          d |D                       sJ |                     |           d S )Nz v.s. c                     g | ]
}|         S r   r   )r6   column_namerow_dicts     r   r8   z'MetricTable.add_row.<locals>.<listcomp>   s    QQQh{+QQQr   c           	   3  j   K   | ].}t          |t          t          t          d           f          V  /d S r?   )
isinstancerQ   floattype)r6   is     r   	<genexpr>z&MetricTable.add_row.<locals>.<genexpr>   s9      HHq:a#ud4jj!9::HHHHHHr   )	rR   enabled_metric_tableslenrS   r   keysr
   all
_write_row)rB   rU   bnrowrZ   s       @r   add_rowzMetricTable.add_row   sD    ?"7"9"999F6884$%%X6664$%%<<S]]<< 766 $+,,
8==??0K0KKKK$+,,QQJx}}4O4OQQ LKK  !!dQQQQt?PQQQQHHCHHHHHHHHr   c                    d| j          dS )Nmetric_table_z.csv)rR   )rB   s    r   output_filenamezMetricTable.output_filename   s    4t4444r   c                    |                                  }t          |d          5 }t          j        |d          }|                    dg| j        z              d d d            d S # 1 swxY w Y   d S )Nw
lineterminator
model_name)rk   opencsvwriterwriterowrS   )rB   filenamefdrt   s       r   write_headerzMetricTable.write_header   s    ''))(C   	@BZ4888FOO\NT->>???	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@s   5A''A+.A+rg   list[str | float | None]c                   |                                  }| j        dk    r3t          j                            |          s|                                  | xj        dz  c_        t          |          D ]+\  }}t          |t                    r|d}n|d}n|}|||<   ,t          |d          5 }t          j        |d          }|                    |           d d d            d S # 1 swxY w Y   d S )Nr      z.6f arn   ro   )rk   rT   ospathexistsrx   	enumerater\   r]   rr   rs   rt   ru   )rB   rg   rv   idxorig_valnew_valrw   rt   s           r   re   zMetricTable._write_row   sF   ''))!##BGNN8,D,D#q &s^^ 	 	MC(E** #%++!"CHH(C   	!BZ4888FOOC   	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!s   .,C''C+.C+r5   c                :    t          | |          }|t          | <   d S r?   )rP   rN   )r5   rS   tables      r   register_tablezMetricTable.register_table   s"    D,//). &&&r   N)rU   rV   r   r    )r   rQ   rL   )rg   ry   r   r    )r5   rQ   rS   r1   r   r    )r   r   r   r   rT   rh   rk   rx   re   rM   r   r   r   r   rP   rP      s         OOON   (5 5 5 5@ @ @ @! ! ! !( / / / \/ / /r   rP   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbkernel_module_coderQ   c                    ddl m} ddlm} |                    |           } ||          }t          j        |j        j                  S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    r{   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)r   r   r   modkernels        r   _parse_kernel_fn_coder      sc     '&&&&&444444


-
.
.Cs##F VY\***r   proper_kernel_fn_codec                D    t          |                                           S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rb   
splitlines)r   s    r   _parse_kernel_line_of_coder   0  s     $//11222r   r   Optional[str]c                ~    |dk    rd S t          j        d|           }|s
J d            |                    d          S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!r{   researchgroup)r   r   ms      r   _parse_size_hintsr   7  sI    )##t
	/1CDDA#####1771::r   c                z    | dvrd S t          j        d|          }|s
J d            |                    d          S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!r{   r   )r   r   r   s      r   _parse_reduction_hintr   @  sL     CCCt
	9;MNNA?????1771::r   patternc                ,    |                      |          S r?   )count)r   r   s     r   _count_patternr   J  s     &&w///r   c                   |                                  d         }|                    d          sJ |                    d          }|                    d          }||dz   |         }|                    d          }t	          |          S )Nr   def (z):r{   ,)r   
startswithindexsplitrb   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   N  s    $//11!4Hv&&&&&s##InnT""G	A/0HNN3Eu::r   kernel_fn_codec                @    |                      d          }| |d         S )z
    Skip decorators.
    r   N)r   )r   	start_poss     r   _parse_proper_kernel_fn_coder   X  s&     $$V,,I)**%%r   numel_arg_nameOptional[int]c                ~    t          j        | d|           }|r"t          |                    d                    S d S )Nz
 = ([\d]+)r{   )r   r   r   r   )r   r   r   s      r   _parse_numelr   `  s@    
	^0002GHHA 1771::tr   Optional[float]c                z    t          j        d|           }|r"t          |                    d                    S 	 dS )z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)r{   N)r   r   r]   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   h  sB     		1>BBA 
QWWQZZ   	 tr   r   r   c           
     *   	 ddl m}  ||          t          |          t          |          	t	          |          t                    t                    t          d                               	fd           dS )z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    r{   )"get_kernel_category_by_source_coder   c                 "    t          d          t          d          t          d          t          d          t                    t          d          t          d          t          d          t                     dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>  s    &&.$,/&'<iHH'(=zJJ*+@&II,-BOTT#$9::"#8(CC"#8(CC"#8(CC";# #
 
 r   N)	r   r   r   r   r   r   r   get_metric_tablerh   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   }  s     FEEEEE889KLLO*?<NOON"#5GGJ*+=>>N8HH 55JKK&''//	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
    r   c                    t                                           D ]p\  } }| t                      v r[|                                }t          j                            |          rt	          j        |           |                                 qdS )z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rN   itemsra   rk   r~   r   r   unlinkrx   )r5   r   rv   s      r   purge_old_log_filesr     s     05577 ! !e(****,,..Hw~~h'' $	(###   ! !r   OrderedSet[str]c                 4    t          t          j                  S r?   )enabled_metric_tables_implr	   ra   r   r   r   ra   ra     s    %f&BCCCr   
config_strc                    t                      }|                     d          D ]E}|                                }|s|t          v sJ d| d            |                    |           F|S )Nr   zMetric table name z is not registered)r   r   striprN   add)r   enabledr5   s      r   r   r     s    )||G  %%  zz|| 	////9999 0// 	DNr   r5   boolc                "    | t                      v S r?   )ra   r4   s    r   is_metric_table_enabledr    s    (****r   c                J    | t           v sJ d|  d            t           |          S )NzMetric table z is not defined)rN   r4   s    r   r   r     s3    ++++-RT-R-R-R+++#D))r   kernel_autotuner   r   triton_config
latency_msr	   r   latencyr]   c                ^     t          d                               fd           d S )Nr  c                 ,    t                     dS )Nr  )rQ   )r	   r   r   r  s   r   r   z,log_kernel_autotune_result.<locals>.<lambda>  s!    && [[!	
 
 r   )r   rh   )r   r   r	   r  s   ````r   log_kernel_autotune_resultr    sV     &''//	
 	
 	
 	
 	
 	
 	
    r   rL   )r   r1   )r   rQ   r   rQ   )r   rQ   r   r   )r   rQ   r   rQ   r   r   )r   rQ   r   rQ   r   r   )r   rQ   r   rQ   r   r   )r   rQ   r   rQ   )r   rQ   r   rQ   r   r   )r   rQ   r   rQ   r   r   )r   rQ   r   rQ   r   rQ   r   r    )r   r   )r   rQ   r   r   )r5   rQ   r   r   )r5   rQ   r   rP   )
r   rQ   r   rQ   r	   r   r  r]   r   r    )A
__future__r   rs   r9   r   r~   r   r   	functoolsr   typingr   r   r   torch._inductorr	   torch._inductor.utilsr
   torch.utils._ordered_setr   collections.abcr   %torch._inductor.runtime.triton_compatr   torch._inductor.schedulerr   r"   r#   r$   r   r   r   r&   r'   r   r   r(   r)   r*   r   r+   r,   r-   r/   r;   r=   rN   rP   r   r   r   r   r   r   r   r   r   r   r   r   ra   r   r  r   r  r   r   r   <module>r     s   " " " " " " " 



      				 				 ! ! ! ! ! !       1 1 1 1 1 1 1 1 1 1 " " " " " " 4 4 4 4 4 4 / / / / / /  <((((((<<<<<<;;;;;;  !"         8: 9 9 9 9     ! ! ! ! ! ! ! ! CE ! D D D D ,- )            < 3 3 3 3 3 3 3 3M M M M8 8 8 8 8 8 8 82 46  5 5 5 5 :/ :/ :/ :/ :/ :/ :/ :/z                     "   -	 	 	         6+ + + + 3 3 3 3      0 0 0 0   & & & &      *) ) ) )X! ! ! !D D D D 
 
 
 
+ + + +* * * *
       
 
 
 
 
 
r   