
    ajM                     .   d dl Z d dlmZmZ d dlmZmZmZ d dlm	Z	 ddl
mZ e j        j        Z e	e j        e j        e j        e j        g          Z e	g ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej         ej!        ej"        ej#        ej$        ej%        ej&        ej'        ej(        ej)        ej*        ej+        ej,        ej-        ej.        ej/        ej0        ej1                  Z2 e	ej3        ej4        ej5        ej6        ej7        ej8        ej9        ej:        ej;        g	          Z<e2e<z  Z=de>fdZ?d	e j@        deAfd
ZBde>fdZCdS )    N)get_device_tflopsget_gpu_dram_gbps)has_hint	size_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                    | t           v r{t          |          dk    rt          d| d|            |                                }t	          |          dz  }d}||z  }t           |          }	 |	|i |d|idz  }
|
|z  dz  }|S d	S )
aw  
    Estimates the compute time of an aten operator.

    Args:
        func_packet: The operator overload packet.
        args: The arguments to the operator.
        kwargs: The keyword arguments to the operator.
        out: The output of the operator.
        out_dtypes: The output data types.

    Returns:
        float: The estimated compute time in nanoseconds.
    r	   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r
   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_times               d/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/utils/_runtime_estimation.pyget_compute_timer   N   s     m##z??a SZSSkSS     *511D8%6'4$_dBfBBcBBBQF
"%99S@3    tc                    d}t          | j        |                                           D ]K\  }}t          |          rt          |          s dS t	          |dk              s|t          |          z  }L||                                 z  S )z
    Calculates the memory consumption of a tensor.

    Args:
        t (torch.Tensor): The input tensor.

    Returns:
        int: The memory consumption of the tensor in bytes.
    r	   r   )zipshapestrider   r   r   element_size)r!   
real_numelsizer%   s       r   get_num_bytesr)   p   s     JAGQXXZZ00 * *f~~ 	Xf%5%5 	11 %Vq[11 	*)D//)J((((r    c                     t                      }t          d | D                       }t          d |D                       }||z   }||z  }|S )aG  
    Estimates the memory transfer time of input and output tensors.

    Args:
        flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
        flat_outs (List[torch.Tensor]): The flat list of outputs.

    Returns:
        float: The estimated memory transfer time in nanoseconds.
    c              3   h   K   | ]-}t          |t          j                  t          |          V  .d S N
isinstancetorchTensorr)   .0r!   s     r   	<genexpr>z$get_transfer_time.<locals>.<genexpr>   sO        jEL6Q6Qa     r    c              3   h   K   | ]-}t          |t          j                  t          |          V  .d S r,   r-   r1   s     r   r3   z$get_transfer_time.<locals>.<genexpr>   sO        z!U\/J/Ja     r    )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_times          r   get_transfer_timer=      s     -..  "2    J   "+    K ,M!$88Mr    )Dr/   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   r   torch.utils._ordered_setr   flop_counterr
   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr!   	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r0   intr)   r=    r    r   <module>rv      s    F F F F F F F F         
 0 / / / / / ' ' ' ' ' ' y~z	  J    	  			 
 	  	  	
  	  	  	  	  	  	  	  	   	! " 	# $ 	% & 		' ( 		) * 	+ , 	- . 	/ 0 	1 2 	3 4 	5 6 	7 8 	9 : 	; < 	= > 	
? " "	H j
	
  +%E    D)U\ )c ) ) ) ),e      r    