
    Mjn                        d dl mZmZmZmZmZ d dlZd dlmZ ddl	m
Z ddlmZmZmZmZmZmZ d dlmZmZ d dlmZmZ  ed	          Zefd
edeg ef         fdZd ZddZde_          G d d          Z! G d dee                   Z"dS )    )SequenceListTypeVarTupleCallableN)TritonSemantic   )_core)
AutoLayoutDistributedLayoutDistributedLinearLayoutSliceLayoutSharedLayoutCoalescedLayout)GluonOpBuildercompute_tmem_reg_layout)flatten_values_to_irunflatten_ir_valuesTensorTycondmsg_fnc                 0    | s | |                      d S N )r   r   categorys      q/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/triton/experimental/gluon/language/_semantic.py_checkr      s*     !hvvxx   ! !    c                 ^    t          | t                    ot          d | D                       S )Nc              3   @   K   | ]}t          |t                    V  d S r   
isinstanceint).0is     r   	<genexpr>z_is_int_list.<locals>.<genexpr>   s,      .Q.Qaz!S/A/A.Q.Q.Q.Q.Q.Qr   )r"   r   allvalues    r   _is_int_listr*      s.    eX&&Q3.Q.Q5.Q.Q.Q+Q+QQr   c                    t          t          t                    d            t          dv fd           t          t          t                    fd           t          dk    odz
  z  dk    d            t	                    t          t          d	 D                       fd
           t                    }t          |dk    d            |g }dk    }|rdn|r'|D ]$}t          t          |          |k    d            %t          | ||          t          d ufd           |rd         }	j        sSt          j	        d         d|	dz  gk    fd           j        
                    d|	dz  g           ddgj	        d<   nj        d         d|	dz  gk    r| j        dt          j                  z  }
t          |
dz  k    fd           j        }dD ]O}t          |          }t          |          D ]-\  }}|d|	dz  gk    r||         |d         c|d<   ||<   c c S .PJ d|             S )Nc                      dS )Nzinstr_variant must be a stringr   r   r   r   <lambda>z*_compute_tmem_reg_layout.<locals>.<lambda>   s    3S r   )32x32b16x64b16x128b16x256b16x32bx232x32b_splitnc                      d  S )Nzunknown instr_variant: r   )instr_variants   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    <]<< r   c                  (    dt                     S )Nz!num_warps must be an int but got type	num_warpss   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    /fSWXaSbSb/f/f r      r	   r   c                      dS )Nz)num_warps must be a power of two and >= 4r   r   r   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    Ju r   c              3   @   K   | ]}t          |t                    V  d S r   r!   )r$   dims     r   r&   z+_compute_tmem_reg_layout.<locals>.<genexpr>   s,      55z#s##555555r   c                      d  S )Nz#shape entries must be ints but got r   shapes   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    ?lej?l?l r      c                      dS )Nzexpected a 2D tensorr   r   r   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    4 r   r3   r.   c                      dS )Nzcga_layout basis rank mismatchr   r   r   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>(   s    /O r   c                      d  d d S )NzTMEM layout 'z' unsupported for shape z and num_warps r   )atom_variantr:   rA   s   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>3   s!    j<jjjj_hjj r   c                      d  S )NzJsplitn with 1 register requires the last lane basis to be [0, N / 2]. Got r   )
layout_objs   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>;   s    thrtt r       c                      dd z   d  dS )NzETo be able to `tmem.load` into `tl.split` you need to have more than rJ    z-bit registers, as you need to use the instruction 32x32b.x1 twice. You can always load into instr_variant="32x32b" and then convert_layout to this layout otherwise.r   )bitwidths   r   r-   z*_compute_tmem_reg_layout.<locals>.<lambda>B   s3     3]h3] 3])13] 3] 3] r   )
lane_bases
warp_basesFz6splitn requires at least one basis of [0, N / 2]. Got )r   r"   strr#   listr'   lenr   	reg_basesrN   appendprimitive_bitwidthgetattr	enumerate)
element_tyrA   layoutr:   r5   
cga_layoutranksplitnbasisNnum_regrS   	bases_strbasesr%   rF   rM   rI   s    ` ``          @@@r   _compute_tmem_reg_layoutrb      sV   
:mS))+S+STTT
=cc<<<<> > >
:i%%'f'f'f'fggg
9>@yIM:q@BuBuvvvKKE
355u555557l7l7l7lmmmu::D
41944555
o-F%888=L Q 	Q 	QE3u::%'O'OPPPP( J :T!jjjjjjl l l  \!H# 	\ :(,AF;ttttv v v ''AF444)*AJ!"%%!"%!Q!V44!4HZ1222G".( +] +] +] +]^ ^ ^ #,I9 * *	
I66 )% 0 0 * *HAuAF++27(IbM/	"uQx)))))) ,* \[SY[[[[5r   Tc                   &    e Zd ZdefdZd Zd ZdS )GluonCallerContextr:   c                     || _         d S r   r9   )selfr:   s     r   __init__zGluonCallerContext.__init__W   s    "r   c                     d| j          S )N_NWr9   rf   s    r   manglezGluonCallerContext.mangleZ   s    %T^%%%r   c                 b    |                     d|                    | j                             d S )Nzttg.num-warps)set_attrget_int32_attrr:   )rf   fnbuilders      r   initialize_calleez$GluonCallerContext.initialize_callee]   s,    
OW%;%;DN%K%KLLLLLr   N)__name__
__module____qualname__r#   rg   rk   rq   r   r   r   rd   rd   U   sR        ## # # # #& & &M M M M Mr   rd   c            
           e Zd ZU ej        ZeZeed<   defdZd Z	d Z
dee         dee         fdZded	ed
efdZdeded
ef fdZded
eeef         f fdZdedee         d
ef fdZdedee         d
efdZdeded
ef fdZ fdZdedee         def fdZd Zd Zd?dZd Zd  Zd! Zd" Zd# Z d$ Z!d% Z"d& Z#d' Z$d( Z%d) Z&d* Z'd+ Z(e)d,             Z*d-e+e         d	ed.ed
eed/f         fd0Z,d-e+e         d	ed
eed/f         fd1Z-ded2ed3ed
efd4Z.dededed
efd5Z/d6ed7ed	ed
efd8Z0d6ed
efd9Z1d:e+e         d;e+e         fd<Z2d= Z3d> Z4 xZ5S )@GluonSemanticrp   c                     || _         d S r   )rp   )rf   rp   s     r   rg   zGluonSemantic.__init__g   s    r   c                     |g k    r|}n.t          j        ||| j                            |                    }|                     ||          S r   )ttgldistributed_typerp   get_gluon_layout_from_tensortensor)rf   handle	scalar_tyrA   tys        r   _wrap_handle_infer_layoutz'GluonSemantic._wrap_handle_infer_layoutj   sK    B;;BB&y%9b9bci9j9jkkB{{62&&&r   c                 X    |                      |j        |j        j        |j                  S r   )r   r}   r8   scalarrA   )rf   r|   s     r   _wrap_tensor_infer_layoutz'GluonSemantic._wrap_tensor_infer_layoutq   s%    --fmV[=OQWQ]^^^r   	lhs_shape	rhs_shapec                    t          |          t          |          k    rt          d| d|           g }t          |          D ]\  }}||         }|dk    r|                    |           )|dk    s||k    r|                    |           Kt          dt	          |          z   dz   t	          |          z   dz   t	          |          z             |S )N!Cannot broadcast, rank mismatch: , r	   z?Cannot make_shape_compatible: incompatible dimensions at index : z and )rR   
ValueErrorrW   rT   rP   )rf   r   r   	ret_shaper%   leftrights          r   _broadcast_shapeszGluonSemantic._broadcast_shapest   s   y>>S^^++YYYiYYZZZ	 ++ 	a 	aGAtaLEqyy  ''''1**%4--  &&&&  "-/21vv"68<"=?B4yy"IKR"SUXY^U_U_"` a a ar   inputaxisreturnc                 b   d j         D             }|                    d           dk     rt          j                   z  t          t	          j        t          j                  fd           j        j        t          t	          t          t          t          f          fd           t          t	          t          t          f          p
j        k    fd           | j                            j                  }|                     |j        j        |          S )Nc                 6    g | ]}t          j        |          S r   )ry   _unwrap_if_constexprr$   xs     r   
<listcomp>z-GluonSemantic.expand_dims.<locals>.<listcomp>   s#    GGGaT.q11GGGr   r	   r   c                      d j         S Nz=expected expand_dims input to be a distributed_type but got: r7   r   s   r   r-   z+GluonSemantic.expand_dims.<locals>.<lambda>       eW\Waee r   c                      d  S )Nz;expected expand_dims input to have a SliceLayout, but got: r   rY   s   r   r-   z+GluonSemantic.expand_dims.<locals>.<lambda>   s    ]U[]] r   c                      d  dj          S )Nz7expected expand_dims input layout to be sliced in axis z	 but got r>   )r   rY   s   r   r-   z+GluonSemantic.expand_dims.<locals>.<lambda>   s    idii]c]gii r   )rA   insertrR   r   r"   r8   ry   rz   rY   r   r   r   r>   rp   create_expand_dimsr}   r   r   )rf   r   r   	dst_shaper}   rY   s    ``  @r   expand_dimszGluonSemantic.expand_dims   s7   GG5;GGG	q!!!!88C$$$Dz%*d&;<<eeee	g 	g 	g"z&;
O"LMM]]]]	_ 	_ 	_v
O<==StASiiiii	k 	k 	k 00tDD--fej6GSSSr   abc                     |                      ||          \  }}t          |j        g k    d            t                                          ||          }|                     |          S )Nc                      dS )NzCannot join scalars in gluonr   r   r   r   r-   z$GluonSemantic.join.<locals>.<lambda>   s    &D r   )broadcast_impl_valuer   rA   superjoinr   )rf   r   r   r)   	__class__s       r   r   zGluonSemantic.join   sa    ((A..1qw"}DDEEEQ""--e444r   c                     t                                          |          \  }}|                     |          |                     |          fS r   )r   splitr   )rf   r   lhsrhsr   s       r   r   zGluonSemantic.split   sD    77==##S--c22D4R4RSV4W4WWWr   dimsc                 r    t                                          ||          }|                     |          S r   )r   permuter   )rf   r   r   r)   r   s       r   r   zGluonSemantic.permute   s.    t,,--e444r   rA   c                    t          t          j        t          j                  fd           j                                        t          t                    t                    k    fd           k    rS t                    D ];\  }}|         |k    r*|dk    r$t          d|          d| d| d d 
          <t          j        j        j	        j        j
                  }| j                            j        |                    | j                            }|                     ||          S )	Nc                      d j         S r   r7   r   s   r   r-   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>   r   r   c                      d d  S )Nr   r   r   )rA   	src_shapes   r   r-   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>   s    5mYb5m5mfk5m5m r   r	   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension r   r   )r   r"   r8   ry   rz   get_block_shapesrR   rW   r   r   rY   rp   create_broadcastr}   to_irr|   )rf   r   rA   r%   itemret_tyr}   r   s    ``    @r   broadcast_impl_shapez"GluonSemantic.broadcast_impl_shape   sy   z%*d&;<<eeee	g 	g 	gJ//11	s9~~U+-m-m-m-m-mnnnIL ++ 	@ 	@GAtQx4DAII  "?W\]^W_ "? "?CG"? "?%&"? "?*3"? "?7<"? "? @ @ @ &uz'8%ARSS..u|V\\$,=W=WXX{{66***r   r   r   c                   	 |j         |j         	                                r	                                s"t                                          ||          S t	          t          t          j                  fd           t	          t          	t          j                  	fd                                           }	                                }| 	                    ||          }t          j
        t                    }t          	j
        t                    }|r|s|                     |	j
                  }nO|r|s|                     |j
                  }n/j
        	j
        k    rt          dj
         d	j
                   |                     ||          }|                     ||          }||fS )Nc                      d S )Nz@expected broadcast left input to be a distributed_type but got: r   )lhs_tys   r   r-   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>   s    dZ`dd r   c                      d S )NzAexpected broadcast right input to be a distributed_type but got: r   )rhs_tys   r   r-   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>   s    e[aee r   zLayout mismatch in broadcast: z vs )r8   is_blockr   r   r   r"   ry   rz   r   r   rY   r   set_auto_layoutr   r   )rf   r   r   r   r   r   is_lhs_autois_rhs_autor   r   r   s           @@r   r   z"GluonSemantic.broadcast_impl_value   s      	:(9(9 	:77//S999z&$"788dddd	f 	f 	fz&$"788eeee	g 	g 	g ++--	++--	**9i@@	 
;; 
;; 	b{ 	b&&sFM::CC 	b 	b&&sFM::CC]fm++`fm``QWQ^``aaa''Y77''Y77Cxr   c                     ||z
  g}|t                      }t          j        t          j        ||          }t	                                          |||          S )N)r   )r   ry   rz   int32r   arange)rf   startendrY   rA   r   r   s         r   r   zGluonSemantic.arange   sN    u>\\F&tz5&AAww~~eS~888r   r   can_reorderc                     t          | d            t                                          |||          }|                     |          S )Nc                      dS )Nz%can_reorder is not supported in gluonr   r   r   r   r-   z'GluonSemantic.reshape.<locals>.<lambda>   s    (O r   )r   r   reshaper   )rf   r   r   r   r)   r   s        r   r   zGluonSemantic.reshape   sF    ; O OPPPy+>>--e444r   c                     t          |          dk    r|S t          j        |j        ||          }| j                            |                    | j                  |j                  }t          j        ||          S )Nr   )	rR   ry   rz   dtyperp   create_splatr   r}   r|   )rf   r)   rA   rY   r   r}   s         r   splatzGluonSemantic.splat   se    u::??L&u{E6BB**6<<+E+Eu|TT{66***r   c                 |    |                      ||          }|t                      }|                     |||          S r   )make_scalarr   r   )rf   rA   r)   r   rY   r   s         r   fullzGluonSemantic.full   s;    !!%//>\\Fzz&%000r   Fc                    |j         t          t          t          j                  fd           t          t          t          j                  fd           t          j        j        j                  }|                    | j	                  }|rw| j	        
                    ||j                  sWt          dj         d d|                     j        j                   d|                     j                             | j	                            ||j                  }t          j        ||          S )Nc                      d S )Nz@expected convert_layout input to be a distributed_type but got: r   )r   s   r   r-   z.GluonSemantic.convert_layout.<locals>.<lambda>   s    `Z\`` r   c                      d  S Nz4expected 'layout' to be a DistributedLayout but got r   r   s   r   r-   z.GluonSemantic.convert_layout.<locals>.<lambda>       VfVV r   zlayout conversion from z to z) is not trivial.
The linear layouts are:

)r8   r   r"   ry   rz   r   rX   rA   r   rp   is_convert_layout_trivialr}   	TypeErrorrY   to_linear_layoutcreate_convert_layoutr|   )rf   r)   rY   assert_trivialr   	ret_ty_irr}   r   s     `    @r   convert_layoutzGluonSemantic.convert_layout   si   Zz"d344````	b 	b 	bz&$"899VVVV	X 	X 	X&r}bhGGLL..	 	J$,"H"HTYT`"a"a 	J Ibi I IV I I8<8M8MbiY[Ya8b8bI I#44VRXFFI I J J J 33Iu|LL{66***r   c                 ,   t          t          t          j                  fd           t          t	                    fd           t          t          t          j                  fd           t          j                  }|9| j                            |	                    | j                  |j
                  }n2| j                            |	                    | j                            }t          j        |          S )Nc                      d  S )Nz,expected 'element_ty' to be a dtype but got r   )rX   s   r   r-   z/GluonSemantic.allocate_shared.<locals>.<lambda>   s    ;vjt;v;v r   c                      d  S Nz1all elements of 'shape' must be integers but got r   r@   s   r   r-   z/GluonSemantic.allocate_shared.<locals>.<lambda>       ,g`e,g,g r   c                      d  S Nz/expected 'layout' to be a SharedLayout but got r   r   s   r   r-   z/GluonSemantic.allocate_shared.<locals>.<lambda>       QQQ r   )r   r"   ry   r   r*   r   shared_memory_descriptor_typerp   create_local_allocr   r}   shared_memory_descriptor)rf   rX   rA   rY   r)   r   r}   s    ```   r   allocate_sharedzGluonSemantic.allocate_shared   s    z*dj113v3v3v3vwww|E""$g$g$g$ghhhz&$"344QQQQ	S 	S 	S/
E65QQ\44RXXdl5K5KU\ZZFF\44RXXdl5K5KLLF,VZPUVVVr   c                 4   t          t          t          j                  fd           t          j        |j        |j                  }| j                            |	                    | j                  |j
                  }t          j        ||          S )Nc                      d  S r   r   r   s   r   r-   z+GluonSemantic.shared_load.<locals>.<lambda>  r   r   )r   r"   ry   r   rz   r   rA   rp   create_local_loadr   r}   r|   )rf   mem_descrY   r   r}   s     `  r   shared_loadzGluonSemantic.shared_load  s    z&$"899VVVV	X 	X 	X&x~x~vNN//T\0J0JHO\\{66***r   c                 2   t          t          t          j                  fd           t          j        j        k    fd           t          j        j        k    fd           | j                            j        j                   d S )Nc                  (    dt                      S )Nz+expected 'value' to be a tensor, but got a r7   r(   s   r   r-   z,GluonSemantic.shared_store.<locals>.<lambda>  s    7reijoepep7r7r r   c                  *    dj          d j          dS )Nzsource shape z and destination shape  must matchr@   r   r)   s   r   r-   z,GluonSemantic.shared_store.<locals>.<lambda>      fu{ff8>fff r   c                  *    dj          d j          dS )Nzsource dtype z and destination dtype r   r   r   s   r   r-   z,GluonSemantic.shared_store.<locals>.<lambda>  r   r   )	r   r"   ry   r|   rA   r   rp   create_local_storer}   )rf   r   r)   s    ``r   shared_storezGluonSemantic.shared_store  s    z%--/r/r/r/rsssu{hn,fffff	h 	h 	hu{hn,fffff	h 	h 	h''FFFFFr   c                 (   t          |t          j                  st          dt	          |                     t          |t          j                  st          dt	          |                     |j        |j        k    r t          d|j         d|j         d          |j        |j        k    r t          d|j         d|j         d          |j        |j	        t          |j                   d          k    rt          d	|j         d
|j	                   |j                            | j                  }|j                            | j                  }| j                            ||t          |j                  |j        j                  S )NzIbank_conflicts expects the register layout to be a distributed_type, got zTbank_conflicts expects the shared layout to be a shared_memory_descriptor_type, got zregister shape z and shared shape r   z$mismatched dtypes between register (z) and shared (z	) layoutsz,bank_conflicts NYI for subslices. Got shape z and alloc_shape )r"   ry   rz   r   r8   r   rA   r   rX   alloc_shaperR   rY   _to_irrp   get_shared_bank_conflictsrQ   rU   )rf   distr_ty	shared_tyreg_attrshared_attrs        r   bank_conflictszGluonSemantic.bank_conflicts  s   (D$9:: 	nl\`ai\j\jlln n n )T%GHH 	xgklugvgvxx   >Y_,,mx~mmQZQ`mmmnnn8#666yx7JyyZcZnyyy   ?i3S5I5I4I4J4JKKKxyxxajavxx   ?))$,77&--dl;;|55hTRZR`MaMa6>6I6\^ ^ 	^r   c                    t          t          t          t          f          fd           t          |t                    st	          |          }t          j                  t          t          t          f          rt          j	                  S t          j	        | j
                                                | j
                  |                    S )Nc                  (    dt                      S )Nz2Expected a DistributedLayout or SharedLayout, got r7   r   s   r   r-   z0GluonSemantic.to_linear_layout.<locals>.<lambda>1  s    ZDQWLLZZ r   )r   r"   r   r   rQ   ry   r   r   r   	constexprrp   r   r   )rf   rY   rA   s    ` r   r   zGluonSemantic.to_linear_layout/  s    z&#4l"CDDZZZZ	\ 	\ 	\ %&& 	 KKE*622fz+BCDD 	*>&)))~dl;;FMM$,<W<WY^__```r   c                 D    | j                             |j                   d S r   )rp   create_local_deallocr}   )rf   r   s     r   shared_dealloczGluonSemantic.shared_dealloc=  s     ))(/:::::r   c                    j         }t          t          t                    fd           t          t          |j        t
                    fd           | j                                                | j                  j	                  }t          j        |j        |j                  }|                     ||          S )Nc                      d  S )Nz9set_auto_layout must set to a distributed layout but got r   r   s   r   r-   z/GluonSemantic.set_auto_layout.<locals>.<lambda>C  s    [SY[[ r   c                  "    d j         j         S )Nz4set_auto_layout input must have auto layout but got r8   rY   r(   s   r   r-   z/GluonSemantic.set_auto_layout.<locals>.<lambda>E  s    aejN_aa r   )r8   r   r"   r   rY   r   rp   create_set_auto_layoutr   r}   ry   rz   rX   rA   r|   )rf   r)   rY   src_tyr}   res_tys    ``   r   r   zGluonSemantic.set_auto_layout@  s    z&"344[[[[	] 	] 	]z&-44aaaa	c 	c 	c44V]]4<5P5PRWR^__&v'8&,OO{{66***r   c                 2   t          t          t                    fd           t          t          t                    fd           t          t          t                    fd           dg|j        z  }|<   t	          |j                  }|<   |j        }t          j        |j	        |||j
        j                  }| j        }	|	                    |                    |	          |j        |          }
t          j        |
fi |j        S )Nc                      d  S )Nz&expected 'start' to be an int but got r   )r   s   r   r-   z-GluonSemantic.memdesc_slice.<locals>.<lambda>K  s    /_X]/_/_ r   c                      d  S )Nz'expected 'length' to be an int but got r   )lengths   r   r-   z-GluonSemantic.memdesc_slice.<locals>.<lambda>L  s    0bZ`0b0b r   c                      d  S )Nz$expected 'dim' to be an int but got r   r   s   r   r-   z-GluonSemantic.memdesc_slice.<locals>.<lambda>M  s    -YTW-Y-Y r   r   )r   r"   r#   r[   rQ   rA   rY   ry   r   r   r8   r   rp   create_memdesc_subslicer   r}   r   __dict__)rf   r   r   r  r>   offsetsrA   rY   r   rp   r}   s     ```      r   memdesc_slicezGluonSemantic.memdesc_sliceJ  s   z%%%'_'_'_'_```z&#&&(b(b(b(bcccz#s##%Y%Y%Y%YZZZ#%X^$$c
/vx}Ohii,00'1B1BHOU\]],VCCr{CCCr   c                    |                                t          j        t          j        k    fd           |j        dd          }|                                j        |j        }t          j        |j	        |||          }| j
        }|                    |                    |          |j                  }t          j        |fi |j        S )Nc                      d j          S )Nz%expected 'index' to be int32 but got r7   indexs   r   r-   z-GluonSemantic.memdesc_index.<locals>.<lambda>Z  s    1eY^Yc1e1e r   r	   )	to_tensorr   r8   ry   r   rA   r}   rY   r   r   rp   create_memdesc_indexr   r   r  )rf   r   r   rA   rY   r   rp   r}   s     `     r   memdesc_indexzGluonSemantic.memdesc_indexX  s    u%%uzTZ')e)e)e)efffqrr"u%%,/vuUU,--bhhw.?.?RWXX,VCCr{CCCr   c                    t          t                    fd           t          t                    t          j                  k    fd           fdD             }j        j        d t                    j        z
           }|fdD             z  }| j                            j	                  }| j        
                    |          }t          j        |j        |||          S )Nc                      d  S )Nz1all elements of 'order' must be integers but got r   )orders   r   r-   z-GluonSemantic.memdesc_trans.<locals>.<lambda>d  r   r   c                  :    d j          dt                     dS )Nzsource rank (z) and order length (z) must match)r[   rR   )r   r&  s   r   r-   z-GluonSemantic.memdesc_trans.<locals>.<lambda>g  s#    _HM__s5zz___ r   c                 *    g | ]}j         |         S r   r@   )r$   r%   r   s     r   r   z/GluonSemantic.memdesc_trans.<locals>.<listcomp>i  s     222q"222r   c                 Z    g | ]'}t                    j        z
  d          |         (S r   )rR   r[   )r$   r%   r   r   s     r   r   z/GluonSemantic.memdesc_trans.<locals>.<listcomp>l  s9    ]]]RSKK(8(88=(H(I(IJ1M]]]r   rX   rA   r   rY   )r   r*   rR   rA   r8   r   r[   rp   create_memdesc_transr}   get_gluon_layout_from_memdescry   r   r   )rf   r   r&  rA   new_alloc_shaper}   rY   r   s    ``    @r   memdesc_transzGluonSemantic.memdesc_transc  s%   |E""$g$g$g$ghhhJJ#hn---_____	a 	a 	a 3222E222m/%&Gs;'7'7(-'G&GH]]]]]W\]]]]228?EJJ;;FCC,VV[9HQWY Y Y 	Yr   c                    t          t                    fd           t          t          j                  t          j        j                  k    fd           | j                            j                  }| j                            |          }j	        j
        }t          |          j        z
  }|d |         t                    z   }t          j        |j        ||          S )Nc                      d  S r   r   r@   s   r   r-   z/GluonSemantic.memdesc_reshape.<locals>.<lambda>t  r   r   c                      d j          d S )Nz)memdesc_reshape total elements mismatch: z -> r@   )r   rA   s   r   r-   z/GluonSemantic.memdesc_reshape.<locals>.<lambda>w  s&     4 4 4,14 4 r   r*  )r   r*   mathprodrA   rp   create_memdesc_reshaper}   r,  r8   r   rR   r[   rQ   ry   r   r   )rf   r   rA   r}   rY   r   
prefix_lenr-  s    ``     r   memdesc_reshapezGluonSemantic.memdesc_reshapes  s   |E""$g$g$g$ghhhIe	(. 9 995 5 5 5 5	
 	
 	
 44X_eLL;;FCCm/%%5
%kzk2T%[[@,~'
 
 
 	
r   c                    t          t          t          j                  fd           t          t	                    fd           t          t          t          j                  fd           t          j                  }| j                            |	                    | j                  |j
                  }t          j        |fi |j        S )Nc                      d  S )Nz'expected 'dtype' to be a dtype but got r   r   s   r   r-   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  s    6g`e6g6g r   c                      d  S r   r   r@   s   r   r-   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  r   r   c                      d  S r   r   r   s   r   r-   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  r   r   )r   r"   ry   r   r*   r   r   rp   create_memdesc_reinterpretr   r}   r   r  )rf   r   r   rA   rY   r   r}   s     ```  r   memdesc_reinterpretz!GluonSemantic.memdesc_reinterpret  s    z%,,.g.g.g.ghhh|E""$g$g$g$ghhhz&$"344QQQQ	S 	S 	S/ufeLL88$,9O9OQYQ`aa,VCCr{CCCr   c                 d    |rt          j        |||          }n|}|                     ||          S r   )ry   rz   r|   )rf   r   r~   r   rY   r  s         r   wrap_tensorzGluonSemantic.wrap_tensor  s9     	*9iHHFFF{{1f%%%r   c                    | D ]2t          t          j        t          j                  fd           3d | D             d         t          t          fddd          D                       fd           d S )Nc                      d j         S Nz#expected distributed_type but got: r7   )r   s   r   r-   z2GluonSemantic._check_same_layout.<locals>.<lambda>  s    FvlmlrFvFv r   c                 &    g | ]}|j         j        S r   r  r   s     r   r   z4GluonSemantic._check_same_layout.<locals>.<listcomp>  s    ---Q16=---r   r   c              3   $   K   | ]
}|k    V  d S r   r   )r$   ll0s     r   r&   z3GluonSemantic._check_same_layout.<locals>.<genexpr>  s'      00q17000000r   r	   c                      d  S )Nz3Expected inputs to have matching layouts, but got: r   )layoutss   r   r-   z2GluonSemantic._check_same_layout.<locals>.<lambda>  s    VWVV r   )r   r"   r8   ry   rz   r'   )xsrE  rG  r   s    @@@r   _check_same_layoutz GluonSemantic._check_same_layout  s     	x 	xA:afd&;<<>v>v>v>vwwww--"---QZs0000GABBK00000VVVV	X 	X 	X 	X 	Xr   inputsreverse.c                     d         j         j        t                    }| |cxk    r|k     sn J d| d| d            |dk     r||z  }D ]}|j         j        k    s
J d             j                            d D             ||           |                                           sJ t           fdt          t                              D                       S )Nr   z
scan axis z must be < inputs rank ()z(all scan inputs must have the same shapec                     g | ]	}|j         
S r   r}   r$   ts     r   r   z2GluonSemantic.associative_scan.<locals>.<listcomp>  s    +E+E+EAH+E+E+Er   c              3      K   | ]>}                                         |          |         j        j                  V  ?d S r   r   
get_resultr8   r   )r$   r%   rJ  scan_oprf   rA   s     r   r&   z1GluonSemantic.associative_scan.<locals>.<genexpr>  sa       ) ) **7+=+=a+@+@&).BWY^__) ) ) ) ) )r   )r8   rA   rR   rp   create_scanverifytuplerange)	rf   rJ  r   region_builder_fnrK  r[   rQ  rU  rA   s	   ``     @@r   associative_scanzGluonSemantic.associative_scan  sK   q	$5zzu####t#####%W$%W%WPT%W%W%W###!88DLD 	U 	UA6<5(((*T((((,**+E+Ef+E+E+EtWUU'"""~~ ) ) ) ) ) ) )3v;;'') ) ) ) ) 	)r   c                 z    t           fdD                       dd         j        j        t                    t	          dcxk    ok     nc fd                                           fdt                    D             t          fdD                       s
J d             j        	                    d D                        |           
                                sJ t           fdt          t                              D                       S )	Nc              3   ^   K   | ]'}                     ||j        j        gd           V  (dS )F)r   N)r   numelr)   )r$   rQ  rf   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  s<      __ST4<<AGM?<NN______r   r   c                      d d  S )Nz/expected reduction axis to be in the range [0, z
) but got r   r   r[   s   r   r-   z)GluonSemantic.reduction.<locals>.<lambda>  s    )q[_)q)qko)q)q r   c                 &    g | ]\  }}|k    |S r   r   )r$   r%   sr   s      r   r   z+GluonSemantic.reduction.<locals>.<listcomp>  s"    AAA41aqDyyQyyyr   c              3   8   K   | ]}|j         j        k    V  d S r   )r8   rA   )r$   rQ  rA   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  s,      99Q16<5(999999r   z-all reduction inputs must have the same shapec                     g | ]	}|j         
S r   rO  rP  s     r   r   z+GluonSemantic.reduction.<locals>.<listcomp>  s    /I/I/IQ/I/I/Ir   c              3      K   | ]>}                                         |          |         j        j                  V  ?d S r   rS  )r$   r%   rJ  	reduce_opr   rf   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  sa       ) ) **9+?+?+B+BF1INDY[dee) ) ) ) ) )r   )rX  r8   rA   rR   r   rI  rW   r'   rp   create_reducerW  rY  )rf   rJ  r   rZ  r[   rf  r   rA   s   ``` @@@@r   	reductionzGluonSemantic.reduction  s   <____X^_____FDq	$5zzqD4!q!q!q!q!qrrr'''AAAA9U#3#3AAA	9999&99999jj;jjj9L../I/I&/I/I/I4PP	)$$$!!!!! ) ) ) ) ) ) )3v;;'') ) ) ) ) 	)r   num_binsmaskc                 "   t          t          |j                  dk    d            t          |j                                        d            t          |d ud            |M|                     ||          \  }}t          |j        j                                        d            |j	        }|
                    | j                  }| j                            |j	        |||          }|                     |t          j        |g|          S )Nr	   c                      dS )Nz histogram only supports 1D inputr   r   r   r   r-   z)GluonSemantic.histogram.<locals>.<lambda>  s    .P r   c                      dS )Nz%histogram only supports integer inputr   r   r   r   r-   z)GluonSemantic.histogram.<locals>.<lambda>  s    -T r   c                      dS )Nz'histogram requires a destination layoutr   r   r   r   r-   z)GluonSemantic.histogram.<locals>.<lambda>  s    +T r   c                      dS )Nz"Mask must have boolean scalar typer   r   r   r   r-   z)GluonSemantic.histogram.<locals>.<lambda>  s    7[ r   )r   rR   rA   r   is_intr   r8   r   is_boolr}   r   rp   create_histogramr>  ry   r   )rf   r   ri  rj  rY   layout_attrr}   s          r   	histogramzGluonSemantic.histogram  s    s5;1$&P&PQQQu{!!##%T%TUUUvT!#T#TUUU33D%@@KD%49#++--/[/[\\\;DmmDL11..u|Xt[YY
XJGGGr   c           	         t          |d ud            t          |d            t          t          |j                  dk    d            t          j        |j        j        |j        d         |j        d         z   g|          }|                     | j        	                    |j
        |j
        |                    | j                            |          S )Nc                      dS )Nz!cat requires a destination layoutr   r   r   r   r-   z#GluonSemantic.cat.<locals>.<lambda>  s    +N r   c                      dS )Nz;current implementation of `cat` always may reorder elementsr   r   r   r   r-   z#GluonSemantic.cat.<locals>.<lambda>  s    $a r   r	   c                      dS )Nzcat requires a rank-1 inputr   r   r   r   r-   z#GluonSemantic.cat.<locals>.<lambda>  s    ,I r   r   )r   rR   rA   ry   rz   r8   r   r|   rp   
create_catr}   r   )rf   r   r   r   rY   ret_types         r   catzGluonSemantic.cat  s    vT!#N#NOOO{aabbbs39~~"$I$IJJJ(39Q<#)TU,;V:WY_``{{4<223:sz8>>Z^ZfKgKghhjrsssr   srcr   c                 f   t          t          j        t          j                  fd           t          t          j        t          j                  fd           t          j        j                                        fd           t          j        j                  t          t          j        j                  k    d            t           cxk    ok     nc fd           dk     rz  t                    D ]@}|k    r	t          j        j        |         j        j        |         k    fd           A| j
                            j        j                  }|                     |j        j        j        j        j        j                  S )Nc                      d j         S rA  r7   )r|  s   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    DvjmjrDvDv r   c                      d j         S rA  r7   r  s   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    KUZKK r   c                  "    d j         j        S )Nz&expected integer scalar type but got: )r8   r   r  s   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    3q\a\f\m3q3q r   c                      dS )Nz0source and index tensors must have the same rankr   r   r   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    6h r   c                      d  d dS )Nzgather axis z must be < source rank (rM  r   r`  s   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    -aD-a-aZ^-a-a-a r   r   c                      d  dS )Nz
index dim z( must match the corresponding source dimr   )r   s   r   r-   z&GluonSemantic.gather.<locals>.<lambda>  s    STSSS r   )r   r"   r8   ry   rz   r   rp  rR   rA   rY  rp   create_gatherr}   r>  rY   )rf   r|  r   r   dgatherr[   s    ```  @r   r  zGluonSemantic.gather  s   z#(D$9::<v<v<v<vwwwz%*d&;<<KKKK	M 	M 	Muz ''))+q+q+q+qrrr38>""s5:#$$,.h.hiiiu####t####%a%a%a%a%abbb!88DLDt 	 	ADyy
 #sx~a'88SSSS    ++CJdKK9I5:K\]]]r   c                     | j                             |j        |                    | j                   |          }t	          |j        j                  }||xx         dz  cc<   |                     |||          S )NrB   )rp   create_fp4_to_fpr}   r   rQ   r8   rA   r   )rf   r|  	elem_typer   resultrA   s         r   	fp4_to_fpzGluonSemantic.fp4_to_fp  sk    ..sz9??4<;X;XZ^__SX^$$dq--fiGGGr   worker_num_warpsworker_num_regsc                 >   |D ]7\  }t          t          t          t          j        f          fd           8t	          |          dk    s
J d            |d         \  }}t	          |          dz
  }|dd          }	|t	          |          k    sJ d| dt	          |           d            |t	          |          k    sJ d| dt	          |           d            | j        }
|
                                }|
                                }|
                    |           |	                    ||i 	          }g }|t          |          }|
                    |           d
 |D             }d |	D             }t          |g           }|
                    |           |
                    |||                                                              |                               |           |
                                                    g            |
                    |          }d |D             }dt+          |	          D ]\  }\  }t-          ||                   }|
                    |                    |          |          ||         }fdt1          t	          |                    D             }t3          |d D                       }|	                    ||i |           |
                                 t	          |          z  |
                                                               fdt1          t	          |                    D             }|d S t          t3          |d |D                                 S )Nc                  (    dt                      S )Nz9function arguments must be a tuple of arguments, but got r7   )argss   r   r-   z/GluonSemantic.warp_specialize.<locals>.<lambda>  s    cW[\`WaWacc r   r	   z8expected at least one function for the default partitionr   zwarp specialize got z partitions but z warp countsz register counts)kwargsc                 6    g | ]}|                                 S r   get_typer$   rs     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>  s     ;;;

;;;r   c                 2    g | ]\  }}t          |          S r   )r   )r$   _r  s      r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>  s%    IIIga+D11IIIr   c                 6    g | ]}|                                 S r   r  r$   args     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>$  s     999S\\^^999r   r9   c                 @    g | ]}                     |z             S r   )get_argument)r$   jarg_itblocks     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>*  s+    XXXQ%,,VaZ88XXXr   c                     g | ]	}|j         
S r   r7   r  s     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>+  s    9S9S9Ss#(9S9S9Sr   )r  caller_contextc                 :    g | ]}                     |          S r   )rT  )r$   r%   ws_ops     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>1  s'    NNN((++NNNr   c                     g | ]	}|j         
S r   r7   r  s     r   r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>4  s    7X7X7X17X7X7Xr   )r   r"   rX  ry   rR   rp   get_insertion_point	new_blockset_insertion_point_to_startcall_JitFunctionr   create_warp_yieldsumrestore_insertion_pointcreate_warp_specializeget_default_region	push_backset_requested_registerscreate_block_with_parentget_partition_op_holder!create_warp_specialize_partitionsrW   rd   
get_regionrY  r   create_warp_returnset_insertion_point_afterget_operation)rf   functions_and_argsr  r  	generatorr  default_partitiondefault_argsnum_partitionsworkersrp   	insert_ptdefault_blockdefault_resultsmlir_resultsresult_typesworker_args	mlir_argspartitions_op	arg_typesr%   funcr  
block_argsr  r  r  r  s                           @@@@r   warp_specializezGluonSemantic.warp_specialize  s"   ) 	e 	eGAt:dUDJ$788cccce e e e %&&!+++-g+++*<Q*?'</0014$QRR("
 "
 
 
 
e.ee#FVBWBWeee
 
 
 "
 "
 
 
 
h.hh#oBVBVhhh
 
 
 ,//11	  ))++,,];;;#445F]_4``&/@@L!!,///;;l;;; JIIIIR((	''	222..|YHXYY  "",,];;;%%o666 	(()F)F)H)H"MMMAA.QQ99y999	(11 	% 	%OA|d/:J1:MNNNN44]5M5Ma5P5PR[\\E#AIXXXXX%IBWBWXXXJ,Z9S9Sd9S9S9STTJ&&tZSa&bbb&&(((c)nn$FF))%*=*=*?*?@@@NNNNU3|;L;L5M5MNNN"F(7X7X7X7X7XYYZZZr   c                 H    t          j        | j        j        j                  S r   )ry   r	  rp   optionsnum_ctasrj   s    r   r  zGluonSemantic.num_ctas6  s    ~dl2;<<<r   c                     |j         :t          |j         t                    sJ t          j        |j         j                  S t          j        | j        j        j                  S r   )r  r"   rd   ry   r	  r:   rp   r  )rf   r  s     r   r:   zGluonSemantic.num_warps9  sS    #/i68JKKKKK>)":"DEEE~dl2<===r   )F)6rr   rs   rt   ry   r|   langr   __annotations__rg   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r  r   r  r   r  r#  r.  r6  r<  r>  staticmethodrI  r   r[  rh  rt  r{  r  r  r  r  r:   __classcell__)r   s   @r   rv   rv   a   s        [FD    ' ' '_ _ _49 c     T T T T T T T&5h 58 5 5 5 5 5 5 5Xx XE(H*<$= X X X X X X5X 5U3Z 5H 5 5 5 5 5 5+( +5: +( + + + +  x H      :9 9 9 9 95X 5$s) 5$ 5 5 5 5 5 5
+ + +1 1 1+ + + +
W 
W 
W+ + +G G G^ ^ ^4a a a; ; ;+ + +D D D	D 	D 	DY Y Y 
 
 
,D D D& & & X X \X)x'9 ) )"&)+03+?) ) ) )*) 2 )# )UZ[ceh[hUi ) ) ) )(
Hx 
H3 
Hh 
HS[ 
H 
H 
H 
Htx th tT th t t t t^( ^8 ^3 ^8 ^ ^ ^ ^,HX H8 H H H H:[HSM :[dlmpdq :[ :[ :[ :[x= = => > > > > > >r   rv   r   )#typingr   r   r   r   r   r2  triton.language.semanticr    r
   ry   _layoutsr   r   r   r   r   r   triton._C.libtriton.gluon_irr   r   triton.compiler.code_generatorr   r   r   r   r  rP   r   r*   rb   __triton_builtin__rd   rv   r   r   r   <module>r     s   ; ; ; ; ; ; ; ; ; ; ; ; ; ;  3 3 3 3 3 3       x x x x x x x x x x x x x x x x P P P P P P P P T T T T T T T T7: <F ! ! !xC0 ! ! ! !
R R R: : : :z /3  +	M 	M 	M 	M 	M 	M 	M 	M\> \> \> \> \>N8, \> \> \> \> \>r   