
    /jj                         d dl Z d dlmZ d dlZd dlZd dlZ	 d dlmZ n# e	$ r dZY nw xY wddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d	 d
          Z G d d          ZdS )    N)Any)to_array_extended   )
TensorData)	ONNXModel)DEQUANT_OP_NAMEONNX_TYPE_TO_NP_TYPEQUANT_OP_NAMETENSOR_NAME_QUANT_SUFFIXfind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   J    e Zd Zdeeef         fdZd	dZd Zd Z	d Z
d ZdS )
QuantizationParamsdatac                 h   i | _         |                                D ]\  }}t          |t                    s#t	          dt          |           d|d          |dk    rJt          |t          t          t          j        f          s#t	          dt          |           d|d          |dk    r7t          |t                    s"| t	          dt          |           d          |dk    r9|j	        t          j
        t          j        fvrt          d|j	         d|          || j         |<   d S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kvs       l/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/onnxruntime/quantization/base_quantizer.py__init__zQuantizationParams.__init__'   s@   	JJLL 		 		DAqa%% T RDGG R RA R R RSSSF{{:a#sBJ1G#H#H{ jTXYZT[T[ j jde j j jkkkF{{:a#5#5{!- T$q'' T T TUUUG||
BJ/G G G !nYZY`!n!nij!n!noooDIaLL		 		    Nc                 8    | j                             ||          S N)r   get)r(   keydefault_values      r+   r0   zQuantizationParams.get4   s    y}}S-000r-   c              #   $   K   | j         E d {V  d S r/   r   r(   s    r+   __iter__zQuantizationParams.__iter__7   s&      9r-   c                     | j         |         S r/   r4   )r(   r1   s     r+   __getitem__zQuantizationParams.__getitem__:   s    y~r-   c                     || j         |<   d S r/   r4   )r(   r1   values      r+   __setitem__zQuantizationParams.__setitem__=   s    	#r-   c                 *    t          | j                  S r/   )lenr   r5   s    r+   __len__zQuantizationParams.__len__@   s    49~~r-   r/   )__name__
__module____qualname__dictr   r   r,   r0   r6   r8   r;   r>    r-   r+   r   r   &   s        tCH~    1 1 1 1          r-   r   c                       e Zd Z	 ddZdej        j        defdZd Z	d Z
d Zd	 Zd
 Zd ZddZddZ	 	 ddZd ZdS )BaseQuantizerNc                    t          |          st          |          }d |j        j        D             | _        | j                            d |j        j        D                        | j                            d |j        j        D                        t          |          | _	        || _
        || _        |
r|
ni | _        d| j        v o| j        d         | _        d | _        d| j        v o| j        d         | _        | j                            dd           | _        | j                            dd          | _        | j                            d	          | _        t)          |d
|          | _        t)          |d
|          | _        	 |Zt/          d |                                D                       r/t3          dd |                                D              d          || _        || _        || _        |	| _        |                                 | _        tA          | j                            di                     | _!        d | j	        "                                D             | _#        | j!        $                    | j#        | j        %                                |          \  }}|stM          |          | j!        '                                | _(        d S )Nc                     i | ]
}|j         |S rC   name).0vis     r+   
<dictcomp>z*BaseQuantizer.__init__.<locals>.<dictcomp>T   s    IIIBBGRIIIr-   c                     i | ]
}|j         |S rC   rH   )rJ   ots     r+   rL   z*BaseQuantizer.__init__.<locals>.<dictcomp>U   s     J J J" J J Jr-   c                     i | ]
}|j         |S rC   rH   )rJ   its     r+   rL   z*BaseQuantizer.__init__.<locals>.<dictcomp>V   s     I I I" I I Ir-   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec              3   B   K   | ]}t          |t                     V  d S r/   )r   r   )rJ   ts     r+   	<genexpr>z)BaseQuantizer.__init__.<locals>.<genexpr>y   s/      ,k,kqAz1J1J-J,k,k,k,k,k,kr-   z(tensors_range contains unexpected types c                 ,    h | ]}t          |          S rC   )r    )rJ   r*   s     r+   	<setcomp>z)BaseQuantizer.__init__.<locals>.<setcomp>{   s    <e<e<eT!WW<e<e<er-   z, not TensorData.TensorQuantOverridesc                     i | ]
}|j         |S rC   rH   )rJ   initzers     r+   rL   z*BaseQuantizer.__init__.<locals>.<dictcomp>   s    [[[wW\7[[[r-   ))r   r   graph
value_infovalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr0   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanyvaluesr   tensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr'   get_quant_typestensor_quant_override_qtypes)r(   re   rf   rg   rq   rp   rt   ru   rv   rw   rh   overrides_validoverrides_errs                r+   r,   zBaseQuantizer.__init__E   s    (.. 	B:5AAEII%+2HIII J Ju{7I J J JKKK I Iu{7H I I IJJJu%%
&(.;C]] 22[t7IJZ7[ 	) '4+==q$BTUpBq 	* 261C1G1GHY[_1`1`!'+'9'='=>SUZ'['[$"0445GHH '(8-IY Z Z#L-NN
	 $,k,kTaThThTjTj,k,k,k)k)k$y<e<emNbNbNdNd<e<e<eyyy   +!2 0$8!!5577 'AASAWAWXnprAsAs&t&t#[[$*BXBXBZBZ[[[)-)D)M)Mt/44668H*
 *
&  	,]+++,0,G,W,W,Y,Y)))r-   weight_quant_typereturnc                     | j         | j         S |t          j        j        t          j        j        t          j        j        t          j        j        fv S r/   )rl   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r(   r   s     r+   is_weight_symmetricz!BaseQuantizer.is_weight_symmetric   sJ    $0,, !!")	%
 
 	
r-   c                     t           r/   )NotImplementedErrorr5   s    r+   quantize_modelzBaseQuantizer.quantize_model   s    !!r-   c                 X    t          || j                                                  }|d uS r/   )r   re   r{   )r(   
input_namer{   s      r+   is_input_a_initializerz$BaseQuantizer.is_input_a_initializer   s*    ":tz/E/E/G/GHH$&&r-   c                     | j         S r/   )rf   r5   s    r+   is_per_channelzBaseQuantizer.is_per_channel   s    r-   c                     t          || j                                                  }|)|j        t          j        j        t          j        j        fv S | j        r| j	        dS | j	        
                    |          S )NF)r   re   r{   	data_typer   r   FLOATFLOAT16ri   rj   is_valid_quantize_weight)r(   weight_nameweights      r+   r   z&BaseQuantizer.is_valid_quantize_weight   sq    k4:+A+A+C+CDD#(8(>@P@X'YYY1 	t{7J5{33K@@@r-   c                     | j         (t          | j                   dk    r|j        | j         vrdS |j        | j        vrdS |j        t
          t          fv rdS | j        |j        | j        v rdS dS )Nr   FT)ru   r=   rI   op_typerw   r   r
   rv   )r(   nodes     r+   should_quantize_nodez"BaseQuantizer.should_quantize_node   s    ".D*++q00	!7775<t8885<O];;;5 ,d>S1S1S5tr-   c                 d   d | j         j         j        D             }t          |          dk    rt          d          |d         j        }|dk    rt          j        d| d           dS |dk     rt          j        d| d           | j         j         j                            |d                    | j         j         j                            t          j
                            d	d
          g           d
}|dk     r| j        t          j        j        k    rt          j        d| d           | j         j         j                            |d                    | j         j         j                            t          j
                            d	d          g           d| j         j         _        d}|S )Nc                 6    g | ]}|j         r|j         d k    |S )zai.onnx)domain)rJ   opsets     r+   
<listcomp>z5BaseQuantizer.check_opset_version.<locals>.<listcomp>   s7     
 
 
EL
TYT`dmTmTmETmTmTmr-   r   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )re   opset_importr=   r'   versionloggingwarningremoveextendr   helpermake_opsetidrq   r   r   
ir_version)r(   ai_onnx_domainry   s      r+   rx   z!BaseQuantizer.check_opset_version   s   
 
#z/<
 
 
 ~!##CDDD&q)1BO l}  l  l  l   22O f}  f  f  f   J)001BCCCJ)00$+2J2J2r2R2R1STTTM2$"3t7G7T"T"TO5} 5 5 5  
 J)001BCCCJ)00$+2J2J2r2R2R1STTT*+DJ'Mr-         ?c                 n	   t          || j                                                  }t          |          }|t          z   }| j        t          j        j        k    r	t          j
        |          }|j        t          j        k    rt          j        j        }	n?|j        t          j        k    rt          j        j        }	nt!          d|j         d          |                    t          j                  }
t          j        dg|
j                  }|                    d          }t          j                            |
|          }| j                            |g           d}n||z  |z  }t          j
        |t          j                  t          j
        |t          j                  z  }
|
                                }
t          j        t          j        t          j                  j                  }t          j        t          j        t          j                  j                  }t          j        |
|k               st          j        |
|k              rt=          j        d| d           t          j         |
||                              t          j                  }
t          j
        |
t          j                                      |j!                  }t          j                            ||          }| j                            |g           t          j
        ||j                                      d          }d	}| j        }	|d
z   }t          j                            ||          }| j                            |g           | j        t          j        j        k    r| j        }nt          j        j"        }|dz   }| j        t          j        j        k    r*t          j#        $                    || j        dgdg          }n|j%        dk    rYt          j&        |j'        t          j                                      d          }t          j                            ||          }n#t          j#        $                    ||g dg          }| j                            |g           ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r$   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   re   r{   r   r   rq   r   r   r   r"   asarrayr$   r&   r   r%   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxrr   r   r   clipdimsINT32r   make_tensorsizezerosshape)r(   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrV   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datas                           r+   quantize_bias_static_implz'BaseQuantizer.quantize_bias_static_impl   s-    (	4:3I3I3K3KLL)*:;;	'*BB  0 ===:i((DzRZ''!-5

rz))!-3

 uhlhr u u uvvv![[44N1#^-ABBBJ(0044O&*&7&B&B>Sf&g&g#J))+B*CDDDII %|3d:J  Z	DDDrzR\dfdnGoGoGooN+1133N 
28BH#5#5#9::I
28BH#5#5#9::Ivny011 RVNY<V5W5W nynnn    W^Y	JJQQRTRZ[[N :nBHEEEMMN^NcddL&*&7&B&B<Qd&e&e#J))+B*CDDD !j9?KKKSSTVWWO*I*J %8($B!(,(9(D(D_Vo(p(p%
%%'D&EFFF  0 ===+KK*0K!4}!D 0 ===)-)@)@AWY]Yjmnloruqv)w)w&&_q  8J$4BHEEEMMbQQL)-):)E)ElTj)k)k&&)-)@)@AWYdfhkljm)n)n&
%%'A&BCCC  %"
 	
r-   Fc                 2   |j         t          z   }|j         dz   }|j         dz   }t          |          }| j                            |j         i           }	d|	v r|	d         j        }d|	v rd|	v rt          j        |	d         t          |                   }
t          j        |	d                   }t          ||
                                ||
          }t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       nQ|| j        k    r|                     |          n| j        }t)          |
                                ||	                    d
|          |	                    d| j        o|          | j        |	                    d          |	                    d                    \  }
}}t          |
t          j                  sJ dt          |
                       |
j        t          j        k    r|
j        t          j        k    sJ d	|
j                     t          |t          j                  sJ dt          |                       |j        }t2          j                            ||g |                    d                                                    }t2          j                            ||g |
                    d                                                    }| j                            ||g           |s| j        t2          j         j!        k    rFt3          j                     }| j        |_        |j"        #                    |j"                   ||_         |
                                $                                %                                |_&        tN          tO          |          }|j(        |j(        k    s*|%                                |%                                k    rrtS          d|j(         d|%                                dd          d|%                                dd          d|j(         dtU          |          dd          d          n|t2          j         j+        t2          j         j,        fv r|j        t          j-        t          j.        fvrtS          d| d          t_          ta          |%                                                    }t2          j                            |||j"        |d          }nkt          j1        |t2          j        2                    |                                        |j"                  }t2          j3        4                    ||          }| j                            |g           |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricrg   rminrmaxrg   rn   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rI   r   r   rz   get_per_tensor_overridesrV   r"   r   r	   r   flattenr   r#   r    r$   r%   r&   rq   r   rm   r   r0   rg   rn   r   r   r   r   r   tolistre   r   r   r   r   r   copytobytesraw_datar   r   RuntimeErrorr   r   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r(   r   qTyperg   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datas                       r+   quantize_initializer_implz'BaseQuantizer.quantize_initializer_impl6  s    &>>+-[8+
 ,F335NNv{hjNkk?**#L1=Eo%%,/*I*I/,"?G[\aGbcccJH_W566E,UK4G4G4I4I5R\]]Mj"*55\\7\$zJZJZ7\7\\\5#rz11j6F"*6T6T6T6J$466 7U6TT eRZ00RR2RT%[[2R2RRR0R <ADDU;U;U00777[_[wI/<##%%##K;;,00ARAcWcdd#2-11&99-11&990 0 0,J} j"*55\\7\$zJZJZ7\7\\\5#rz11j6F"*6T6T6T6J$466 7U6TT eRZ00RR2RT%[[2R2RRR0& K33JRQVQ^Q^_dQeQeQlQlQnQnoo;227E2zGYGYZ_G`G`GgGgGiGijj
%%'8:J&KLLL  #	B D$4$AAA'+'7'9'9$151B$.$)00===,9$)0=0E0E0G0G0L0L0N0N0V0V0X0X$-$0 ..BCCE{k&7775==??mNcNcNeNe;e;e*H8I H H,4466ss;H HCH==??SVTVSVCWH HdjdpH H%()=%>%>tt%DH H H  
 4+0$2B2HIII &rw.AAA&nnnn   $$6}7L7L7N7N$O$OPP (,{'>'>}eU[U`bmsw'>'x'x$$ "
=@d@dej@k@k l l l t tK! ! (,'8'C'CMS`'a'a$J))+?*@AAAgz11r-   Tc                 @   t          || j                                                  }|t          d|          t	          |          }t          |j                  }t          ||          \  }	}
|	st          d| d| d|           |
}|j        |         }| j        	                    |d|ig          }t          |          }|dk    r||k    rt          d| d	| d
          t          |d         d         |          \  }}|r||k    r%t          d| d| d|d         d          d          d|d         v r|d         d         j
        }|d                             d|                     |                    }|d                             d| j        o|          }g }g }g }t          |j                  }t          |          }d||<   t          |          D ]}|                    ||          }||k     r|nd}||         }d|v rDd|v r?t#          j        |d         t&          |                   }t#          j        |d                   }t)          ||                                ||          }t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       n/t9          |                                |||| j        |                    d          |                    d                    \  }}}t-          |t"          j                  sJ dt1          |                       |j        t"          j        k    r|j        t"          j        k    sJ d|j                     t-          |t"          j                  sJ dt1          |                       t-          |t"          j                  sJ dt1          |                       |                    |           |                    |           |                    t#          j        |                               |                     	t#          j!        ||          }|tD          z   }|dz   }|dz   } |j#        |         g}!tH          j%        &                    | |j'        |!t#          j(        |          )                                          }"tH          j%        &                    |||!t#          j(        |          )                                          }#| j        *                    |"|#g           |sJ|tH          j+        j,        tH          j+        j-        fv r|j        t"          j.        t"          j/        fvrta          d| d          tc          te          |3                                                    }$tH          j%        &                    ||||$d           }%| j        *                    |%g           nt#          j        |tH          j%        4                    |                                         |j#                  }tH          j5        6                    ||          }%| j        *                    |%g           ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rg   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   re   r{   r'   r   r=   r   r   rz   get_per_channel_overridesrV   r0   r   rg   listrangetaker"   r   r	   r   r   r   r#   r    r$   r%   r&   r   rn   appendr   r   concatenater   r   r   r   r   r   hstackr   r   r   r   r   r  r  r   r  r   r   r  r   r   )&r(   r   rq   channel_axisrg   r  r{   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr  r  r	  zero_scale_shaper  r  r  r  s&                                         r+    quantize_weight_per_channel_implz.BaseQuantizer.quantize_weight_per_channel_impl  s    #;
0F0F0H0HII7EEE'447=))#1,#M#M y 	9+ 9 9, 9 9*69 9  
 !l3'+'B'\'\v|&<%= (] (
 (
$ !$$@ A A A%%*?=*P*PU U U,U U U  
 1??[\]?^_e?fht0u0u-% 	,)F)F_[ _ _(_ _4PQR4STZ4[_ _ _   7:::7:<HTL0377TE]E]^jEkEkll	3A6::>4K\Kmamnn
*,'W]++M**%&\"}%% *	q *	qA&||A|<<*+.C*C*CQQ"&BCY&Z#111lF]6]6]X&=l&KSghtSuvvv
!8!ABB-= "2":":"<"<eZ. .* "*bj99``;`dS]N^N^;`;```9!'2:55*:Jbj:X:X:X:
(8:: ;Y:XX "%44VV6Ve6V6VVV4!"<bjII  It,F'G'GII I 
 AN$,,.. !-#'#6"9"="=f"E"E"9"="=f"E"EA A A=
E#= "*bj99``;`dS]N^N^;`;```9!'2:55*:Jbj:X:X:X:
(8:: ;Y:XX "%44VV6Ve6V6VVV4!"<bjII  It,F'G'GII I "":...e$$$+222:>X3Y3Y3a3abn3o3opppp N+JLYY#&>>- 8+
 (,\:; K33-/?:AVAVA]A]A_A_
 
  ;22\#3RY5O5O5V5V5X5X
 
 	
%%'8:J&KLLL  	F 0 5t7G7MNNN$*27BH2EEE&nnnn   $$67H7P7P7R7R$S$STT (,{'>'>!<QU (? ( ($ 
--/C.DEEEE$&J%+>>|LL% % % '+*++ " (,'8'C'CDUWd'e'e$
--/C.DEEEgz11r-   c                    | j         d S | j                                        D ]b}|j        dv r|                     |          s"t          | j                                        |j        d                            dk    r^|j        d         | j         vs|j        d         | j         vr| j         |j        d                  }t          |t                    s.t          dt          |           d|j        d         d          || j         |j        d         <   |j        dk    r_|                     |          st          t          j        d          t          j        d	          
          | j         |j        d         <   dd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rt   re   nodesr   r   r=   input_name_to_nodesrd   rc   r   r   r   r    r"   r%   )r(   r   tds      r+   adjust_tensor_rangesz"BaseQuantizer.adjust_tensor_ranges  s   %FJ$$&& 	q 	qD|///0066 tz5577
1FGG1LL:a=(:::dk!nTXTf>f>f'A7!"j11 [#$YtBxx$Y$YdkRSn$Y$Y$YZZZ46"4:a=11**0066 5?rzRU`b`jkn`o`o5p5p5p"4;q>2#	q 	qr-   r/   )r   )FF)TF)r?   r@   rA   r,   r   r   DataTypeboolr   r   r   r   r   r   rx   r   r  r2  r<  rC   r-   r+   rE   rE   D   s$        IZ IZ IZ IZV
T5E5N 
SW 
 
 
 
" " "' ' '     A A A  &! ! !FR
 R
 R
 R
hY2 Y2 Y2 Y2@ L2 L2 L2 L2\q q q q qr-   rE   )r   typingr   numpyr"   r   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr   r	   r
   r   r   r   r   r   r   r   r   r   rz   r   r   rE   rC   r-   r+   <module>rG     s                  7777777    " ! ! ! ! ! ! ! ! ! ! !                            ? > > > > >       <pq pq pq pq pq pq pq pq pq pqs    ))