
    jRZ                    z   U d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZmZ d dl m!Z! er&d dl"Z#d dl$Z%d dl&Z%d dl'Z%d dl(Z%d dl)m*Z* d dl+m,Z, d dl-m.Z.  G d de          Z/ G d de/          Z0 ed           G d d                      Z1e2e1z  Z3de4d<    ed           G d d                      Z5 ed           G d d                      Z6 ed           G d d                      Z7 G d d e          Z8 ed           G d! d"e8                      Z9 ed           G d# d$e8                      Z:dSd(Z;dTd+Z<i d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPZ=dQe4dR<   dS )U    )annotationsN)ABCabstractmethod)	dataclass)partial)perf_counter)TYPE_CHECKINGAnyFinalLiteral	TypeAlias)eprintverboseverbose_print_sensitive)ComputeError)IcebergStatisticsLoader)IdentityTransformedPartitionValuesBuilder#_normalize_windows_iceberg_file_uritry_convert_pyarrow_predicate)ScanCastOptions)StorageOptionsDict)NoPickleOption)	LazyFramec                  6    e Zd Zeedd                        ZdS )IcebergTableSerializertablepyiceberg.table.TablereturnSerializedTableStatec                    d S N r   s    _/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/polars/io/iceberg/_dataset.pyserialize_tablez&IcebergTableSerializer.serialize_table"   s    ORs    Nr   r   r   r   )__name__
__module____qualname__staticmethodr   r%   r"   r&   r$   r   r   !   s0        RRR ^ \RRRr&   r   c                  &    e Zd Zedd            ZdS )IcebergScanTableSerializerr   r   r   r   c                    | j         S r!   )metadata_locationr#   s    r$   r%   z*IcebergScanTableSerializer.serialize_table(   s    &&r&   Nr'   )r(   r)   r*   r+   r%   r"   r&   r$   r-   r-   '   s2        ' ' ' \' ' 'r&   r-   T)kw_onlyc                  $    e Zd ZU ded<   ded<   dS )IcebergCatalogTableDescriptorz"str | pyiceberg.typedef.Identifiertable_identifierIcebergCatalogConfigcatalog_configN)r(   r)   r*   __annotations__r"   r&   r$   r2   r2   -   s*         8888((((((r&   r2   r   r   c                  X    e Zd ZU ded<   ded<   ded<   ded<   ddZddZddZddZdS )IcebergTableWrapz%NoPickleOption[pyiceberg.table.Table]table_zSerializedTableState | Nonetable_descriptor_r   
serializerStorageOptionsDict | Noneiceberg_storage_propertiesr   r   c                   | j                                         !t                      rWt          | j        t
                    r!d| j        j        d| j        j        j        n	d| j         }t          d|            | j        J t          | j        t
                    rV | j        j        j        | j        j        j
        fi | j        j        j        }|                    | j        j                  }n)ddlm} |                    | j        | j        pi           }| j                             |           | j                                         S )	z!Fetch the PyIceberg Table object.NzDcatalog table descriptor: self.table_descriptor_.table_identifier = z1, self.table_descriptor_.catalog_config.class_ = zmetadata path: z'IcebergTableWrap: construct table from r   )StaticTable)r/   
properties)r9   getr   
isinstancer:   r2   r3   r5   class_r   namer@   
load_tablepyiceberg.tabler?   from_metadatar=   set)selffrom_catalogr   r?   s        r$   rA   zIcebergTableWrap.get=   s   ;??$yy 	J
 "$"8:WXXD I->I I-<CI I I D4+ACC  HHHIII)555$02OPP F$0?F*9> ,;F 
  **4+A+RSS777777#11&*&<#>D" 2  
 KOOE"""{   r&   	pa.schemac                l    ddl m}  ||                                                                           S )z$Fetch the arrow schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowrO   rA   schema)rI   rO   s     r$   arrow_schemazIcebergTableWrap.arrow_schema`   s9    ::::::  !2!2!4!4555r&   dict[str, Any]c                    | j                                         x}| j                            |          | _        | j        J | j        S r!   )r9   rA   r;   r%   r:   __dict__)rI   r   s     r$   __getstate__zIcebergTableWrap.__getstate__f   sH    [__&&&E3%)_%D%DU%K%KD"%111}r&   stateNonec                    || _         d S r!   )rU   )rI   rW   s     r$   __setstate__zIcebergTableWrap.__setstate__n   s    r&   N)r   r   r   rL   )r   rS   )rW   rS   r   rX   )r(   r)   r*   r6   rA   rR   rV   rZ   r"   r&   r$   r8   r8   6   s         11112222&&&&9999!! !! !! !!F6 6 6 6        r&   r8   c                  b    e Zd ZU dZded<   ded<   ded<   edd            Zedd            ZdS )r4   aI  
    Configuration for constructing a PyIceberg catalog.

    This is useful for constructing queries from a client that may not have
    access to a catalog server.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
    ztype[pyiceberg.catalog.Catalog]rC   strrD   dict[str, str]r@   rK   pyiceberg.catalog.Catalogr   c                T    t          t          |           | j        | j                  S )a  
        Constructs an IcebergCatalogConfig from an instantiated PyIceberg catalog.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
        )rC   rD   r@   )r4   typerD   r@   )rK   s    r$   from_catalogz!IcebergCatalogConfig.from_catalog   s/     $==)
 
 
 	
r&   7pyiceberg.catalog.Catalog | IcebergCatalogConfig | Nonefn_name'Literal['scan_iceberg', 'sink_iceberg']c               ~   dd l }ddlm} dd l}ddlm} t          | t                    r| }nt          | |j        j                  rt          	                    |           }n| !dt          |            }t          |          |j        j                                        r || d           	 |j                                        }n7# t           $ r*}	|dk    rdnd}
d	| d
|	d|
 }t#          |          |	d }	~	ww xY wt          	                    |          }|j        |k    rd| d}t          |          |S )Nr   )NoopCatalog)r   z&unknown type for `catalog` parameter: z,(): calling pyiceberg.catalog.load_catalog()scan_icebergzO If you intended to pass a static metadata path, ensure it is an absolute path. zfailed to load catalog for z() (error = z). Configure the default PyIceberg catalog, or pass a catalog via the 'catalog' parameter, or pass a PyIceberg table object instead of the name.zcannot use NoopCatalog with z())pyiceberg.catalogpyiceberg.catalog.nooprg   polars._utils.loggingr   rB   r4   rK   Catalogrb   ra   	TypeError_utilsloggingr   load_catalog	Exceptionr   rC   )rK   rd   	pyicebergrg   polarsr   r5   msgdefault_catalogerrorstatic_metadata_hints              r$   *_from_api_parameter_or_environment_defaultz?IcebergCatalogConfig._from_api_parameter_or_environment_default   s    	!   666666$$$$000000g344 "	P$NN!2!:;;  	P1>>wGGNN J4==JJCC.. }$,,.. Q'OOOPPP3"+"3"@"@"B"B 3 3 3 .00	9 9
  %.' . .u . . ,	. .  #3''U2%3( 2>>OON K//<<<<CC.. s   2C 
D %C;;D N)rK   r_   r   r4   )rK   rc   rd   re   r   r4   )r(   r)   r*   __doc__r6   r+   rb   ry   r"   r&   r$   r4   r4   r   s         	 	 ,+++III
 
 
 \
 3 3 3 \3 3 3r&   r4   c                      e Zd ZU dZded<   ded<   ded<   ded	<   ded
<   ded<   ddZddddddddZddddddddZdS )IcebergScanResolverzY
    Iceberg scan resolver.

    Defers scan resolution to run during IR resolution.
    r8   r   
int | Nonesnapshot_idz%Literal['native', 'pyiceberg'] | Nonereader_overridebooluse_metadata_statisticsfast_deletion_countuse_pyiceberg_filterr   rL   c                4    | j                                         S )zFetch the schema of the table.)r   rR   rI   s    r$   rQ   zIcebergScanResolver.schema   s    z&&(((r&   Nexisting_resolved_version_keylimit
projectionfilter_columnspyarrow_predicater   
str | Noner   r   list[str] | Noner   r   tuple[LazyFrame, str] | Nonec               v    |                      |||||          x}	 dS |                                |j        fS )zConstruct a LazyFrame scan.r   N)_to_dataset_scan_implto_lazyframesnapshot_id_key)rI   r   r   r   r   r   	scan_datas          r$   to_dataset_scanz#IcebergScanResolver.to_dataset_scan   s]     33.K%-"3 4   I  4%%'')BBBr&   2_NativeIcebergScanData | _PyIcebergScanData | Nonec                 / ddl m} dd l}|j        j                                        }d /| j        r| j        rt                    /|r:dnd}	/dnd}
t          d| j
         d| d| d| d	|	 d
|
 d| j                    t          /fd           | j                                        }|rt          d|j        j                    | j
        }d }|l|                    |          }|d| }t#          |          |j        }|d| d}t#          |          |                                |         }|j
         }n@|                                }|j        j        }|                                x}|j
         nd}|||k    r|rt          d|d           d S | j        pt1          j        d          }|r|dvrd| d}t#          |          |dk    rdn|j        dk    s
d|j         nd }|dnt7          |          }|dk    r|n	 |j        | }g }t;          ||          }| j        r|t=          | |j        |           nd }i }d}d}|dk    r|sddlm }m!} |rt          d           tE                      }|#                    |||          } /| $                    /          } d}!tK          | &                                          D ]1\  }"}#|#j'        j(        |j)        k    rd |#j'        j(         } n|#j*        rvg ||"<   |#j*        D ]i}$|$j+        |j,        k    rd!|$j+         } nL|$j(        |j)        k    rd"|$j(         } n0||"         -                    |$j.                   |!d#z  }!||$j/        z  }j|r n|0                    |"|#j'        j1        |#j'        j2        $           ||3                    |#j'                   ||#j'        j/        z  }|-                    ti          |#j'        j.                             3|r%tE                      |z
  }%t          d%|%d&d'           |s|rOtk          |          d#k    rdnd(}&|!d#k    rdnd(}'t          d)tk          |           d*|& d+| d,| d-|! d.|'             ||          }(|6                                })|#|6                    tk          |          |)          nd }*| j        j7        tq          | j        j7                  nd }+ts          |||(|)||*||+| j        r| j:        s|dk    r||fnd |/
  
        S |d0k    rd1| }tw          |          |rt          d2|            dd l<}t{          |j>        j?        j        j@        ||||/3          }, ||                                          }-t          jB        C                    |-|,d4d45          }.t          |.|6          S )7Nr   rN   zSome(<redacted>)rX   z5IcebergScanResolver: to_dataset_scan(): snapshot ID: z	, limit: z, projection: z, filter_columns: z, pyarrow_predicate: z, iceberg_table_filter: z , self.use_metadata_statistics: c                     dd S )Nz<IcebergScanResolver: to_dataset_scan(): pyarrow_predicate = z, iceberg_table_filter = r"   )iceberg_table_filterr   s   r$   <lambda>z;IcebergScanResolver._to_dataset_scan_impl.<locals>.<lambda>#  s    l;LllSgll r&   zJIcebergScanResolver: to_dataset_scan(): tbl.metadata.current_snapshot_id: ziceberg snapshot ID not found: z(IcebergScanResolver: requested snapshot z did not contain a schema IDri   zHIcebergScanResolver: to_dataset_scan(): early return (snapshot_id_key = )POLARS_ICEBERG_READER_OVERRIDE)nativers   z-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')rs   z"forced reader_override='pyiceberg'   z"unsupported table format version: )*)DataFileContent
FileFormatz<IcebergScanResolver: to_dataset_scan(): begin path expansion)r~   r   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    )current_indexpartition_spec_idpartition_valuesz?IcebergScanResolver: to_dataset_scan(): finish path expansion (z.3fzs)sz?IcebergScanResolver: to_dataset_scan(): native scan_parquet(): z sourcez, snapshot ID: z, schema ID: z, z deletion file)
sourcesprojected_iceberg_schemacolumn_mappingdefault_valuesdeletion_filesmin_max_statisticsstatistics_loaderstorage_options	row_countr   r   z)iceberg reader_override='native' failed: zLIcebergScanResolver: to_dataset_scan(): fallback to python[pyiceberg] scan: )r~   n_rowswith_columnsr   T)pyarrowis_pure)lfr   )ErP   rO   rl   ro   rp   r   r   r   r   r   r~   r   r   rA   metadatacurrent_snapshot_idsnapshot_by_id
ValueError	schema_idschemasrQ   current_schema_idcurrent_snapshotr   osgetenvformat_versiontupleselectr   r   pyiceberg.manifestr   r   r   scanfilter	enumerate
plan_filesfilefile_formatPARQUETdelete_filescontentPOSITION_DELETESappend	file_pathrecord_countpush_partition_valuesspec_id	partitionpush_file_statisticsr   lenfinishr=   0_convert_iceberg_to_object_store_storage_options_NativeIcebergScanDatar   r   polars.io.iceberg._utilsr   ioiceberg_scan_pyarrow_dataset_implplr   _scan_python_function_PyIcebergScanData)0rI   r   r   r   r   r   rO   rt   r   pyarrow_predicate_displayiceberg_table_filter_displaytblr~   r   snapshotru   iceberg_schemar   vr   fallback_reasonr   r   r   missing_field_defaultsr   r   total_physical_rowstotal_deleted_rowsr   r   
start_timer   total_deletion_filesi	file_infodeletion_fileelapsedr   s2r   identity_transformed_valuesr   r   funcrR   r   r   s0        `                                         @r$   r   z)IcebergScanResolver._to_dataset_scan_impl   s<	    	;:::::$$$$-'//11# ), *) * $AAR#S#S  	&7&C"" & ';&F""F ) P $ 0P PP P  *P P $2	P P
 '@P P *FP P 261MP P	 	 	 	     	
 	
 	
 jnn 	X58\5UX X  
 &	"))+66HEEE oo% *I 2{ 2 2 2  !oo% [[]]95N!)!57OO ZZ\\N6I -0,@,@,B,B'Bq&O1=""UW 
 *5-@@ .'. . .  
 4 . 
"),3
 3
  	"6MMMP#P P P  S//! +-- 10 %** Kc6HJJJ 	 %/$6&&E*<M<M &(( N&&8 	! !J$"
 "
 +0>0J $C)>)>)OPPP 	
 02#$"#k))/)FFFFFFFF WUVVV%J88' /   D $/{{#788#$  )$//*;*; < < - -9>-1CCCKy~/IKK $ E) I(*N1%)2)? I I(0O4TTT!;#0#8!; !; , "E(4
8JJJ!?#0#<!? !? , "E&q)001HIII,1,*m.HH**" E&<<"#&/n&<%.^%= =    %0%::9>JJJ#y~'BB#7	8PQQ     &..:5>.5=> > >  
  8	$ g,,!++BB/144RR#@7||@ @,-@ @ %0@ @ #,	@ @
 ,@ @
 <>@ @   /.~>>N*@*G*G*I*I' %0 "((W7RSSS  :8D AJ9     *)A-:-#5"3 / 4 "5 :Lq9P9P )*<==
  /#   ( ((OoOOCs### 	I7FI I  
 	('''I$?##!5
 
 
 )(66\//	 0 
 
 "RIIIIr&   r[   )r   r   r   r}   r   r   r   r   r   r   r   r   )r   r   r   r}   r   r   r   r   r   r   r   r   )r(   r)   r*   rz   r6   rQ   r   r   r"   r&   r$   r|   r|      s           ::::!!!!) ) ) ) 59 '++/(,C C C C C C4 59 '++/(,iJ iJ iJ iJ iJ iJ iJ iJr&   r|   c                  &    e Zd Zedd            ZdS )_ResolvedScanDataBaser   pl.LazyFramec                    d S r!   r"   r   s    r$   r   z"_ResolvedScanDataBase.to_lazyframe$  s    ,/Cr&   Nr   r   )r(   r)   r*   r   r   r"   r&   r$   r   r   #  s&        /// ^///r&   r   c                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ddZdS )r   z.Resolved parameters for a native Iceberg scan.z	list[str]r   zpyiceberg.schema.Schemar   z	pa.Schemar   zdict[int, pl.Series | str]r   zdict[int, list[str]]r   zpl.DataFrame | Noner   zIcebergStatisticsLoader | Noner   r<   r   ztuple[int, int] | Noner   r]   r   r   r   c                    ddl m}  || j        t          j                    dd| j        d| j        fd| j        fd| j        f| j	        | j
        
  
        S )	Nr   )scan_parquetinsertignoreziceberg-column-mappingr   ziceberg-position-delete)	cast_optionsmissing_columnsextra_columnsr   _column_mapping_default_values_deletion_files_table_statistics
_row_count)polars.io.parquet.functionsr   r   r   _default_icebergr   r   r   r   r   r   )rI   r   s     r$   r   z#_NativeIcebergScanData.to_lazyframe<  sv    <<<<<<|L(9;;$" 05t7JK&(;<68KL"5~
 
 
 	
r&   Nr   r(   r)   r*   rz   r6   r   r"   r&   r$   r   r   (  s         885555....((((++++
 6555....%%%%
 
 
 
 
 
r&   r   c                  0    e Zd ZU dZded<   ded<   d	dZdS )
r   z.Resolved parameters for reading via PyIceberg.r   r   r]   r   r   c                    | j         S r!   )r   r   s    r$   r   z_PyIcebergScanData.to_lazyframeV  s	    wr&   Nr   r  r"   r&   r$   r   r   M  sJ         88      r&   r   objr
   r   c                    t          | t                    r-t                              |                                 d          n| dt	          |           j         dndS )NREDACTED<z object>rX   )rB   dictfromkeyskeysra   r(   )r	  s    r$   _redact_dict_valuesr  Z  s\     c4  	chhjj*--- ? .c#----r&   r=   r^   c                    i }|                                  D ]1\  }}t                              |          x}	 |||<   (d|vr|||<   2|S )N.)items&ICEBERG_TO_OBJECT_STORE_CONFIG_KEY_MAPrA   )r=   r   kr   translated_keys        r$   r   r   d  ss     O*0022 
# 
#1DHHKKKN /0ON++\\
 "#OA r&   zs3.endpointaws_endpoint_urlzs3.access-key-idaws_access_key_idzs3.secret-access-keyaws_secret_access_keyzs3.session-tokenaws_session_tokenz	s3.region
aws_regionzs3.proxy-uri	proxy_urlzs3.connect-timeoutconnect_timeoutzs3.request-timeouttimeoutzs3.force-virtual-addressing aws_virtual_hosted_style_requestzadls.account-nameazure_storage_account_namezadls.account-keyazure_storage_account_keyzadls.sas-tokenazure_storage_sas_keyzadls.tenant-idazure_storage_tenant_idzadls.client-idazure_storage_client_idzadls.client-secretazure_storage_client_secretzadls.account-hostazure_storage_authority_hostz
adls.tokenazure_storage_tokenbearer_tokentoken)zgcs.oauth2.tokenzhf.tokenzFinal[dict[str, str]]r  )r	  r
   r   r
   )r=   r^   r   r^   )>
__future__r   r   abcr   r   dataclassesr   	functoolsr   timer   typingr	   r
   r   r   r   polars._reexport	_reexportr   rl   r   r   r   polars.exceptionsr   r   r   r   r   r   #polars.io.scan_options.cast_optionsr   r   parj   rs   pyiceberg.schemarF   pyiceberg.typedefpolars._typingr   polars.io.cloud._utilsr   polars.lazyframe.framer   r   r-   r2   r]   r   r6   r8   r4   r|   r   r   r   r  r   r  r"   r&   r$   <module>r:     sI   " " " " " " " 				 # # # # # # # # ! ! ! ! ! !             @ @ @ @ @ @ @ @ @ @ @ @ @ @       J J J J J J J J J J * * * * * *            @ ? ? ? ? ? 	1111111555555000000S S S S SS S S S' ' ' ' '!7 ' ' ' 4) ) ) ) ) ) ) )
 #&(E"E  E E E E 48 8 8 8 8 8 8 8v 4S S S S S S S Sl 4VJ VJ VJ VJ VJ VJ VJ VJr
0 0 0 0 0C 0 0 0
 4!
 !
 !
 !
 !
2 !
 !
 !
H 4	 	 	 	 	. 	 	 	      4A%A +A 3	A
 +A A KA +A )A "#EA 5A 3A -A /A  /!A" 7#A$ 7%A& ''A* '/A A A &      r&   