
    j/                    H   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ e
r&d dlmZ d dlmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'  ed           G d d                      Z(d!dZ)d"d Z*dS )#    )annotationsN)	dataclass)partial)perf_counter)TYPE_CHECKINGAny)eprint)parse_version)+_get_credentials_from_provider_expiry_aware)0_extract_table_statistics_from_delta_add_actions)scan_parquet)ScanCastOptions)Schema)datetime)
DeltaTable)DeletionFilesStorageOptionsDict)NoPickleOption)CredentialProviderBuilder	LazyFrameT)kw_onlyc                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   d)dZddddddd*dZd+d!Zd,d#Zd-d%Z	d.d(Z
dS )/DeltaDatasetzDataset interface for Delta.zNoPickleOption[DeltaTable]table_
str | None
table_uri_zint | str | datetime | NoneversionzStorageOptionsDict | Nonestorage_optionsz CredentialProviderBuilder | Nonecredential_provider_builderzdict[str, Any] | Nonedelta_table_optionsbooluse_pyarrowpyarrow_optionsrechunkreturnr   c                h    t          |                                                                           S )zFetch the schema of the table.)r   tableschemaselfs    ]/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/polars/io/delta/_dataset.pyr)   zDeltaDataset.schema4   s$    djjll))++,,,    N)existing_resolved_version_keylimit
projectionfilter_columnspyarrow_predicater.   r/   
int | Noner0   list[str] | Noner1   r2   tuple[LazyFrame, str] | Nonec                  ddl ddl}|j        j                                        }|r(t          d| j         d| d| d| d| j         
           |                                 | j        | j        n                                }t          |          }	|||	k    r|rt          d|	d	           dS | j        raddl
}dd
lm}
  j        d%i | j        pi }t          |j        j        j        j        ||||          } |
j        |j        |dd          |	fS                                 }t/          |j                  |                                 }t3          fd|                                D                       }t7                      }|rt          d                                           }|                                                     d          rd |D             }|r5t7                      |z
  }t          dt?          |           d|dd           |3tA           j!        "                                          |||          nd}#                                j$        }|duod|v }d}|raddl%}d}tM          |j'                  }||k     r5dd(                    d |D                        d| d}tS          |          d&fd }d!|f}nd}tU          |t?                    dk    r|ndt?                    dk    tW          j,                    d"d#| j-        | j.        | j/        ||$          |	fS )'zConstruct a LazyFrame scan.r   Nz*DeltaDataset: to_dataset_scan(): version: z	, limit: z, projection: z, filter_columns: z, use_pyarrow: z=DeltaDataset: to_dataset_scan(): early return (version_key = )r   )n_rows	predicatewith_columnsT)pyarrowis_purec                $    i | ]\  }}|v 	||S  r>   ).0kvpartition_columnss      r,   
<dictcomp>z0DeltaDataset.to_dataset_scan.<locals>.<dictcomp>w   s*    GGGda5F0F0FQ0F0F0Fr-   z5DeltaDataset: to_dataset_scan(): begin path expansion	lakefs://c                :    g | ]}|                     d d          S )rD   s3://)replace)r?   paths     r,   
<listcomp>z0DeltaDataset.to_dataset_scan.<locals>.<listcomp>   s&    JJJDT\\+w77JJJr-   zCDeltaDataset: to_dataset_scan(): native scan_parquet(): num_files: z, path expansion time: z.3fs)r1   r)   verbosedeletionVectors)         z5reading delta deletion vectors requires deltalake >= .c              3  4   K   | ]}t          |          V  d S N)str)r?   rA   s     r,   	<genexpr>z/DeltaDataset.to_dataset_scan.<locals>.<genexpr>   s(      ,L,LSVV,L,L,L,L,L,Lr-   z, found requested_pathspl.DataFramer&   c                    t                    }|: j        dd gt          |           z  id j        j                  i          S t          | |          S )Nselection_vector)r)   )_fetch_deletion_vectors	DataFramelenListBoolean_extract_delta_deletion_vectors)rU   delta_deletion_vectorsplr(   s     r,   _deletion_vector_callbackz?DeltaDataset.to_dataset_scan.<locals>._deletion_vector_callback   s|     *A)G)G&)1'2<+dVc/6J6J-JK 2GBGBJ4G4GH    7#%;  r-   zdelta-deletion-vectorinsertignore)
hive_schemahive_partitioningcast_optionsmissing_columnsextra_columnsr   credential_providerr%   _table_statistics_deletion_filesr>   )rU   rV   r&   rV   )0polarspolars._utils.logging_utilsloggingrK   r	   r   r#   r(   rS   (polars.io.pyarrow_dataset.anonymous_scanpolars.lazyframe.framer   to_pyarrow_datasetr$   r   iopyarrow_datasetanonymous_scan_scan_pyarrow_dataset_impl_scan_python_functionr)   metadatasetrB   r   itemsr   	file_uris	table_uri
startswithr[   r   rZ   get_add_actionsprotocolreader_features	deltalaker
   __version__joinImportErrorr   r   _default_icebergr   r    r%   )r+   r.   r/   r0   r1   r2   rl   rK   r   version_keyr   datasetfunctable_mdr)   rd   
start_timepathselapsedtable_statisticsr   has_deletion_vectorsdeletion_filesr   dv_min_version	installedmsgra   rB   r`   r(   s                               @@@r,   to_dataset_scanzDeltaDataset.to_dataset_scan8   s    	$$$$-'//11 	3 L3 33 3  *3 3 $2	3 3
 !% 03 3   

"&,":$,,'ll *5-<< WkWWW   4 	;;;;888888.e.NN$2F2L"NNG	)8S+'  D 392dD    >>## :;;GGGGfllnnGGG
 
 "^^
 	LJKKK!!>>&&{33 	KJJEJJJE 	"nnz1G7!%jj7 7 )067 7 7   ) =U224455-	     	  ..**:4'P,=,P 	 04 	"&N%i&;<<I>))*$'HH,L,L^,L,L,L$L$L* *&* * * 
 "#&&&       ()NN
 "N'*+<'='='A'At!"344q8(9;;$" 0 $ @L.*
 
 
  	r-   rS   c                    | j         9| j                                        J |                                 j        | _         | j         S )zFetch the table URI.)r   r   getr(   r|   r*   s    r,   r|   zDeltaDataset.table_uri   s<    ?";??$$000"jjll4DOr-   r   c                   | j                                         Sddlm} ddlm}m}m} |                    d           ddl	m
} | j        J i }| j        r,| j                                        x}rt          |          pi } || j        | j        | j        | j        i | j        pi |nd| j                  }|                                }	|	j        |k    s|	j        |k    rd|	j         d	| d
| }
 ||
          |	j        dk    rG|	j        @h |	j                            |          }t-          |          dk    rd| d}
 ||
          | j                             |           | j                                         S )zFetch the DeltaTable object.Nr   )DeltaProtocolError)MAX_SUPPORTED_READER_VERSIONNOT_SUPPORTED_READER_VERSIONSUPPORTED_READER_FEATURESrL   )_get_delta_lake_table)
table_pathr   r   r!   z&The table's minimum reader version is z5 but polars delta scanner only supports version 1 or z with these reader features:    z)The table has set these reader features: z= but these are not yet supported by the polars delta scanner.)r   r   deltalake.exceptionsr   deltalake.tabler   r   r   addpolars.io.delta._utilsr   r   r    build_credential_providerr   r   r   r!   r   min_reader_versionr   
differencer[   ry   )r+   r   r   r   r   r   credential_provider_credsproviderr(   table_protocolr   missing_featuress               r,   r(   zDeltaDataset.table   sV   ;??$??????          &))*;<<<DDDDDD?...(*%/  <VVXXX @IIOR * *)? +77C R,2Q7PQQ $($<
 
 
E #^^--N 14PPP!48TTTb^=^ b bKgb b G`b b  )(---1Q66"2>#D^%C#D#O#O-$ $  '((1,, VFV  V  V  VC,,S111KOOE"""{   r-   dict[str, Any]c                8    |                                   | j        S rR   )r|   __dict__r*   s    r,   __getstate__zDeltaDataset.__getstate__  s    }r-   stateNonec                    || _         d S rR   )r   )r+   r   s     r,   __setstate__zDeltaDataset.__setstate__  s    r-   )r&   r   )r.   r   r/   r3   r0   r4   r1   r4   r2   r   r&   r5   )r&   rS   )r&   r   )r&   r   )r   r   r&   r   )__name__
__module____qualname____doc____annotations__r)   r   r|   r(   r   r   r>   r-   r,   r   r      s         &&&&&&((((....AAAA....****MMM- - - - 59 '++/(,S S S S S Sr   >! >! >! >!@        r-   r   rU   rV   r_   r&   c                r   | j         dt          j        ik    sJ t          j        t          j        t          j                  d}|                    |                                          }|j         |k    sJ t          j        dk    rdnd}| 	                                
                    t          j        d          j                            dd          j                            |                                        |	                                
                    t          j        d          j                            dd          j                            |                    ddd	d	
                              dg                                          }|j        t%          |           k    sJ |S )a  
    Extract the deletion_vectors for the provided requested_paths.

    Input requested_paths schema is "path": String.
    Output series schema is "selection_vector": List(Boolean), maintaining order.

    The selection_vector from deltalake is a keep-mask (True = keep).
    rH   )filepathrX   win32zfile://zfile:///z
^lakefs://rF   r   left)left_onright_onhowmaintain_orderrX   )r)   r`   Stringr\   r]   selectkeyssysplatformlazyr:   colrS   rG   strip_prefixr   collectheightr[   )rU   r_   delta_dv_schemafile_prefix	joined_dfs        r,   r^   r^   !  s    !fbi%88888#%9"'"*BUBUVVO3::?;O;O;Q;QRR!(O;;;;"|w66))JK	F6NNw//k**

 


 
"''))66z""WW\733\\+.. 
 ! 
 


 


 
#$	%	%	' , s?333333r-   r(   r   pl.DataFrame | Nonec                   ddl }|j        j                                        }t	          j        |                                           }|r*|j        dk    rt          dt          |                      t          |          dk    rdS |S )aT  
    Fetch the deletion_vectors, mapping file_uri to "deletion_vector".

    Schema: {"filepath": pl.String, "selection_vector": pl.List(pl.Boolean)}

    The selection_vector from deltalake is a keep-mask (True = keep), so
    the more accurate term would be "selection_vector".

    Returns None if the table has no deletion vectors.
    r   Nz0DeltaDataset: has deletion_vectors, file_count: )
rm   rn   ro   rK   r`   rZ   deletion_vectorsr   r	   r[   )r(   rl   rK   dv_tables       r,   rY   rY   O  s     !   m#++--G|E224455H S8?Q&&Q#h--QQRRR
8}}tOr-   )rU   rV   r_   rV   r&   rV   )r(   r   r&   r   )+
__future__r   r   dataclassesr   	functoolsr   timer   typingr   r   rl   r`   rm   r	   polars._utils.variousr
   .polars.io.cloud.credential_provider._providersr   r   r   polars.io.parquet.functionsr   #polars.io.scan_options.cast_optionsr   polars.schemar   r   r   r   polars._typingr   r   polars.io.cloud._utilsr   ,polars.io.cloud.credential_provider._builderr   rq   r   r   r^   rY   r>   r-   r,   <module>r      s   " " " " " " 



 ! ! ! ! ! !             % % % % % % % %     ( ( ( ( ( ( / / / / / /      T S S S S S 4 4 4 4 4 4 ? ? ? ? ? ?             1!!!!!!$$$$$$@@@@@@@@555555VVVVVV000000 4~ ~ ~ ~ ~ ~ ~ ~B+ + + +\     r-   