
    jo                    t   U d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ er^d dlZd dlmZ d d	lmZ  ej        e          5  d d
lmZ ddd           n# 1 swxY w Y   d dlmZ d dlmZ d dlmZmZ d dlmZ d dl m!Z!  G d d          Z" G d d          Z# ed           G d d                      Z$ ed           G d d                      Z% ed           G d d                      Z&ee&gdf         Z'de(d<    ed           G d d                      Z)d%d$Z*dS )&    )annotations)CallableMapping)	dataclass)TYPE_CHECKINGClassVarLiteral	TypeAlias)parse_into_list_of_expressions)issue_unstable_warningN)Path)	DataFrame)PyExpr)Sequence)IO)StorageOptionsDictSyncOnCloseMethod)Expr)CredentialProviderBuilderc                      e Zd ZU ded<   dS )_InternalPlPathProviderConfigzClassVar[str]pl_path_provider_idN)__name__
__module____qualname____annotations__     X/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/polars/io/partition.pyr   r      s         &&&&&&r   r   c                  (    e Zd ZdZddddddddZdS )PartitionBya  
    Configuration for writing to multiple output files.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    base_path
        Base path to write to.
    file_path_provider
        Callable for custom file output paths.
    key
        Expressions to partition by.
    include_key
        Include the partition key expression outputs in the output files.
    max_rows_per_file
        Maximum number of rows to write for each file. Note that files may have
        less than this amount of rows.
    approximate_bytes_per_file
        Approximate number of bytes to write to each file. This is measured as
        the estimated size of the DataFrame in memory.

    Examples
    --------
    Split to multiple files partitioned by year:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy("data/", key="year")
    ... )  # doctest: +SKIP

    Split to multiple files based on size:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy(
    ...         "data/", max_rows_per_file=1000, approximate_bytes_per_file=100_000_000
    ...     )
    ... )  # doctest: +SKIP

    Split to multiple files partitioned by year, with limits on individual file sizes:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy(
    ...         "data/",
    ...         key="year",
    ...         max_rows_per_file=1000,
    ...         approximate_bytes_per_file=100_000_000,
    ...     )
    ... )  # doctest: +SKIP
    Nauto)file_path_providerkeyinclude_keymax_rows_per_fileapproximate_bytes_per_file	base_path
str | Pathr#   eCallable[[FileProviderArgs], str | Path | IO[bytes] | IO[str]] | _InternalPlPathProviderConfig | Noner$   =str | Expr | Sequence[str | Expr] | Mapping[str, Expr] | Noner%   bool | Noner&   
int | Noner'   int | Literal['auto'] | NonereturnNonec                  d}t          |           |||dk    rd}t          |          ||d}t          |          t          |          }|dk    r|dnd }|d}t          |||t	          |          nd |||          | _        d S )Nz2`PartitionBy` functionality is considered unstabler"   zlat least one of ('key', 'max_rows_per_file', 'approximate_bytes_per_file') must be specified for PartitionByz1cannot use 'include_key' without specifying 'key'l    l    )r(   r#   r$   r%   r&   r'   )r   
ValueErrorstr_PartitionByInner_parse_to_pyexpr_list_pl_partition_by)selfr(   r#   r$   r%   r&   r'   msgs           r   __init__zPartitionBy.__init__T   s     Cs### K!)*f444 
 S//!;;2ECS//!	NN	%//!2!: ' &-)6& 11.1o%c***4#/'A!
 !
 !
r   )r(   r)   r#   r*   r$   r+   r%   r,   r&   r-   r'   r.   r/   r0   )r   r   r   __doc__r9   r   r   r   r!   r!      sN        2 2x MQ#'(,CI2
 2
 2
 2
 2
 2
 2
 2
r   r!   T)kw_onlyc                  (    e Zd ZU dZded<   ded<   dS )FileProviderArgsz
    Holds information on the file being sinked to.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.
    intindex_in_partitionr   partition_keysNr   r   r   r:   r   r   r   r   r=   r=      s6           r   r=   c                  P    e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   dS )r4   zG
    Holds parsed partitioned sink options.

    For internal use.
    r3   r(   r*   r#   zlist[PyExpr] | Noner$   r,   r%   r-   r&   r>   r'   NrA   r   r   r   r4   r4      sm           NNN   
 !!!!######r   r4   c                      e Zd ZU dZded<   dS )SinkedPathsCallbackArgszInformation on sinked paths.z	list[str]pathsNrA   r   r   r   rD   rD      s$         &&r   rD   r
   SinkedPathsCallbackc                  `    e Zd ZU dZded<   ded<   dZded<   dZded	<   dZd
ed<   dZded<   dS )_SinkOptionsz
    Holds sink options that are generic over file / target type.

    For internal use. Most of the options will parse into `UnifiedSinkArgs`.
    boolmkdirmaintain_orderNzSyncOnCloseMethod | Nonesync_on_closezStorageOptionsDict | Nonestorage_optionsz CredentialProviderBuilder | Nonecredential_providerzSinkedPathsCallback | Nonesinked_paths_callback)	r   r   r   r:   r   rL   rM   rN   rO   r   r   r   rH   rH      s{           KKK.2M2222 26O5555<@@@@@8<<<<<<<r   rH   exprs_or_columns6str | Expr | Sequence[str | Expr] | Mapping[str, Expr]r/   list[PyExpr]c                    t          | t                    rd |                                 D             S t          |           S )Nc                H    g | ]\  }}|                     |          j         S r   )alias_pyexpr).0kes      r   
<listcomp>z)_parse_to_pyexpr_list.<locals>.<listcomp>   s)    HHHtq!

"HHHr   )
isinstancer   itemsr   )rP   s    r   r5   r5      sH     "G,, IHH/?/E/E/G/GHHHH)*:;;;r   )rP   rQ   r/   rR   )+
__future__r   collections.abcr   r   dataclassesr   typingr   r   r	   r
   polars._utils.parse.exprr   polars._utils.unstabler   
contextlibpathlibr   polarsr   suppressImportErrorpolars._plrr   r   r   polars._typingr   r   polars.exprr   ,polars.io.cloud.credential_provider._builderr   r   r!   r=   r4   rD   rF   r   rH   r5   r   r   r   <module>rl      sB   " " " " " " " - - - - - - - - ! ! ! ! ! ! > > > > > > > > > > > > C C C C C C 9 9 9 9 9 9 W      		[	)	) ' '&&&&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' )(((((DDDDDDDD      VVVVVV' ' ' ' ' ' ' 'g
 g
 g
 g
 g
 g
 g
 g
T 4
 
 
 
 
 
 
 
 4$ $ $ $ $ $ $ $& 4        "*+B*CT*I!J  J J J J 4= = = = = = = ="< < < < < <s   A##A'*A'