
    j,                       d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZ erd dlmZ d dlmZ d dl m!Z! dAdZ"dBdZ#	 	 dCdDdZ$e	 dEdddddFd'            Z%e	 dEdddddGd)            Z%e	 dEdddddHd,            Z%	 dId-d.dddHd/Z%dd0dJd7Z&dKd9Z'dIdLd:Z(dMd;Z)dMd<Z*dNd@Z+dS )O    )annotationsN)Sequence)contextmanager)BytesIOStringIO)Path)IOTYPE_CHECKINGAnycastoverload)_FSSPEC_AVAILABLEfsspec)is_int_sequenceis_path_or_str_sequenceis_str_sequencenormalize_filepath)NoDataError)Iterator)AbstractContextManager)StorageOptionsDictcolumns0Sequence[str] | Sequence[int] | str | int | Nonereturn1tuple[Sequence[int] | None, Sequence[str] | None]c                $   | dS d}d}t          | t                    r| g}nlt          | t                    r| g}nSt          |           rt	          |            | }n2t          |           rt	          |            | }nd}t          |          ||fS )a0  
    Parse the `columns` argument of an I/O function.

    Disambiguates between column names and column indices input.

    Returns
    -------
    tuple
        A tuple containing the columns as a projection and a list of column names.
        Only one will be specified, the other will be `None`.
    N)NNzQthe `columns` argument should contain a list of all integers or all string values)
isinstancestrintr   _ensure_columns_are_uniquer   	TypeError)r   
projectioncolumn_namesmsgs       U/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/polars/io/_utils.pyparse_columns_argr&      s     z'+J)-L'3 y	GS	!	! 
Y

		!	! "7+++		!	! "7+++

ann|##    Sequence[str] | Sequence[int]Nonec                    t          |           t          t          |                     k    rd| }t          |          d S )Nz2`columns` arg should only have unique values, got )lenset
ValueError)r   r$   s     r%   r    r    @   sB    
7||s3w<<((((N7NNoo )(r'   row_index_name
str | Nonerow_index_offsetr   tuple[str, int] | Nonec                    | dS | |fS )z
    Parse the `row_index_name` and `row_index_offset` arguments of an I/O function.

    The Rust functions take a single tuple rather than two separate arguments.
    N )r.   r0   s     r%   parse_row_index_argsr4   F   s     t 011r'   .)use_pyarrowraise_if_emptystorage_optionsfile*str | Path | list[str] | IO[bytes] | bytesencodingr5   boolr6   r7   StorageOptionsDict | NoneContextManager[str | BytesIO]c                   d S Nr3   r8   r:   r5   r6   r7   s        r%   prepare_file_argrA   U   	     %(Cr'   (str | Path | IO[str] | IO[bytes] | bytesc                   d S r?   r3   r@   s        r%   rA   rA   `   rB   r'   4str | Path | list[str] | IO[str] | IO[bytes] | bytes9ContextManager[str | list[str] | BytesIO | list[BytesIO]]c                   d S r?   r3   r@   s        r%   rA   rA   k   s     ADr'   FTc               	   |r|                                 ni }|rt          sd}t          |          t          d"d            }|r|dv nd}|r|nd	}|                    d
          r|dd         dfn|df\  }}	| t          | t                    rK|s*|                     ||	                              d	          } t          t          |           d|          S t          | t                    rWt          t          |                                                     d	                    d|                                 |          S t          | t                    r|slt          t          |                                                     ||	                              d	                    d|                                 |          S  |t          | d|                                 |                    S t          | t                    ry|s]t          t          |                                                     ||	                              d	                    d| d|          S  |t!          |                     S t          | t"                    rt%          |           rt'          | |          S t          rddlm  |           d         dk    r|r |t!          |                     S t          |                               ||	          5 }
t          t          |
                                                    d	                    | |          cddd           S # 1 swxY w Y   ||d<   |	|d<   t/          j        | fi |S t          | t0                    rt3          |           rvt5          d | D                       r]t          rVddlm |r2t5          fd | D                       r |fd!| D                       S ||d<   |	|d<   t/          j        | fi |S t          | t"                    rt!          |           } |st          |                               ||	          5 }
t          t          |
                                                    d	                    | |          cddd           S # 1 swxY w Y    ||           S )#u  
    Prepare file argument.

    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
    Returned value is always usable as a context.

    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
    A local path is returned as a string.
    An http URL is read into a buffer and returned as a `BytesIO`.

    When `encoding` is not `utf8` or `utf8-lossy`, the whole file is
    first read in Python and decoded using the specified encoding and
    returned as a `BytesIO` (for usage with `read_csv`). If encoding
    ends with "-lossy", characters that can't be decoded are replaced
    with `�`.

    A `bytes` file is returned as a `BytesIO` if `use_pyarrow=True`.

    When fsspec is installed, remote file(s) is (are) opened with
    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
    If encoding is not `utf8` or `utf8-lossy`, decoding is handled by
    fsspec too.
    z3`fsspec` is required for `storage_options` argumentr8   r   r   Iterator[Any]c              3     K   	 | V  d S # w xY wr?   r3   r8   s    r%   managed_filez&prepare_file_arg.<locals>.managed_file   s"      	JJJDDDDDs   
 >   utf8
utf8-lossyTrM   z-lossyNireplacestrict)errorsbytes)contextr6   r   )rS   read_positionr6   r   )brS   rT   r6   zPath ()check_not_directoryr   )infer_storage_optionsprotocol)r:   rQ   r:   rQ   c              3  @   K   | ]}t          |t                    V  d S r?   )r   r   ).0fs     r%   	<genexpr>z#prepare_file_arg.<locals>.<genexpr>   s,      4V4VAZ35G5G4V4V4V4V4V4Vr'   c              3  B   K   | ]} |          d          dk    V  dS )rZ   r8   Nr3   )r\   r]   rY   s     r%   r^   z#prepare_file_arg.<locals>.<genexpr>  s9      TT!,,Q//
;vETTTTTTr'   c                2    g | ]}t          |           S )rW   r   )r\   r]   check_not_dirs     r%   
<listcomp>z$prepare_file_arg.<locals>.<listcomp>  s6        ! /qmTTT  r'   )r8   r   r   rI   )copyr   ImportErrorr   endswithr   rR   decodeencode_check_emptyr   r   readtellr   
read_bytesr   r   looks_like_urlprocess_file_urlfsspec.utilsrY   openr   listr;   all
open_files)r8   r:   r5   r6   r7   r$   rL   has_utf8_utf8_lossy_encodingencoding_strencoding_errorsr]   rb   rY   s              @@r%   rA   rA   v   s;   > 1@Go**,,,RO 0 C#    ^ /7@***D !  (388VL   **	&crc	I&&H% "L/ $OM$ 
+ 	T;;|O;DDKKFSSDDMM7>
 
 
 	
 $!! 
DIIKK&&v..//))++)	
 
 
 	
 $   
+ 
	IIKKVLVAAVF^^ 
 ""iikk-	 	 	 	 |!"iikk-	  
 
 	
 $ Y+ 		OO%%VLVAAVF^^ 
 +***-    |.tWWWXXX$ 8 $ 	8#D,777 	8:::::: %$T**:6&@@/ '<*4]SSS   $ZZ__)/ %   ' 7 788#''5                  +3OJ'(7OH%;t77777$ >$t** >4V4VQU4V4V4V1V1V > 	>::::::+ TTTTtTTTTT '<   %)     +3OJ'(7OH%$T==_===$ !$MJJJ+ 	d,OO ST#AFFHHOOF3344#K#1                  <s&    AM22M69M6?ASSS)rT   rU   r   rS   r   rT   
int | Nonec                   |rA|                                  j        dk    r$|dv r|rd| dnd}d| | }t          |          | S )Nr   )r   r   z (buffer position = z; try seek(0) before reading?) zempty data from )	getbuffernbytesr   )rU   rS   r6   rT   hintr$   s         r%   ri   ri     sw      !++--.!33 111m1 Q=PPPP 	
 10$00#Hr'   pathc                F    t          j        d| t           j                  d uS )Nz^(ht|f)tps?://)rematch
IGNORECASE)r}   s    r%   rm   rm   )  s    8%tR];;4GGr'   c                L   ddl m}  ||           5 }|r|dv r-t          |                                          cd d d            S t          |                                                    |                              d                    cd d d            S # 1 swxY w Y   d S )Nr   )urlopen>   rM   rN   rM   )urllib.requestr   r   rj   rg   rh   )r}   r:   r   r]   s       r%   rn   rn   -  s   &&&&&&	 E! 	E8'===16688$$E E E E E E E E 16688??844;;FCCDD	E E E E E E E E E E E E E E E E E Es   'BABB Bc                :     t           fddD                       S )Nc              3      K   | ]}|v V  	d S r?   r3   )r\   charr8   s     r%   r^   z"is_glob_pattern.<locals>.<genexpr>8  s'      88tt|888888r'   )*?[)anyrK   s   `r%   is_glob_patternr   7  s&    8888888888r'   c                p    	 t          t          j        | d                     dS # t          $ r Y dS w xY w)NT)	recursiveF)nextglobiglobStopIterationrK   s    r%   is_local_filer   ;  sO    TZ---... t    uus   #' 
55sourceqstr | Path | IO[bytes] | IO[str] | bytes | list[str] | list[Path] | list[IO[bytes]] | list[IO[str]] | list[bytes]Flist[str] | list[Path] | list[IO[str]] | list[IO[bytes]] | list[bytes]c                    t          | t          t          f          rt          | d          } nt	          |           rd | D             } t          | t
                    rt          | t          t          f          r| g}t          d|          S | S )NFrW   c                0    g | ]}t          |d           S )FrW   ra   )r\   r   s     r%   rc   zget_sources.<locals>.<listcomp>S  s3     
 
 
FLv5AAA
 
 
r'   z9list[bytes] | list[str] | list[IO[bytes]] | list[IO[str]])r   r   r   r   r   r   rR   r   )r   outs     r%   get_sourcesr   D  s     &3+&& 
#FFFF	 	(	( 

 
PV
 
 
 fh'' V:fsEl+K+K V8>xOQTUUUMr'   )r   r   r   r   )r   r(   r   r)   )Nr   )r.   r/   r0   r   r   r1   ).)r8   r9   r:   r/   r5   r;   r6   r;   r7   r<   r   r=   )r8   rC   r:   r/   r5   r;   r6   r;   r7   r<   r   r=   )r8   rE   r:   r/   r5   r;   r6   r;   r7   r<   r   rF   r?   )
rU   r   rS   r   r6   r;   rT   rw   r   r   )r}   r   r   r;   )r}   r   r:   r/   r   r   )r8   r   r   r;   )r   r   r   r   ),
__future__r   r   r   collections.abcr   
contextlibr   ior   r   pathlibr   typingr	   r
   r   r   r   polars._dependenciesr   r   polars._utils.variousr   r   r   r   polars.exceptionsr   r   r   ContextManagerpolars._typingr   r&   r    r4   rA   ri   rm   rn   r   r   r   r3   r'   r%   <module>r      s1   " " " " " "  				 $ $ $ $ $ $ % % % % % %                       9 9 9 9 9 9 9 9 9 9 9 9 9 9 : : : : : : : :            * ) ) ) ) ) 2((((((CCCCCC111111"$ "$ "$ "$J    "&2 2 2 2 2 
 ( 14( ( ( ( ( 
( 
 ( 14( ( ( ( ( 
( 
 D 14D D D D D 
D  b 15b b b b b bL TX     H H H HE E E E E9 9 9 9        r'   