
    /j                       d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlZd dlZd dlmZmZ d dlmZ d d	lmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ d d
l%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z. dZ/h dZ0h dZ1de0 de1 Z2dYdZ3	 dZd[d"Z4d\d$Z5d]d(Z6d^d+Z7d_d-Z8d`d2Z9	 dadbd:Z:	 dcdbd;Z;	 dcddd>Z<dedAZ=dfdgdGZ>dhdidJZ? G dK dL          Z@djdkdSZAdldUZBdmdXZCdS )n    )annotationsN)
ThreadPool)Path)
is_tarfile)Any)ImageImageOps)check_class_names)
ASSETS_URLDATASETS_DIRLOGGERNUM_THREADSROOTSETTINGS_FILETQDMYAML	clean_urlcolorstremojisis_dir_writeable)
check_file
check_fontis_ascii)downloadsafe_download
unzip_file)segments2boxeszJSee https://docs.ultralytics.com/datasets for dataset formatting guidance.>   bmpdngjp2jpgmpopngtifavifheicheifjpegtiffwebpjpeg2000>   tsasfavigifm4vmkvmovmp4mpgwmvmpegwebmzSupported formats are:
images: z	
videos: 	img_paths	list[str]returnc                    t           j         dt           j         t           j         dt           j         cfd| D             S )zaConvert image paths to label paths by replacing 'images' with 'labels' and extension with '.txt'.imageslabelsc                    g | ]H}                     |                    d                                         dd           d         dz   IS )   .r   z.txt)joinrsplit).0xsasbs     [/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/data/utils.py
<listcomp>z#img2label_paths.<locals>.<listcomp>?   sK    SSSABGGAHHROO$$++C33A6?SSS    )ossep)r8   rE   rF   s    @@rG   img2label_pathsrL   <   sP    &&bf&&26(A(A(A(AFBSSSSSSSSSrI   
   2       filesthreshold_msfloatthreshold_mb	max_filesintprefixstrc           	        | st          j        | d           dS t          j        | t	          |t          |                               } g }g }g }| D ]}	 t          j                    }	t          j	        |          j
        }
|                    t          j                    |	z
  dz             |                    |
           t          j                    }	t          |d          5 }|                                }ddd           n# 1 swxY w Y   t          j                    |	z
  }|dk    r|                    |
dz  |z             # t          $ r Y w xY w|st          j        | d           dS t          j        |          }t          |          dk    rt          j        |d	          nd}d
t          j        |          dz  dd}d|dd|dd}|rKt          j        |          }t          |          dk    rt          j        |d	          nd}d|dd|dd}nd}||k     s||k     r t          j        | d| | | d           dS t          j        | d| | | d           dS )aw  Check dataset file access speed and provide performance feedback.

    This function tests the access speed of dataset files by measuring ping (stat call) time and read speed. It samples
    up to `max_files` files from the provided list and warns if access times exceed the threshold.

    Args:
        files (list[str]): List of file paths to check for access speed.
        threshold_ms (float, optional): Threshold in milliseconds for ping time warnings.
        threshold_mb (float, optional): Threshold in megabytes per second for read speed warnings.
        max_files (int, optional): The maximum number of files to check.
        prefix (str, optional): Prefix string to add to log messages.

    Examples:
        >>> from pathlib import Path
        >>> image_files = list(Path("dataset/images").glob("*.jpg"))
        >>> check_file_speeds(image_files, threshold_ms=15)
    z%Image speed checks: No files to checkNi  rbr   i   z*Image speed checks: failed to access filesr?   )ddofz, size: i   .1fz KBzping:    ±z msz, read: z MB/srP   u   Fast image access ✅ ()zSlow image access detected (z). Use local storage instead of remote/mounted storage for better performance. See https://docs.ultralytics.com/guides/model-training-tips/)r   warningrandomsampleminlentimeperf_counterrJ   statst_sizeappendopenread	Exceptionnpmeanstdinfo)rQ   rR   rT   rU   rW   
ping_times
file_sizesread_speedsfstart	file_sizefile_obj_	read_timeavg_pingstd_pingsize_msgping_msg	avg_speed	std_speed	speed_msgs                        rG   check_file_speedsr   B   sC   (  &GGGHHH M%YE

!;!;<<E JJK  	%''E

*It022U:dBCCCi((( %''Ea $(MMOO$ $ $ $ $ $ $ $ $ $ $ $ $ $ $)++e3I1}}""9#89#DEEE 	 	 	D	  &LLLMMM wz""H-0__q-@-@rvjq))))aHB"'*--9BBBBH99999999H GK((	36{3C3Ca3G3GBF;Q////Q	DyDDD	DDDD			,)l":":vVVhV	V8VVVWWWWW L L8 LY L L L L	
 	
 	
 	
 	
s7   BE(D	=E	D	ED	:E
EEpathsc                   d}| D ]/}	 |t          j        |          j        z  } # t          $ r Y ,w xY wt	          d                              t          |                                                    }|                    d	                    |                                                      |
                                S )z>Return a single hash value of a list of paths (files or dirs).r   hashlibrP   )rJ   rf   rg   OSError
__import__sha256rX   encodeupdaterA   	hexdigest)r   sizephs       rG   get_hashr      s    D  	BGAJJ&&DD 	 	 	H	9$$SYY%5%5%7%788AHHRWWU^^""$$%%%;;==s   %
22imgImage.Imagetuple[int, int]c                    | j         }| j        dk    rR	 |                                 x}r*|                    dd          }|dv r|d         |d         f}n# t          $ r Y nw xY w|S )zReturn exif-corrected PIL size.JPEGi  N>         r?   r   )r   formatgetexifgetrk   )r   sexifrotations       rG   	exif_sizer      s    A
zV	{{}}$t #88C..v%%!ad
A 	 	 	D	Hs   A A 
A"!A"argstuplec                4   | \  \  }}}d\  }}}	 t          j        |          }|                                 t          |          }|d         |d         f}|d         dk    |d         dk    z  sJ d| d            |j                                        t          v sJ d|j         dt                       |j                                        d	v rt          |d
          5 }	|	                    dd           |		                                dk    rDt          j        t          j        |                                        |ddd           | | d}ddd           n# 1 swxY w Y   d}n"# t          $ r}
d}| | d|
 }Y d}
~
nd}
~
ww xY w||f|||fS )zVerify one image.)r   r   rP   r?   r   	   image size  <10 pixelszInvalid image format . >   r!   r(   rZ         r   d   subsamplingquality!: corrupt JPEG restored and savedN : ignoring corrupt image/label: )r   ri   verifyr   r   lowerIMG_FORMATSFORMATS_HELP_MSGseekrj   r	   exif_transposesaverk   )r   im_fileclsrW   nfncmsgimshapers   es              rG   verify_imager      s$   !NWcFKBCFZ  
		"q58$a1qA.PP0Pe0P0P0PPP.y  K///1h1h1hVf1h1h///9??//gt$$ Pr16688{**+EJw,?,?@@EEgvcdnqErrr#OWOOOC	P P P P P P P P P P P P P P P
  F F FEEE!EEF S>2r3&&s=   CE0 #A3E"E0 "E&&E0 )E&*E0 0
F:F

Flistc                2   | \  }}}}}}}}dddddg df\  }	}
}}}}	 t          j        |          }|                                 t          |          }|d         |d         f}|d         dk    |d         dk    z  sJ d| d            |j                                        t          v sJ d|j         d	t                       |j                                        d
v rt          |d          5 }|                    dd           |	                                dk    rDt          j        t          j        |                                        |ddd           | | d}ddd           n# 1 swxY w Y   t          j                            |          rd}
t          |d          5 }d |	                                                                                                D             }t%          d |D                       rp|snt'          j        d |D             t&          j                  }d |D             t'          j        |                    dd          t1                    fd          }t'          j        |t&          j                  }ddd           n# 1 swxY w Y   t3          |          x}r|rZ|j        d         d||z  z   k    sJ dd||z  z    d            |ddddf                             d|          ddddf         }n8|j        d         dk    sJ d|j        d          d            |ddddf         }|                                d k    sJ d!||d k                          |                                d"k    sJ d#||d"k                           |rdn|dddf                                         }||k     s#J d$t;          |           d%| d&|dz
               t'          j        |dd'(          \  }}t3          |          |k     r2||         }rfd)|D             | | d*|t3          |          z
   d+}n]d}t'          j        d|rd||z  z   ndft&          j                  }n.d}	t'          j        d|rd||z  z   ndft&          j                  }|r|ddddf                             d||          }|dk    rht'          j         |d,         dk     |d-         dk     z  d.d/          !                    t&          j                  }t'          j        ||d0         gd1          }|ddddf         }|||||	|
|||f
S # tD          $ r!}d}| | d2| }ddddd|	|
|||g
cY d}~S d}~ww xY w)3zVerify one image-label pair.r   rP   Nr?   r   r   r   zinvalid image format r   >   r!   r(   rZ   r   r   r   r   r   r   r   utf-8encodingc                T    g | ]%}t          |          |                                &S  )rc   splitrC   rD   s     rG   rH   z&verify_image_label.<locals>.<listcomp>   s+    QQQA#a&&QaggiiQQQrI   c              3  <   K   | ]}t          |          d k    V  dS )r   N)rc   r   s     rG   	<genexpr>z%verify_image_label.<locals>.<genexpr>   s,      ..as1vvz......rI   c                    g | ]
}|d          S )r   r   r   s     rG   rH   z&verify_image_label.<locals>.<listcomp>   s    '9'9'9!'9'9'9rI   dtypec                    g | ]>}t          j        |d d         t           j                                      dd          ?S )r?   Nr   r   )rl   arrayfloat32reshaper   s     rG   rH   z&verify_image_label.<locals>.<listcomp>   sB    ]]]UV122bj A A A I I"a P P]]]rI   r   rO   zlabels require z columns eachzlabels require 5 columns, z columns detectedg)\(?z,non-normalized or out of bounds coordinates g{Gzz$negative class labels or coordinate zLabel class z exceeds dataset class count z. Possible class labels are 0-T)axisreturn_indexc                     g | ]
}|         S r   r   )rC   rD   segmentss     rG   rH   z&verify_image_label.<locals>.<listcomp>   s    #;#;#;AHQK#;#;#;rI   : z duplicate labels removed).r   ).r?   g              ?).N)r   r   )#r   ri   r   r   r   r   r   r   r   rj   r	   r   r   rJ   pathisfilestrip
splitlinesanyrl   r   r   concatenater   r   rc   r   maxrb   rV   uniquezeroswhereastyperk   )r   r   lb_filerW   keypointnum_clsnkptndim
single_clsnmr   ner   r   	keypointsr   r   rs   lbclassesnlpointsmax_clsrw   ikpt_maskr   r   s                              @rG   verify_image_labelr      s   JNGGWfhtZ/0!Q2r4/G,BBC9@CZ  
		"q58$a1qA.PP0Pe0P0P0PPP.y  K///1h1h1hVf1h1h///9??//gt$$ Pr16688{**+EJw,?,?@@EEgvcdnqErrr#OWOOOC	P P P P P P P P P P P P P P P 7>>'"" %	WBg000 4AQQ)9)9)D)D)F)FQQQ..2..... _ _ h'9'9b'9'9'9LLLG]]Z\]]]HQ)?)?PXAYAY(Z\]^^BXb
3334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 WW}r [ '8A;1td{?;;;=oPQTX[_T_P_=o=o=o;;;122Y..r488BQB?FF8A;!+++-h"(ST+-h-h-h+++122YFzz||t+++-s\bcilpcp\q-s-s+++vvxx5(((*aQSTVY^T^Q_*a*a(((  *=!!r!!!Q$x||~~(((A3w<< A Ag A A3:Q;A A )(( y!$???1q66B;;AB <#;#;#;#;#;#;#;#VWVVSVVVVVCXqx"F1td{??QGrzZZZB18Bq4$;C2:VVVB 	V111abb5	))"dD99Iqyy8Yv%6%:y?PST?T$UWZ\_``gghjhrssNIx	7J+KRTUUU	2A2YE8YBBKK C C CEEE!EEdD$b"b"cBBBBBBBCsi   CU+ 2A3E1%U+ 1E55U+ 8E596U+ /C,J'U+ 'J++U+ .J+/J;U+ +
V5VVV
image_pathtxt_path	label_mapdict[int, str]c                   ddl m} ddlm} t	          j        t          j        |                     }|j        dd         \  }}g }t          |d          5 }	|	D ]q}
t          t          |
                                          \  }}}}}||dz  z
  |z  }||dz  z
  |z  }||z  }||z  } |j        ||||t          |          f           r	 ddd           n# 1 swxY w Y   |                    d          \  }}|D ]\  }}}}}t          d  ||d	          D                       }|                    ||f||d|d
          }|                    |           d|d         z  d|d         z  z   d|d         z  z   }|                    ||dz
  ||         |dk     rdnd|           |                    |           |                                 dS )a  Visualize YOLO annotations (bounding boxes and class labels) on an image.

    This function reads an image and its corresponding annotation file in YOLO format, then draws bounding boxes around
    detected objects and labels them with their respective class names. The bounding box colors are assigned based on
    the class ID, and the text color is dynamically adjusted for readability, depending on the background color's
    luminance.

    Args:
        image_path (str): Path to the image file to annotate. The file must be readable by PIL.
        txt_path (str): Path to the annotation file in YOLO format, which should contain one line per object.
        label_map (dict[int, str]): A dictionary that maps class IDs (integers) to class labels (strings).

    Examples:
        >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # Should include all annotated classes
        >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
    r   N)colorsr   r   r   r?   c              3      K   | ]	}|d z  V  
dS )   Nr   )rC   cs     rG   r   z.visualize_image_annotations.<locals>.<genexpr>-  s&      <<!a#g<<<<<<rI   Fnone)	linewidth	edgecolor	facecolorgz6?g,C?g]m{?rO   g      ?whiteblack)colorbackgroundcolor)matplotlib.pyplotpyplotultralytics.utils.plottingr   rl   r   r   ri   r   maprS   r   rh   rV   subplotsr   	Rectangle	add_patchtextimshowshow)r   r   r   pltr   r   
img_height	img_widthr   filelineclass_idx_centery_centerwidthheightrD   ywr   rw   axlabelr   rect	luminances                             rG   visualize_image_annotationsr    sc   " $#####111111
(5:j))
*
*CIbqbMJ	K	h	)	)	) <T 	< 	<D:=eTZZ\\:R:R7Hh%EAI%2AFQJ&*4A	!A#AK1aCMM:;;;;	<< < < < < < < < < < < < < < < LLOOEAr( r r1aE<<vveU';';<<<<<}}aVQQ%SY}ZZ
TU1X%q(99FU1X<MM	
1q5)E*Y__''RYkpqqqqIIcNNNHHJJJJJs   A5CC!Cr?   imgszpolygonslist[np.ndarray]r   downsample_ratio
np.ndarrayc                P   t          j        | t           j                  }t          j        |t           j                  }|                    |j        d         ddf          }t          j        |||           | d         |z  | d         |z  }}t          j	        |||f          S )a]  Convert a list of polygons to a binary mask of the specified image size.

    Args:
        imgsz (tuple[int, int]): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is a 1D array of coordinates with length M, where
            M % 2 = 0 (alternating x, y values).
        color (int, optional): The color value to fill in the polygons on the mask.
        downsample_ratio (int, optional): Factor by which to downsample the mask.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    r   r   r   r   )r   r?   )
rl   r   uint8asarrayint32r   r   cv2fillPolyresize)r  r  r   r   masknhnws          rG   polygon2maskr,  6  s     8E***Dz("(333H!2B :;;HLxu----Ah**E!H8H,HB:dRH%%%rI   c                L     t          j         fd|D                       S )a`  Convert a list of polygons to a set of binary masks of the specified image size.

    Args:
        imgsz (tuple[int, int]): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array of coordinates that can be reshaped to
            (-1, 2) as (x, y) point pairs.
        color (int): The color value to fill in the polygons on the masks.
        downsample_ratio (int, optional): Factor by which to downsample each mask.

    Returns:
        (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
    c                \    g | ](}t          |                    d           g          )S )r   )r,  r   )rC   rD   r   r   r  s     rG   rH   z"polygons2masks.<locals>.<listcomp>]  s5    eeeWX\%!))B--%AQRReeerI   )rl   r   )r  r  r   r   s   ` ``rG   polygons2masksr/  N  s3     8eeeeee\deeefffrI   r   tuple[np.ndarray, np.ndarray]c                   t          j        | d         |z  | d         |z  ft          |          dk    rt           j        nt           j                  }g }g }|D ]}}t          | |                    d          g|d          }|                    |                    |j	                             |                    |
                                           ~t          j        |          }t          j        |           }t          j        |          |         }t          t          |                    D ]/}	||	         |	dz   z  }||z   }t          j        |d|	dz             }0||fS )z:Return a downsampled overlap mask and sorted area indices.r   r?   r   r   r   )r   r   )a_mina_max)rl   r   rc   r%  r#  r,  r   rh   r   r   sumr$  argsortr   rangeclip)
r  r   r   masksareasmssegmentr)  indexr   s
             rG   polygons2masks_overlapr=  `  si    H	q%	%uQx3C'CDh--#--bhh28  E E	B ! !__R  !-	
 
 
 			$++ek**+++TXXZZ    JuEJvE	"e	B3x==!! 5 5!uAQa!e444%<rI   r   r   c                    t                               d                    p!t                               d                    }|s J d                                  d            t	          |          dk    r fd|D             }t	          |          dk    s2J d                                  dt	          |           d|             |d	         S )
a  Find and return the YAML file associated with a Detect, Segment or Pose dataset.

    This function searches for a YAML file at the root level of the provided directory first, and if not found, it
    performs a recursive search. It prefers YAML files that have the same stem as the provided path.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    z*.yamlzNo YAML file found in ''r?   c                4    g | ]}|j         j         k    |S r   )stem)rC   rs   r   s     rG   rH   z%find_dataset_yaml.<locals>.<listcomp>  s'    999qQVty%8%8%8%8%8rI   zExpected 1 YAML file in 'z', but found z.
r   )r   globrglobresolverc   )r   rQ   s   ` rG   find_dataset_yamlrE  }  s     8$$%%Cdjj.B.B)C)CE===DLLNN=====5
5zzA~~9999E999u::???kkkUXY^U_U_kkdikk???8OrI   Tdatasetautodownloadbooldict[str, Any]c           
     
   t          |           }d}t          j        |          st          |          r8t	          |t
          dd          }t          t
          |z            }|j        d}}t          j	        |d          dD ]^}|vrX|dk    sdvr"t          t          |  d	| d
                    t          j        d                               d          d<   _dvr#dvrt          t          |  d                    dv radv r]t          d                   d         k    r>t          t          |  dt          d                    dd          d                    dvr#d t!          d                   D             d<   nt          d                   d<   t#          d                   d<                       dd          d<   t'          |p<                    d          p't'                              dd                    j                                                  s0                                st
          z                                  d<   dD ]}                    |          rt/          |         t0                    r|         z                                  }|                                s@|                             d          r%|         dd         z                                  }t1          |          |<   fd|         D             |<   ΈfddD             \  }}|rd t/          |t4                    r|n|gD             }t7          d |D                       st9          |           }	t          j        d           d|	 d t=          d! |D                        d"}
|r|rt          j        |
           n%|
d#t
           d$t>           d"z  }
tA          |
          tC          j!                    }d}|                    d%          r-|"                    d&          rt	          |t
          d'           nh|                    d(          rAt          j        d)| d*           tG          j$        |%                                d+           ntM          |d,i           d-tO          tC          j!                    |z
  d.           d/}|d0v rd1| d2tQ          d3t
                     nd4| d5}t          j        d6| d7           tS          tU          d                   rd8nd9           S ):am  Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found.

    Returns:
        (dict[str, Any]): Parsed dataset information and paths.
    rP   TFdirunzipdelete)append_filename)trainvalrQ  
validation 'uE   :' key missing ❌.
'train' and 'val' are required in all data YAMLs.zBrenaming data YAML 'validation' key to 'val' to match YOLO format.namesr   uI    key missing ❌.
 either 'names' or 'nc' are required in all data YAMLs.z 'names' length z
 and 'nc: z' must match.c                    g | ]}d | S )class_r   )rC   r   s     rG   rH   z%check_det_dataset.<locals>.<listcomp>  s    AAA!!AAArI   channels   r   	yaml_file)rP  rQ  testminivalz../Nc                X    g | ]&}t          |z                                            'S r   )rX   rD  )rC   rD   r   s     rG   rH   z%check_det_dataset.<locals>.<listcomp>  s1    FFF3q113344FFFrI   c              3  B   K   | ]}                     |          V  d S N)r   )rC   rD   datas     rG   r   z$check_det_dataset.<locals>.<genexpr>  s-      77adhhqkk777777rI   )rQ  r   c                P    g | ]#}t          |                                          $S r   )r   rD  r   s     rG   rH   z%check_det_dataset.<locals>.<listcomp>  s(    TTTQtAww  TTTrI   c              3  >   K   | ]}|                                 V  d S r^  existsr   s     rG   r   z$check_det_dataset.<locals>.<genexpr>  s*      ++!188::++++++rI   z	Dataset 'z"' images not found, missing path 'c              3  B   K   | ]}|                                 |V  d S r^  rb  r   s     rG   r   z$check_det_dataset.<locals>.<genexpr>  s4      HjHjq_`_g_g_i_iHjHjHjHjHjHjHjrI   r?  z%
Note dataset download directory is 'z'. You can update this in 'http.zip)urlrL  rN  zbash zRunning z ...checkyaml(r?   zs)>   Nr   u   success ✅ z, saved to boldzfailure u    ❌zDataset download 
z	Arial.ttfzArial.Unicode.ttf)+r   zipfile
is_zipfiler   r   r   rE  parentr   loadSyntaxErrorr   r   r_   poprc   r6  r
   r   r   rc  is_absoluterD  
isinstancerX   
startswithr   allr   ro   nextr   FileNotFoundErrorrd   endswith
subprocessrunr   execroundr   r   r   )rF  rG  r  extract_dirnew_dirkrD   rQ  r   namemtrdtr_  r   s                 @@rG   check_det_datasetr    s    gD K$ 7:d#3#3 7,d5QQQ !788$(K\ 9T4000D  1 1D==Ezz\55!grrrrrss   N_```((<00DKdt4//&G!w!w!wxxyyy$44<<CW,>,>$t*,L,L&G!t!tSg=O=O!t!t[_`d[e!t!t!tuuvvvdAAuT$Z/@/@AAAWg''T
%d7m44DMxx
A..D Ytxx//Y4b8Q8Q3R3R3YZZD;;== /!1!1!3!3 /t#,,.. DL. G G88A;; 	G$q'3'' GDG^,,..xxzz 7d1g&8&8&?&? 7Q+4466Aa&&QFFFFd1gFFFQ 8777#6777FC
 3TT*S$2G2G+R33cUTTT++s+++++ 	3W%%DKOOOmDmmDHjHjTWHjHjHjDjDjmmmA +\ +q!!!!wlwwgtwwww'***	AA||F## (

6(:(: (!dCCCCCg&& (.q...///qwwyy55555Q'''2U49;;?A..222BRSW`R`R`NrNNhv|.L.LNNNfyqsfyfyfyAK1A111222htG}55N{{;NOOOKrI   
str | Pathr   c                	   t          |                               d          rt          | t          dd          } nIt          |                               d          r't          |           }t          |t          dd          } t          |           } |                                 r| n	t          | z                                  }|                                s|j	        dk    rt          d|  d          t          j        d           t          j        d	| d
           t          j                    }t          |           dk    r.t          j        dt          t"          dz            gd           n!t%          t&           d|  d|j                   t          j        dt          j                    |z
  ddt+          d|           d           |dz  }|                                st          j        d|            t-          |                    d                    t-          |                    d                    z   x}r>ddlm} t          j        dt5          |           d            ||d           }|dz  }nt          j        d!| d"           |d#z                                  r|d#z  n9|d$z                                  r|d$z  n|d%z                                  r|d%z  nd&}|d'z                                  r|d'z  nd&}	|d#k    r|st          j        d(           |	}n|d'k    r|	st          j        d)           |}	t5          d* |dz                      d+          D                       }
d, |dz                                  D             }t?          tA          tC          |                              }|||	d-"                                D ]\  }}t+          | d.           d/| d0}|t          j        |           5d1 |                    d2          D             }t5          |          }t5          d3 |D                       }|dk    r9|dk    rtG          |  d4| d5          t          j        | d6| d7| d8           ||
k    r$t          j        | d6| d7| d9|
 d:| d;
           t          j        | d6| d7| d<           |||	|
|d=d>S )?a  Check a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information. If the
    dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''.

    Returns:
        (dict[str, Any]): A dictionary containing the following keys:

            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (dict[int, str]): A dictionary of class names in the dataset.
    )zhttp:/zhttps:/TFrK  )rf  z.tarz.gzrP   zSClassification datasets must be a directory (data="path/to/dir") not a file (data="z7"), See https://docs.ultralytics.com/datasets/classify/z Dataset not found, missing path z, attempting download...imagenetbashzdata/scripts/get_imagenet.shrh  /rf  )rL  u   Dataset download success ✅ (r\   zs), saved to rl  rm  rP  z#Dataset 'split=train' not found at z*.jpgz*.pngr   )split_classify_datasetzFound z1 images in subdirectories. Attempting to split...g?)train_ratiozNo images found in z or its subdirectories.rQ  rR  validNrZ  z:Dataset 'split=val' not found, using 'split=test' instead.z:Dataset 'split=test' not found, using 'split=val' instead.c                :    g | ]}|                                 |S r   )is_dirr   s     rG   rH   z%check_cls_dataset.<locals>.<listcomp>5  s%    FFFA188::FaFFFrI   *c                D    g | ]}|                                 |j        S r   )r  r  r   s     rG   rH   z%check_cls_dataset.<locals>.<listcomp>6  s'    JJJqxxzzJQVJJJrI   rP  rQ  rZ  : ...c                b    g | ],}|j         d d                                         t          v *|-S r?   Nsuffixr   r   )rC   r   s     rG   rH   z%check_cls_dataset.<locals>.<listcomp>?  s9    ___dABB8M8M8O8OS^8^8^T8^8^8^rI   *.*c                    h | ]	}|j         
S r   )rp  )rC   r  s     rG   	<setcomp>z$check_cls_dataset.<locals>.<setcomp>A  s    444ddk444rI   rS  z:' no training images foundz found z images in z classes (no images found)z classes (requires z classes, not r^   u    classes ✅ rX  )rP  rQ  rZ  r   rT  rW  )$rX   rv  r   r   rz  r   r   r  rD  r  
ValueErrorr   ro   r_   rd   r{  r|  r   r   r   rp  r   r   rC  ultralytics.data.splitr  rc   errorrc  rB  iterdirdict	enumeratesorteditemsry  )rF  r   r  data_dirr  	train_setimage_filesr  val_settest_setr   rT  r  vrW   rQ   r   nds                     rG   check_cls_datasetr    s   ( 7||455 R\eTTT	W		6	7	7 R'"",d5QQQ7mmG">>++I,2HRRTTH?? w?b  Ffm F F F   	B\(\\\]]]IKKw<<:%%NFC/M(M$N$NOW[\\\\\
22W222HHHHuTY[[1_uuuW_`fhpWqWquuuvvv7"I 	RHYHHIIIx~~g6677$x~~g?V?V:W:WWW; 	REEEEEEKd[!1!1dddeee--hCHHHH 7*IILPxPPPQQQ u$$&&	5 |#++--X$$ w&&((X  &.%6$>$>$@$@Jx&  dH~~g~STTT	&STTT	FF(W,22377FFF	G	GBJJh099;;JJJE6%==))**E $GXFFLLNN P P1WWW%%.....9K__aggenn___EUB44e44455BQww<<+w,X,X!,X,X,XYYYNf#b#bR#b#bB#b#b#bccccrllrllbllUWllgilllmmmmvNNbNNRNNNOOOOwUZhijjjrI   c                  N    e Zd ZdZddd
Zedd            ZddZdddZddZ	dS )HUBDatasetStatsaD  A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip).
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify', 'obb'.
        autodownload (bool): Attempt to download dataset if not found locally.

    Attributes:
        task (str): Dataset task type.
        hub_dir (Path): Directory path for HUB dataset files.
        im_dir (Path): Directory path for compressed images.
        stats (dict): Statistics dictionary containing dataset information.
        data (dict): Dataset configuration data.

    Methods:
        get_json: Return dataset JSON for Ultralytics HUB.
        process_images: Compress images for Ultralytics HUB.

    Examples:
        >>> from ultralytics.data.utils import HUBDatasetStats
        >>> stats = HUBDatasetStats("path/to/coco8.zip", task="detect")  # detect dataset
        >>> stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment")  # segment dataset
        >>> stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose")  # pose dataset
        >>> stats = HUBDatasetStats("path/to/dota8.zip", task="obb")  # OBB dataset
        >>> stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify")  # classification dataset
        >>> stats.get_json(save=True)
        >>> stats.process_images()

    Notes:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
    
coco8.yamldetectFr   rX   taskrG  rH  c                   t          |                                          }t          j        d| d           || _        | j        dk    r$t          |          }t          |          }||d<   n|                     t          |                    \  }}}	 t          j	        |          }d|d<   t          j
        ||           t          ||          }||d<   n"# t          $ r}	t          d          |	d}	~	ww xY wt          |d          d          | _        | j        d	z  | _        t          |d
                   t!          |d
                                                   d| _        || _        dS )zInitialize class.z Starting HUB dataset checks for z....classifyr   rP   zerror/HUB/dataset_stats/initNz-hubr<   rT  )r   rT  )r   rD  r   ro   r  r   r  _unzipr   rq  r   r  rk   hub_dirim_dirrc   r   valuesstatsr_  )
selfr   r  rG  	unzip_dirr_  rw   r  	yaml_pathr   s
             rG   __init__zHUBDatasetStats.__init__q  st   Dzz!!##AtAAABBB	9
"""4((I$Y//D$DLL%)[[d%<%<"AxGy++!V	)T***(LAA'V G G G >??QFG tF|11122lX-W..d7m>R>R>T>T9U9UVV
			s   AC 
C:%C55C:r   r:   tuple[bool, str, Path]c                   t          |                               d          sdd| fS t          | | j                  }|                                sJ d|  d| d            dt          |          t          |          fS )	zUnzip data.zip.rf  FN)r   zError unzipping z, z6 not found. path/to/abc.zip MUST unzip to path/to/abc/T)rX   rz  r   rp  r  rE  )r   r  s     rG   r  zHUBDatasetStats._unzip  s     4yy!!&)) 	%$$$t$+666	!! 	
 	
hthhyhhh	
 	
! S^^%6y%A%AAArI   rs   c                Z    t          || j        t          |          j        z             dS )z)Save a compressed image for HUB previews.N)compress_one_imager  r   r  )r  rs   s     rG   _hub_opszHUBDatasetStats._hub_ops  s'    1dkDGGL899999rI   r   verboser  c           
          fddD ]}d j         |<    j                            |          }|*d t          |                              d          D             }|sY j        dk    rddlm}  | j        |                   }t          j	        t          |j                                                t                    }|j        D ]}	||	d	         xx         d	z  cc<   t          |          |                                d
t          |          d|                                dd |j        D             d j         |<   Bddlm}
  |
 j        |          j         j                  }t          j         fdt'          |j        t          |          d          D                       }t          |                                          |                    d                                          d
t          |          t          t          j        |dk    d	                                                    |dk                        d                                          dfdt/          |j        |j                  D             d j         |<   |r j                            dd            j        dz  }t7          j        d|                                 d           t=          |dd          5 }t?          j          j         |           ddd           n# 1 swxY w Y   |r.t7          j        t?          j!         j         dd                      j         S )z(Return dataset JSON for Ultralytics HUB.c                   j         dk    r	| d         }nj         dv rd | d         D             }noj         dk    rL| d         j        \  }}}t          j        | d         | d                             |||z            fd          }nt          d	j          d
          t          | d         |          }d |D             S )z:Update labels to integer class and 4 decimal place floats.r  bboxes>   obbr;  c                6    g | ]}|                                 S r   )flattenr   s     rG   rH   z<HUBDatasetStats.get_json.<locals>._round.<locals>.<listcomp>  s     GGGqqyy{{GGGrI   r   poser   r?   zUndefined dataset task=r@   r   c                X    g | ]'\  }}t          |d                    gd |D             (S )r   c              3  P   K   | ]!}t          t          |          d           V  "dS )   N)r~  rS   r   s     rG   r   zFHUBDatasetStats.get_json.<locals>._round.<locals>.<listcomp>.<genexpr>  s2      !E!E%a!"4"4!E!E!E!E!E!ErI   )rV   )rC   r   r   s      rG   rH   z<HUBDatasetStats.get_json.<locals>._round.<locals>.<listcomp>  s=    ___91fS1YYF!E!Ef!E!E!EF___rI   )r  r   rl   r   r   r  zip)r=   coordinatesnnkr  zippedr  s         rG   _roundz(HUBDatasetStats.get_json.<locals>._round  s    yH$$$X.000GGF:4FGGGf$$";/5	2r nfX.>{@S@[@[\]_adf_f@g@g-hjkll !G49!G!G!GHHH44F__X^____rI   r  Nc                b    g | ],}|j         d d                                         t          v *|-S r  r  )rC   rs   s     rG   rH   z,HUBDatasetStats.get_json.<locals>.<listcomp>  s9    ___118ABB<;M;M;O;OS^;^;^Q;^;^;^rI   r  r  r   )ImageFolderr?   )total	per_class)r  
unlabelledr  c                @    g | ]\  }}t          |          j        |iS r   r   r  )rC   r  r  s      rG   rH   z,HUBDatasetStats.get_json.<locals>.<listcomp>  s)    JJJTQQa0JJJrI   )instance_statsimage_statsr=   YOLODataset)img_pathr_  r  c                    g | ]S}t          j        |d                              t                                                    j        d                   TS )r   r   )	minlength)rl   bincountr   rV   r  r_  )rC   r  r  s     rG   rH   z,HUBDatasetStats.get_json.<locals>.<listcomp>  s`       ! E%L$7$7$<$<$D$D$F$FRVR[\`Rabbb  rI   
Statisticsr  descc                T    g | ]$\  }}t          |          j         |          i%S r   r  )rC   r  r  r  s      rG   rH   z,HUBDatasetStats.get_json.<locals>.<listcomp>  s2    kkkTQQffQii8kkkrI   Tparentsexist_okz
stats.jsonzSaving r  r  r   r   r   F)indent	sort_keys)"r  r_  r   r   rC  r  torchvision.datasetsr  rl   r   rc   r   r   rV   imgstolistultralytics.datar  r   r   r=   r4  rw  r  im_filesr  mkdirr   ro   rD  ri   jsondumpdumps)r  r   r  r   r   rQ   r  rF  rD   r   r  
stats_pathrs   r  s   `            @rG   get_jsonzHUBDatasetStats.get_json  s   	` 	` 	` 	` 	` , ,	 ,	E $DJu9==''D |__T

 0 0 7 7___E  yJ&&<<<<<<%+di&677HS112299#>>!, " "BbeHHHMHHHH 14G188::&V&V-0\\YZYaYaYcYc#d#dJJW\JJJ% %
5!! 988888%+ty/?diVZV_```H   %)'.GS_%`%`%`    14AEEGG155QR88??K\K\&]&]!$W&)"&a*;*;*?*?*A*A&B&B&'!e[[^^%:%:%<%<$ $
 lkkkSIY[b[iEjEjkkk% %
5!!  	)Ltd;;;4JK;*"4"4"6"6;;;<<<j#888 )A	$*a((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) 	KK
4:a5IIIJJJzs   L--L14L1c                   ddl m} | j                            dd           dD ]}| j                            |           || j        |         | j                  }t          t                    5 }t          |	                    | j
        |j                  t          |          | d	          D ]}	 ddd           n# 1 swxY w Y   t          j        d
| j                    | j        S )z$Compress images for Ultralytics HUB.r   r  Tr  r  N)r  r_  z imagesr  zDone. All images saved to )r  r  r  r  r_  r   r   r   r   imapr  r  rc   r   ro   )r  r  r   rF  poolrw   s         rG   process_imageszHUBDatasetStats.process_images  sE   000000$666+ 	 	Ey}}U##+!k49U+;$)LLLGK(( Ddiiw7GHHPST[P\P\fkctctctuuu  A               	>>>???{s   3ACC	C	N)r  r  F)r   rX   r  rX   rG  rH  )r   r   r:   r  )rs   rX   )FF)r   rH  r  rH  r:   r  )r:   r   )
__name__
__module____qualname____doc__r  staticmethodr  r  r  r  r   rI   rG   r  r  O  s         B    6 B B B \B: : : :H H H H HT     rI   r    rs   f_new
str | Nonemax_dimr   c           	     2   	 dt           _        t          j        |           }|j        dv r|                    d          }|t          |j        |j                  z  }|dk     rA|                    t          |j        |z            t          |j        |z            f          }|
                    |p| d|d           dS # t          $ r}t          j        d|  d	|            t          j        |           }|j        dd
         \  }}|t          ||          z  }|dk     rCt          j        |t          ||z            t          ||z            ft          j                  }t          j        t'          |p|           |           Y d}~dS d}~ww xY w)a6  Compress a single image file to reduced size while preserving its aspect ratio and quality using either the
    Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it
    will not be resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image.
        quality (int, optional): The image compression quality as a percentage.

    Examples:
        >>> from pathlib import Path
        >>> from ultralytics.data.utils import compress_one_image
        >>> for f in Path("path/to/dataset").rglob("*.jpg"):
        >>>    compress_one_image(f)
    N>   LARGBARGBr   r   T)r   optimizezHUB ops PIL failure r   r   )interpolation)r   MAX_IMAGE_PIXELSri   modeconvertr   r  r  r(  rV   r   rk   r   r_   r&  imreadr   
INTER_AREAimwriterX   )	rs   r  r  r   r   r  r   	im_heightim_widths	            rG   r  r    s   ")!%Z]]7n$$E""Bc")RX...s77C1--s29q=/A/ABCCB

FGdCCCCC ) ) )6a66166777Z]] hrrl	8c)X...s77BX\!2!2C	A4F4F GWZWefffBC
OOR((((((((()s   B=C 
FC FFr  c                    ddl }|                                 t          j        t	          |           d                                          }|                                 |S )z1Load an Ultralytics *.cache dictionary from path.r   NT)allow_pickle)gcdisablerl   rq  rX   itemenable)r   r  caches      rG   load_dataset_cache_filer    sP    IIIJJLLLGCIID1116688EIIKKKLrI   rD   versionc                @   ||d<   t          |j                  r|                                r|                                 	 t	          t          |          d          5 }t          j        ||           ddd           n# 1 swxY w Y   t          j	        |  d|            dS # t          $ rJ}t          |                              d           t          j        |  d| d|            Y d}~dS d}~ww xY wt          j        |  d	|j         d
           dS )z9Save an Ultralytics dataset *.cache dictionary x to path.r  wbNzNew cache created: T)
missing_oku'   WARNING ⚠️ Failed to save cache to r   zCache directory z" is not writable, cache not saved.)r   rp  rc  unlinkri   rX   rl   r   r   ro   rk   r   r_   )rW   r   rD   r  r  r   s         rG   save_dataset_cache_filer   #  s   AiL$$ c;;== 	KKMMM	Zc$ii&& !$a   ! ! ! ! ! ! ! ! ! ! ! ! ! ! !K6<<d<<===== 	Z 	Z 	ZJJ...NfXXTXXUVXXYYYYYYYYY	Z 	&aa$+aaabbbbbs<   B(  B6B( BB( 	B
B( (
C<2?C77C<)r8   r9   r:   r9   )rM   rN   rO   rP   )
rQ   r9   rR   rS   rT   rS   rU   rV   rW   rX   )r   r9   r:   rX   )r   r   r:   r   )r   r   r:   r   )r   r   r:   r   )r   rX   r   rX   r   r   )r?   r?   )
r  r   r  r  r   rV   r   rV   r:   r!  )r?   )r  r   r   r  r   rV   r:   r0  )r   r   r:   r   )T)rF  rX   rG  rH  r:   rI  )rP   )rF  r  r   rX   r:   rI  )Nr  rN   )rs   rX   r  r  r  rV   r   rV   )r   r   r:   r  )rW   rX   r   r   rD   r  r  rX   )D
__future__r   r  rJ   r`   r{  rd   rn  multiprocessing.poolr   pathlibr   tarfiler   typingr   r&  numpyrl   PILr   r	   ultralytics.nn.autobackendr
   ultralytics.utilsr   r   r   r   r   r   r   r   r   r   r   r   ultralytics.utils.checksr   r   r   ultralytics.utils.downloadsr   r   r   ultralytics.utils.opsr   HELP_URLr   VID_FORMATSr   rL   r   r   r   r   r   r  r,  r/  r=  rE  r  r  r  r  r  r   r   rI   rG   <module>r/     s   # " " " " "  				        + + + + + +                   



             8 8 8 8 8 8                            F E E E E E E E E E K K K K K K K K K K 0 0 0 0 0 0W    dccZkZZ[ZZ T T T T moJ
 J
 J
 J
 J
Z
 
 
 
   ' ' ' '2EC EC EC ECP( ( ( (X ab& & & & &2 ]^g g g g g& QR    :   ([ [ [ [ [|]k ]k ]k ]k ]k@c c c c c c c cL!) !) !) !) !)H   c c c c c crI   