
    /j1                    6   d dl mZ d dlmZmZ d dlmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZ ddlmZ  G d de          Z G d de          Z G d dee          Z G d dee          Z G d de          ZdS )    )annotations)copydeepcopy)PathN)YOLOConcatDatasetbuild_yolo_dataset)LoadVisualPrompt)DetectionTrainerDetectionValidator)
YOLOEModel)DEFAULT_CFGLOGGERRANK)unwrap_model   )WorldTrainerFromScratch   )YOLOEDetectValidatorc                  F     e Zd ZdZeddfd fdZddd
Zd ZdddZ xZ	S )YOLOETrainera`  A trainer class for YOLOE object detection models.

    This class extends DetectionTrainer to provide specialized training functionality for YOLOE models, including custom
    model initialization, validation, and dataset building with multi-modal support.

    Attributes:
        loss_names (tuple): Names of loss components used during training.

    Methods:
        get_model: Initialize and return a YOLOEModel with specified configuration.
        get_validator: Return a YOLOEDetectValidator for model validation.
        build_dataset: Build YOLO dataset with multi-modal support for training.
    N	overridesdict | None
_callbacksc                    |i }|                     d          rJ d|d          d            d|d<   t                                          |||           dS )a  Initialize the YOLOE Trainer with specified configurations.

        Args:
            cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
            overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
            _callbacks (dict, optional): Dictionary of callback functions to be applied during training.
        NcompilezTraining with 'model=modelz' requires 'compile=False'Foverlap_mask)getsuper__init__)selfcfgr   r   	__class__s       h/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/models/yolo/yoloe/train.pyr    zYOLOETrainer.__init__$   sm     I==++ss-sYwEW-s-s-sss+$)	.!i44444    Tverboseboolc                    t          t          |t                    r|d         n|| j        d         t	          | j        d         d          |o
t
          dk              }|r|                    |           |S )aO  Return a YOLOEModel initialized with the specified configuration and weights.

        Args:
            cfg (dict | str, optional): Model configuration. Can be a dictionary containing a 'yaml_file' key, a direct
                path to a YAML file, or None to use default configuration.
            weights (str | Path, optional): Path to pretrained weights file to load into the model.
            verbose (bool): Whether to display model information during initialization.

        Returns:
            (YOLOEModel): The initialized YOLOE model.

        Notes:
            - The number of classes (nc) is hard-coded to a maximum of 80 following the official configuration.
            - The nc parameter here represents the maximum number of different text samples in one image,
              rather than the actual number of classes.
        	yaml_filechannelsncP   chr+   r&   )r   
isinstancedictdataminr   load)r!   r"   weightsr&   r   s        r$   	get_modelzYOLOETrainer.get_model2   s~    &  *3 5 5>C3y$49T?B''*
	
 
 
  	 JJwr%   c                x    d| _         t          | j        | j        t	          | j                  | j                  S )z9Return a YOLOEDetectValidator for YOLOE model validation.boxclsdflsave_dirargsr   )
loss_namesr   test_loaderr=   r   r>   	callbacksr!   s    r$   get_validatorzYOLOETrainer.get_validatorP   s:    -#t}4	??W[We
 
 
 	
r%   trainimg_pathstrmodebatch
int | Nonec                    t          t          | j        r+t          | j                  j                                         nd          d          }t          | j        ||| j        ||dk    ||dk              S )a  Build YOLO Dataset.

        Args:
            img_path (str): Path to the folder containing images.
            mode (str): 'train' mode or 'val' mode, users are able to customize different augmentations for each mode.
            batch (int, optional): Size of batches, this is for rectangular training.

        Returns:
            (Dataset): YOLO dataset configured for training or validation.
        r       valrD   )rG   rectstridemulti_modal)maxintr   r   rN   r   r>   r2   )r!   rE   rG   rH   gss        r$   build_datasetzYOLOETrainer.build_datasetW   sy     djO\$*--488:::aPPRTUU!Ix	45=Y[imqxix
 
 
 	
r%   )r   r   r   r   NNTr&   r'   rD   N)rE   rF   rG   rF   rH   rI   )
__name__
__module____qualname____doc__r   r    r6   rC   rS   __classcell__r#   s   @r$   r   r      s          'ae 5 5 5 5 5 5 5    <
 
 

 
 
 
 
 
 
 
 
r%   r   c                      e Zd ZdZdddZdS )	YOLOEPETraineraH  Fine-tune YOLOE model using linear probing approach.

    This trainer freezes most model layers and only trains specific projection layers for efficient fine-tuning on new
    datasets while preserving pretrained features.

    Methods:
        get_model: Initialize YOLOEModel with frozen layers except projection layers.
    NTr&   r'   c                   t          t          |t                    r|d         n|| j        d         | j        d         |o
t          dk              }|j        d         `|
J d            |r|                    |           |                                 t          | j        d         
                                          }|                    |          }|                    ||           |j        d                             |j                   t          |j        d         j        d	         d
                                       d          |j        d         j        d	         d
<   t          |j        d         j        d         d
                                       d          |j        d         j        d         d
<   t          |j        d         j        d
         d
                                       d          |j        d         j        d
         d
<   t%          |j        d         dd          t          |j        d         j        d	         d
                                       d          |j        d         j        d	         d
<   t          |j        d         j        d         d
                                       d          |j        d         j        d         d
<   t          |j        d         j        d
         d
                                       d          |j        d         j        d
         d
<   |                                 |S )a  Return YOLOEModel initialized with specified config and weights.

        Args:
            cfg (dict | str, optional): Model configuration.
            weights (str, optional): Path to pretrained weights.
            verbose (bool): Whether to display model information.

        Returns:
            (YOLOEModel): Initialized model with frozen layers except for specific projection layers.
        r)   r*   r+   r-   r.   Nz7Pretrained weights must be provided for linear probing.namesr   r   Tr   one2one_cv3)r   r0   r1   r2   r   r   savper4   evallistvaluesget_text_peset_classesfuseper   cv3requires_grad_getattrra   rD   )r!   r"   r5   r&   r   r`   tpes          r$   r6   zYOLOEPETrainer.get_modelr   s     *3 5 5>C3y$y*
	
 
 
 KO!""$]""" 	 JJw

TYw'..0011 &&%%%%BUX&&&$,U[_-@-CA-F$G$G$V$VW[$\$\BAq!$,U[_-@-CA-F$G$G$V$VW[$\$\BAq!$,U[_-@-CA-F$G$G$V$VW[$\$\BAq!5;r?M488D08R9LQ9OPQ9R0S0S0b0bcg0h0hEKO'*1-08R9LQ9OPQ9R0S0S0b0bcg0h0hEKO'*1-08R9LQ9OPQ9R0S0S0b0bcg0h0hEKO'*1-r%   rT   rU   )rW   rX   rY   rZ   r6    r%   r$   r^   r^   h   s7         , , , , , , ,r%   r^   c                  $    e Zd ZdZddd
ZddZdS )YOLOETrainerFromScratcha  Train YOLOE models from scratch with text embedding support.

    This trainer combines YOLOE training capabilities with world training features, enabling training from scratch with
    text embeddings and grounding datasets.

    Methods:
        build_dataset: Build datasets for training with grounding support.
        generate_text_embeddings: Generate and cache text embeddings for training.
    rD   NrE   list[str] | strrG   rF   rH   rI   c                0    t          j        | |||          S )a  Build YOLO Dataset for training or validation.

        This method constructs appropriate datasets based on the mode and input paths, handling both standard YOLO
        datasets and grounding datasets with different formats.

        Args:
            img_path (list[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.

        Returns:
            (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
        )r   rS   )r!   rE   rG   rH   s       r$   rS   z%YOLOETrainerFromScratch.build_dataset   s     '4T8T5QQQr%   texts	list[str]rQ   	cache_dirr   c                   t          | j                  j        }|d|                    dd                              dd           dz  }|                                rgt          j        d| d           t          j        || j	                  }t          |                                          t          |          k    r|S t          j        d	| d           t          | j                                      ||d
d          }t          t          ||                    d                              }t          j        ||           |S )ab  Generate text embeddings for a list of text samples.

        Args:
            texts (list[str]): List of text samples to encode.
            batch (int): Batch size for processing.
            cache_dir (Path): Directory to save/load cached embeddings.

        Returns:
            (dict): Dictionary mapping text samples to their embeddings.
        text_embeddings_:_/z.ptzReading existed cache from '')map_locationzCaching text embeddings to 'TF)without_reprtacache_clip_modelr   )r   r   
text_modelreplaceexistsr   infotorchr4   devicesortedkeysrf   r1   zipsqueezesave)r!   rs   rH   ru   r   
cache_pathtxt_map	txt_featss           r$   generate_text_embeddingsz0YOLOETrainerFromScratch.generate_text_embeddings   s8    TZ((3!bEMM#s4K4K4S4STWY\4]4]!b!b!bb
 	KDzDDDEEEj$+FFFGgllnn%%66@:@@@AAA ,,88VZmr8ss	s5)"3"3A"6"67788
7J'''r%   rV   rE   rq   rG   rF   rH   rI   )rs   rt   rH   rQ   ru   r   )rW   rX   rY   rZ   rS   r   rn   r%   r$   rp   rp      sP         R R R R R      r%   rp   c                  &    e Zd ZdZd Zd ZddZdS )	YOLOEPEFreeTrainera  Train prompt-free YOLOE model.

    This trainer combines linear probing capabilities with from-scratch training for prompt-free YOLOE models that don't
    require text prompts during inference.

    Methods:
        get_validator: Return standard DetectionValidator for validation.
        preprocess_batch: Preprocess batches without text features.
        set_text_embeddings: Set text embeddings for datasets (no-op for prompt-free).
    c                x    d| _         t          | j        | j        t	          | j                  | j                  S )z6Return a DetectionValidator for YOLO model validation.r8   r<   )r?   r   r@   r=   r   r>   rA   rB   s    r$   rC   z YOLOEPEFreeTrainer.get_validator   s:    -!t}4	??W[We
 
 
 	
r%   c                ,    t          j        | |          S )z_Preprocess a batch of images for YOLOE training, adjusting formatting and dimensions as needed.)r
   preprocess_batch)r!   rH   s     r$   r   z#YOLOEPEFreeTrainer.preprocess_batch   s    0u===r%   rH   rQ   c                    dS )a  No-op override for prompt-free training that does not require text embeddings.

        Args:
            datasets (list[Dataset]): List of datasets containing category names to process.
            batch (int): Batch size for processing text embeddings.
        Nrn   )r!   datasetsrH   s      r$   set_text_embeddingsz&YOLOEPEFreeTrainer.set_text_embeddings   s	     	r%   N)rH   rQ   )rW   rX   rY   rZ   rC   r   r   rn   r%   r$   r   r      sP        	 	
 
 
> > >     r%   r   c                  0     e Zd ZdZdd fd
Z fdZ xZS )YOLOEVPTrainera4  Train YOLOE model with visual prompts.

    This trainer extends YOLOETrainerFromScratch to support visual prompt-based training, where visual cues are provided
    alongside images to guide the detection process.

    Methods:
        build_dataset: Build dataset with visual prompt loading transforms.
    rD   NrE   rq   rG   rF   rH   rI   c                &   t                                          |||          }t          |t                    r1|j        D ](}|j                            t                                 )n&|j                            t                                 |S )a$  Build YOLO Dataset for training or validation with visual prompts.

        Args:
            img_path (list[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.

        Returns:
            (YOLOConcatDataset | Dataset): YOLO dataset configured for training or validation, with visual prompts for
                training mode.
        )r   rS   r0   r   r   
transformsappendr	   )r!   rE   rG   rH   datasetdr#   s         r$   rS   zYOLOEVPTrainer.build_dataset   s     ''''$>>g011 	:% 8 8##$4$6$677778 %%&6&8&8999r%   c                ^   t                                                       t          | j        j        t
                    r<| j        j        j        D ](}|j                            t                                 )dS | j        j        j                            t                                 dS )zPClose mosaic augmentation and add visual prompt loading to the training dataset.N)
r   _close_dataloader_mosaicr0   train_loaderr   r   r   r   r   r	   )r!   r   r#   s     r$   r   z'YOLOEVPTrainer._close_dataloader_mosaic  s    ((***d'/1BCC 	L&.7 8 8##$4$6$677778 8 %0778H8J8JKKKKKr%   rV   r   )rW   rX   rY   rZ   rS   r   r[   r\   s   @r$   r   r      sn               (L L L L L L L L Lr%   r   ) 
__future__r   r   r   pathlibr   r   ultralytics.datar   r   ultralytics.data.augmentr	   ultralytics.models.yolo.detectr
   r   ultralytics.nn.tasksr   ultralytics.utilsr   r   r   ultralytics.utils.torch_utilsr   world.train_worldr   rL   r   r   r^   rp   r   r   rn   r%   r$   <module>r      s   # " " " " "                B B B B B B B B 5 5 5 5 5 5 O O O O O O O O + + + + + + 7 7 7 7 7 7 7 7 7 7 6 6 6 6 6 6 7 7 7 7 7 7 % % % % % %P
 P
 P
 P
 P
# P
 P
 P
f6 6 6 6 6% 6 6 6r1 1 1 1 1l,C 1 1 1h    )@   B%L %L %L %L %L, %L %L %L %L %Lr%   