
    /j#                        d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZmZ d	Z G d
 de          Z G d de          ZdS )    )annotations)Path)AnyN)YOLODataset)ComposeFormatv8_transforms)DetectionValidator)colorstrops)RTDETRValidatorc                  <     e Zd ZdZdd fd
Zd fd	Zd	dZ xZS )
RTDETRDataseta  Real-Time DEtection and TRacking (RT-DETR) dataset class extending the base YOLODataset class.

    This specialized dataset class is designed for use with the RT-DETR object detection model and is optimized for
    real-time detection and tracking tasks.

    Attributes:
        augment (bool): Whether to apply data augmentation.
        rect (bool): Whether to use rectangular training.
        use_segments (bool): Whether to use segmentation masks.
        use_keypoints (bool): Whether to use keypoint annotations.
        imgsz (int): Target image size for training.

    Methods:
        load_image: Load one image from dataset index.
        build_transforms: Build transformation pipeline for the dataset.

    Examples:
        Initialize an RT-DETR dataset
        >>> dataset = RTDETRDataset(img_path="path/to/images", imgsz=640)
        >>> image, hw0, hw = dataset.load_image(0)
    N)datac               >     t                      j        |d|i| dS )aL  Initialize the RTDETRDataset class by inheriting from the YOLODataset class.

        This constructor sets up a dataset specifically optimized for the RT-DETR (Real-Time DEtection and TRacking)
        model, building upon the base YOLODataset functionality.

        Args:
            *args (Any): Variable length argument list passed to the parent YOLODataset class.
            data (dict | None): Dictionary containing dataset information. If None, default values will be used.
            **kwargs (Any): Additional keyword arguments passed to the parent YOLODataset class.
        r   N)super__init__)selfr   argskwargs	__class__s       b/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/models/rtdetr/val.pyr   zRTDETRDataset.__init__)   s,     	$4T4V44444    Fc                J    t                                          ||          S )a  Load one image from dataset index 'i'.

        Args:
            i (int): Index of the image to load.
            rect_mode (bool, optional): Whether to use rectangular mode for batch inference.

        Returns:
            im (np.ndarray): Loaded image as a NumPy array.
            hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
            hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.

        Examples:
            Load an image from the dataset
            >>> dataset = RTDETRDataset(img_path="path/to/images")
            >>> image, hw0, hw = dataset.load_image(0)
        )i	rect_mode)r   
load_image)r   r   r   r   s      r   r   zRTDETRDataset.load_image6   s"    " ww!!A!;;;r   c                   | j         rm| j         r| j        s|j        nd|_        | j         r| j        s|j        nd|_        | j         r| j        s|j        nd|_        t          | | j        |d          }nt          g           }|                    t          dd| j
        | j        d|j        |j                             |S )zBuild transformation pipeline for the dataset.

        Args:
            hyp (dict, optional): Hyperparameters for transformations.

        Returns:
            (Compose): Composition of transformation functions.
        g        T)stretchxywh)bbox_format	normalizereturn_maskreturn_keypoint	batch_idx
mask_ratiomask_overlap)augmentrectmosaicmixupcutmixr	   imgszr   appendr   use_segmentsuse_keypointsr&   overlap_mask)r   hyp
transformss      r   build_transformszRTDETRDataset.build_transformsI   s     < 	%'+|NDIN3CJ%)\L$)L		CI'+|NDIN3CJ&tTZdKKKJJ !J" - $ 2> -  
	
 
	
 
	
 r   )F)N)__name__
__module____qualname____doc__r   r   r4   __classcell__)r   s   @r   r   r      s         , $( 5 5 5 5 5 5 5< < < < < <&       r   r   c                  2    e Zd ZdZddZdd
ZddZddZdS )r   a  RTDETRValidator extends the DetectionValidator class to provide validation capabilities specifically tailored for
    the RT-DETR (Real-Time DETR) object detection model.

    The class allows building of an RTDETR-specific dataset for validation, applies confidence thresholding for
    post-processing, and updates evaluation metrics accordingly.

    Attributes:
        args (Namespace): Configuration arguments for validation.
        data (dict): Dataset configuration dictionary.

    Methods:
        build_dataset: Build an RTDETR Dataset for validation.
        postprocess: Apply confidence thresholding to prediction outputs.

    Examples:
        Initialize and run RT-DETR validation
        >>> from ultralytics.models.rtdetr import RTDETRValidator
        >>> args = dict(model="rtdetr-l.pt", data="coco8.yaml")
        >>> validator = RTDETRValidator(args=args)
        >>> validator()

    Notes:
        For further details on the attributes and methods, refer to the parent DetectionValidator class.
    valNc                    t          || j        j        |d| j        d| j        j        pdt	          | d          | j        	  	        S )a  Build an RTDETR Dataset.

        Args:
            img_path (str): Path to the folder containing images.
            mode (str, optional): `train` mode or `val` mode, users are able to customize different augmentations for
                each mode.
            batch (int, optional): Size of batches, this is for `rect`.

        Returns:
            (RTDETRDataset): Dataset configured for RT-DETR validation.
        FNz: )	img_pathr-   
batch_sizer(   r2   r)   cacheprefixr   )r   r   r-   r?   r   r   )r   r=   modebatchs       r   build_datasetzRTDETRValidator.build_dataset   sU     )/	)/)TtKKK((

 

 

 
	
r   predndict[str, torch.Tensor]pbatchdict[str, Any]returnc                    |S )zJReturn predictions unchanged as RT-DETR handles scaling in postprocessing. )r   rD   rF   s      r   scale_predszRTDETRValidator.scale_preds   s    r   preds7torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]list[dict[str, torch.Tensor]]c                d   t          |t          t          f          s|dg}|d         j        \  }}}|d                             d|dz
  fd          \  }}|| j        j        z  }t          j        d|j	                  g|z  }t          |          D ]\  }}	t          j        |	          }	||                             d          \  }
}t          j        |	|
d         |d         gd          }||
                    d	
                   }||
| j        j        k             ||<   d |D             S )a  Apply confidence thresholding to prediction outputs.

        Args:
            preds (torch.Tensor | list | tuple): Raw predictions from the model. If tensor, should have shape
                (batch_size, num_predictions, num_classes + 4) where last dimension contains bbox coords and
                class scores.

        Returns:
            (list[dict[str, torch.Tensor]]): List of dictionaries for each image, each containing:
                - 'bboxes': Tensor of shape (N, 4) with bounding box coordinates
                - 'conf': Tensor of shape (N,) with confidence scores
                - 'cls': Tensor of shape (N,) with class indices
        Nr      )dim)r      )device).NT)
descendingc                Z    g | ](}|d d d df         |d d df         |d d df         d)S )NrP      )bboxesconfclsrJ   .0xs     r   
<listcomp>z/RTDETRValidator.postprocess.<locals>.<listcomp>   sJ    WWW!1QQQU8Qqqq!tWQqqq!tWEEWWWr   )
isinstancelisttupleshapesplitr   r-   torchzerosrT   	enumerater   	xywh2xyxymaxcatargsortrY   )r   rL   bs_ndrX   scoresoutputsr   bboxscorerZ   preds                r   postprocesszRTDETRValidator.postprocess   s1     %$// 	"DME!HN	ArqBF<<$)/!;vfm<<<=B (( 	6 	6GAt=&&Dr**JE39dE)$4c)nE2NNND667Dedin45GAJJWWwWWWWr   Nonec                @   t          |d                   }|j        }|                                rt          |          n|}|d                                         }|dddgfxx         |d         d         | j        j        z  z  cc<   |dddgfxx         |d         d         | j        j        z  z  cc<   t          j        |          }|d	d	d	dfxx         |d	d	dd	f         dz  z  cc<   t          |
                                |d
         
                                |d         
                                          D ][\  }}}	| j                            ||j        | j        t          |	                   d |D             t          |d          d           \d	S )a  Serialize YOLO predictions to COCO json format.

        Args:
            predn (dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys with
                bounding box coordinates, confidence scores, and class predictions.
            pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
        im_filerX   .r      	ori_shape      NrY   rZ   c                .    g | ]}t          |d           S )rz   )roundr[   s     r   r^   z0RTDETRValidator.pred_to_json.<locals>.<listcomp>   s     444QU1a[[444r   rW   )image_id	file_namecategory_idrp   rq   )r   stem	isnumericintcloner   r-   r   	xyxy2xywhziptolistjdictr.   name	class_mapr|   )
r   rD   rF   pathr   r}   boxbscs
             r   pred_to_jsonzRTDETRValidator.pred_to_json   s    F9%&&y $ 0 0:3t999dHo##%%C!QKF;/2TY_DDC!QKF;/2TY_DDmC  AAArrE


c!!!QRR%j1n$


3::<<v)=)=)?)?uATATAVAVWW 		 		GAq!J (!%#'>#a&&#944!444"1a[[    		 		r   )r;   N)rD   rE   rF   rG   rH   rE   )rL   rM   rH   rN   )rD   rE   rF   rG   rH   rt   )r5   r6   r7   r8   rC   rK   rs   r   rJ   r   r   r   r   h   ss         2
 
 
 
0   X X X XB     r   r   )
__future__r   pathlibr   typingr   rd   ultralytics.datar   ultralytics.data.augmentr   r   r	   ultralytics.models.yolo.detectr
   ultralytics.utilsr   r   __all__r   r   rJ   r   r   <module>r      s   # " " " " "              ( ( ( ( ( ( C C C C C C C C C C = = = = = = + + + + + + + +
S S S S SK S S Slp p p p p( p p p p pr   