
    /j)                     Z    d dl Z d dlmZ d dlmZ d dlmZ d dlmZ  G d de          Z	dS )    N)	LetterBox)BasePredictor)Results)opsc                       e Zd ZdZd Zd ZdS )RTDETRPredictoraH  RT-DETR (Real-Time Detection Transformer) Predictor extending the BasePredictor class for making predictions.

    This class leverages Vision Transformers to provide real-time object detection while maintaining high accuracy. It
    supports key features like efficient hybrid encoding and IoU-aware query selection.

    Attributes:
        imgsz (int): Image size for inference (must be square and scale-filled).
        args (dict): Argument overrides for the predictor.
        model (torch.nn.Module): The loaded RT-DETR model.
        batch (list): Current batch of processed inputs.

    Methods:
        postprocess: Postprocess raw model predictions to generate bounding boxes and confidence scores.
        pre_transform: Pre-transform input images before feeding them into the model for inference.

    Examples:
        >>> from ultralytics.utils import ASSETS
        >>> from ultralytics.models.rtdetr import RTDETRPredictor
        >>> args = dict(model="rtdetr-l.pt", source=ASSETS)
        >>> predictor = RTDETRPredictor(overrides=args)
        >>> predictor.predict_cli()
    c           	      *   t          |t          t          f          s|dg}|d         j        d         }|d                             d|dz
  fd          \  }}t          |t                    st          j        |          ddddf         }g }t          |||| j        d                   D ]e\  }}	}
}t          j	        |          }|	
                    dd          \  }}|                    d          | j        j        k    }| j        j        ?|t          j        | j        j        |j        	          k                        d
          |z  }t          j        |||gd          |         }||dddf                             d                   d| j        j                 }|
j        dd         \  }}|dddgfxx         |z  cc<   |dd
dgfxx         |z  cc<   |                    t/          |
|| j        j        |                     g|S )a  Postprocess the raw predictions from the model to generate bounding boxes and confidence scores.

        The method filters detections based on confidence and class if specified in `self.args`. It converts model
        predictions to Results objects containing properly scaled bounding boxes.

        Args:
            preds (list | tuple): List of [predictions, extra] from the model, where predictions contain bounding boxes
                and scores.
            img (torch.Tensor): Processed input images with shape (N, 3, H, W).
            orig_imgs (list | torch.Tensor): Original, unprocessed images.

        Returns:
            (list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores,
                and class labels.
        Nr      )dim.T)keepdim)device   )
descending      )pathnamesboxes)
isinstancelisttupleshapesplitr   convert_torch2numpy_batchzipbatch	xywh2xyxymaxsqueezeargsconfclassestorchtensorr   anycatargsortmax_detappendr   modelr   )selfpredsimg	orig_imgsndbboxesscoresresultsbboxscoreorig_imgimg_path	max_scoreclsidxpredohows                     f/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/ultralytics/models/rtdetr/predict.pypostprocesszRTDETRPredictor.postprocess#   s&     %$// 	"DME1X^BqBF<<)T** 	L5i@@dddKI/2669djYZm/\/\ 	a 	a+D%8=&&D"YYr4Y88NIs##B''$).8Cy ,el49+<SZPPPPUUVWXX[^^9dIs3<<<SADQQQT
**d*;;<=Pty?P=PQD^BQB'FBq!f#q!f#NN78($*BRZ^___````    c                 N    t          | j        dd          fd|D             S )aj  Pre-transform input images before feeding them into the model for inference.

        The input images are letterboxed to ensure a square aspect ratio and scale-filled.

        Args:
            im (list[np.ndarray]): Input images of shape [(H, W, 3) x N].

        Returns:
            (list): List of pre-transformed images ready for model inference.
        FT)auto
scale_fillc                 (    g | ]} |           S ))image ).0x	letterboxs     r>   
<listcomp>z1RTDETRPredictor.pre_transform.<locals>.<listcomp>W   s&    ///q		"""///r@   )r   imgsz)r,   imrI   s     @r>   pre_transformzRTDETRPredictor.pre_transformK   s5     djuFFF	////B////r@   N)__name__
__module____qualname____doc__r?   rM   rF   r@   r>   r   r      s=         .& & &P0 0 0 0 0r@   r   )
r$   ultralytics.data.augmentr   ultralytics.engine.predictorr   ultralytics.engine.resultsr   ultralytics.utilsr   r   rF   r@   r>   <module>rV      s     . . . . . . 6 6 6 6 6 6 . . . . . . ! ! ! ! ! !L0 L0 L0 L0 L0m L0 L0 L0 L0 L0r@   