# Source code for lours.evaluation.evaluator

import warnings
from pathlib import Path

import pandas as pd
from typing_extensions import Self

from ..dataset import Dataset
from ..utils.label_map_merger import merge_label_maps
from ..utils.parquet_saver import dict_from_parquet, dict_to_parquet
from ..utils.testing import assert_frame_intersections_equal



[docs]
class Evaluator:
    """Base Evaluator class, made to measure prediction quality with respect to a
    Dataset of groundtruth annotations.

    Depending on the data type, the method used for evaluation might differ; refer
    to the specialized classes for more information. The fundamental building block
    is the Dataset object representing the groundtruth. Additional keyword arguments
    given to the constructor are also Dataset objects, and they must match the
    groundtruth in terms of images and label maps (if any).
    """

    name: str | None
    """Name of Evaluator. Can be deduced from groundtruth's dataset name and will be
    used in export functions like :meth:`.DetectionEvaluator.to_fiftyone`"""

    groundtruth: pd.DataFrame
    """DataFrame comprising annotation data. Must have at least ``image_id`` column"""

    predictions_dictionary: dict[str, pd.DataFrame]
    """dictionary of DataFrames comprising prediction data. Must have at least
    ``image_id`` and ``confidence`` columns"""

    images: pd.DataFrame
    """DataFrame comprising image data. This dataframe should be referred to by both gt
    and predictions with the ``image_id`` column"""

    images_root: Path
    """Root folder where to grab images. Image filepath will be concatenation of
    images_root and their relative path"""

    label_map: dict[int, str]
    """Mapping from category_id to category_str. If used, is generally taken from the
    groundtruth Dataset. The prediction must be compatible with it"""

    def __init__(
        self,
        groundtruth: Dataset,
        name: str | None = None,
        **predictions: Dataset,
    ):
        """Constructor of the Evaluator object.

        Args:
            groundtruth: Dataset object representing the ground truth with
                annotations, image data and label_map
            name: Name of Evaluator. If set to None, will be deduced from groundtruth's
                dataset name
            **predictions: keyword arguments for additional datasets to compare the
                groundtruth to. Its images must match the groundtruth dataset (see
                add_prediction_dataset method below).
        """
        if name is None:
            self.name = groundtruth.dataset_name
        else:
            self.name = name
        self.images_root = groundtruth.images_root
        self.groundtruth = groundtruth.annotations
        self.images = groundtruth.images.drop("split", axis=1, errors="ignore")
        self.label_map = groundtruth.label_map
        self._default_annotation_columns_with_types = (
            groundtruth._default_annotation_columns_with_types
        )
        self._default_image_columns_width_types = (
            groundtruth._default_image_columns_with_types
        )
        self.predictions_dictionary = {}
        for predictions_name, predictions_df in predictions.items():
            self.add_predictions_dataset(predictions_name, predictions_df)


[docs]
    def get_image_attributes(self) -> list[str]:
        """Get the name of columns related to image attributes. In other words, get
        columns that are NOT the default ones.

        The actual attribute values can then be
        ``self.images[self.get_image_attributes()]``

        Returns:
            list of column names in ``self.images`` that represent attributes
        """
        return [
            str(c)
            for c in self.images.columns
            if c not in self._default_image_columns_width_types.keys()
        ]



[docs]
    def get_annotations_attributes(
        self, predictions_name: str | None = None
    ) -> list[str]:
        """Get the name of columns related to annotations attributes. In other words,
        get columns that are NOT the default ones.

        the actual attribute values can then be

        .. code-block:: python

            self.predictions_dictionary[predictions_name][
                self.get_annotations_attributes()
            ]

        Args:
            predictions_name: name of predictions to extract not default column from.
                If None, will use ``self.groundtruth``. Defaults to None.

        Returns:
            list of column names in ``self.annotations`` that represent attributes
        """
        if predictions_name is None:
            predictions = self.groundtruth
        else:
            predictions = self.predictions_dictionary[predictions_name]
        return [
            str(c)
            for c in predictions.columns
            if c not in self._default_annotation_columns_with_types.keys()
        ]



[docs]
    def add_predictions_dataset(self, predictions_name: str, predictions: Dataset):
        """Method to add predictions to the Evaluator from a Dataset object.
        The prediction dataset must match the Evaluator data:

        - prediction label_map must be compatible with the Evaluator's label map.
            If it brings new categories, the Evaluator's label map is extended and
            a ``RuntimeWarning`` is emitted.
        - image data must be the same, except the relative path
            (it can change although the image has not) i.e. every image id of the
            prediction dataset must be known to the evaluator and all the columns
            in the prediction image data must match the corresponding ones
            in the evaluator image data.

        The evaluator is only modified once every check on the dataset has
        passed. If registering the annotations fails afterwards, the label map
        is restored to its previous value.

        Note that this method will overwrite a potentially already existing prediction
        dataframe

        Args:
            predictions_name: name of predictions to add. It will then be used as key in
                the ``self.predictions_dictionary`` attribute.
            predictions: prediction Dataset, from which the annotations will
                be extracted and added to the evaluator.

        Raises:
            AssertionError: if the annotations have no ``confidence`` column, or if
                groundtruth and prediction image data disagree on their
                overlapping indices and columns.
            ValueError: if some image ids of the prediction dataset are not in the
                evaluator's image index.
        """
        # Explicit raise rather than ``assert`` so the check survives ``python -O``.
        if "confidence" not in predictions.annotations:
            raise AssertionError("Not a prediction dataset")

        # Computed up front, but only committed once all the checks have passed.
        merged_label_map = merge_label_maps(
            self.label_map, predictions.label_map, method="outer"
        )

        if not predictions.images.index.isin(self.images.index).all():
            raise ValueError(
                "Some image ids in given predictions are not present in the evaluator"
                " image index"
            )

        try:
            assert_frame_intersections_equal(self.images, predictions.images)
        except AssertionError as e:
            raise AssertionError(
                "Groundtruth and Prediction images are not consistent on their"
                " overlapping indices and columns. You might want to consider the"
                " Dataset.reindex() method."
            ) from e

        previous_label_map = self.label_map
        if merged_label_map != previous_label_map:
            warnings.warn(
                f"Although compatible, '{predictions_name}' prediction label map is"
                " larger than groundtruth label map",
                RuntimeWarning,
                stacklevel=2,
            )
            # ``add_predictions`` validates category ids against ``self.label_map``,
            # so the merged map must be in place before calling it.
            self.label_map = merged_label_map

        try:
            self.add_predictions(predictions_name, predictions.annotations)
        except Exception:
            # Do not leave an enlarged label map behind for predictions that
            # were ultimately rejected.
            self.label_map = previous_label_map
            raise



[docs]
    def add_predictions(self, predictions_name: str, predictions: pd.DataFrame):
        """Method to add predictions to the Evaluator from a dataframe.
        No check will be done on image data the annotations refer to. However, it will
        check that ``image_id`` values of ``predictions`` are contained in the
        evaluator's ``image_data`` and ``category_id`` values are contained in the
        label map

        Note that this method will overwrite a potentially already existing prediction
        dataframe

        Args:
            predictions_name: name of predictions to add. It will then be used as key in
                the ``self.predictions_dictionary`` attribute.
            predictions: prediction dataframe to be added to
                the evaluator.
        """
        predictions_image_ids = set(predictions["image_id"].unique())
        assert set(self.images.index).issuperset(predictions_image_ids)
        predictions_class_ids = set(predictions["category_id"].unique())
        assert predictions_class_ids.issubset(self.label_map.keys())
        self.predictions_dictionary[predictions_name] = predictions



[docs]
    def to_parquet(self, output_dir: Path | str, overwrite: bool = False) -> None:
        """Dump this object to a folder, with parquet files for the dataframes it
        holds and a metadata.yaml file for its other attributes.

        Only public instance attributes (whose name does not start with an
        underscore) are saved, along with the class name under the ``__name__``
        key.

        Args:
            output_dir: output directory where the files will be created.
                If ``overwrite`` is set to False, it must not already exist.
            overwrite: if set to True, will remove the directory at ``output_dir``
                if it already exists. Defaults to False.
        """
        payload = {}
        for attribute, value in vars(self).items():
            if attribute.startswith("_"):
                # private attributes are not part of the archive
                continue
            payload[attribute] = value
        # Class name of the saved object, stored alongside the data.
        payload["__name__"] = type(self).__name__

        dict_to_parquet(payload, Path(output_dir), overwrite=overwrite)



[docs]
    @classmethod
    def from_parquet(cls, input_dir: Path | str) -> Self:
        """Class method to construct an instance of this class or a subclass.
        The parquet folder must have been created with the method ``to_parquet``.

        Args:
            input_dir: Path to directory containing the metadata.yaml file along with
                the different parquet files

        Raises:
            ValueError: Raised when the object name stored under the ``__name__``
                key of the archive is not the same as the name of the class this
                method is called from. For example, you can't call
                :meth:`.Evaluator.from_parquet` with a folder created by a
                :class:`DetectionEvaluator` object.

        Returns:
            New object of the same class as the method is called from,
            containing data loaded from the parquet files in the input directory
        """
        archive = dict_from_parquet(Path(input_dir))

        stored_class_name = archive["__name__"]
        if stored_class_name != cls.__name__:
            raise ValueError(
                f"Wrong object type for parquet archive. Expected {cls.__name__}, got"
                f" {stored_class_name}"
            )

        # Rebuild a groundtruth Dataset from the archive; a ``split`` column set to
        # None is assigned to the stored image data before handing it over.
        groundtruth_dataset = Dataset(
            images_root=archive["images_root"],
            images=archive["images"].assign(split=None),
            annotations=archive["groundtruth"],
            label_map=archive["label_map"],
        )
        stored_predictions = archive["predictions_dictionary"]

        evaluator = cls(groundtruth_dataset)
        for predictions_name, predictions_frame in stored_predictions.items():
            evaluator.add_predictions(predictions_name, predictions_frame)

        # Finally, every instance attribute that has a non-None counterpart in the
        # archive is overwritten with the stored value.
        for attribute in list(vars(evaluator)):
            stored_value = archive.get(attribute)
            if stored_value is None:
                continue
            vars(evaluator)[attribute] = stored_value
        return evaluator


    def _ipython_display_(self):
        """Display the Evaluator as an HTML widget when using notebooks.

        The widget is made of a one-line summary followed by a tab widget, with
        one tab per entry returned by ``self._get_widgets()``.
        """
        import ipywidgets as widgets
        from IPython.display import display

        summary = widgets.HTML(
            "<b> Evaluation object, containing "
            f"{len(self.images):,} images, {len(self.groundtruth):,} groundtruth "
            f"objects, and {len(self.predictions_dictionary)} prediction sets </b>"
        )

        # Mapping from tab title to the widget shown in that tab.
        panels = self._get_widgets()

        tabs = widgets.Tab()
        tabs.children = list(panels.values())
        tabs.titles = list(panels.keys())

        display(widgets.VBox([summary, tabs]))

    def _get_widgets(self):
        """Render the evaluator's components in notebook output widgets.

        Returns:
            dictionary mapping a title to an ``ipywidgets.Output`` widget, in this
            order: ``"Images"``, ``"Groundtruth"``, one entry per key of
            ``self.predictions_dictionary``, and ``"label_map"``.
        """
        import ipywidgets as widgets
        from IPython.display import display

        label_map_df = pd.Series(self.label_map, name="category string").to_frame()
        label_map_df.index.name = "category_id"

        # create output widgets
        widget_images = widgets.Output()
        widget_groundtruth = widgets.Output()
        widget_predictions = {
            p_name: widgets.Output() for p_name in self.predictions_dictionary
        }
        widget_label_map = widgets.Output()

        # render in output widgets
        with widget_images:
            display(self.images)
        with widget_groundtruth:
            display(self.groundtruth)
        for p_name, p in self.predictions_dictionary.items():
            with widget_predictions[p_name]:
                display(p)
        with widget_label_map:
            display(label_map_df)

        return {
            "Images": widget_images,
            "Groundtruth": widget_groundtruth,
            **widget_predictions,
            "label_map": widget_label_map,
        }