# Source code for lours.dataset.indexing

"""Module dedicated to Dataset indexers, to be able to index Dataset with pandas style
loc and iloc methods
"""

from typing import Any, Generic, Literal, TypeVar

import pandas as pd

from lours.dataset import Dataset

# Type variable bound to Dataset: the locator classes of this module are generic over
# it, so that indexing is annotated as returning the same type as the wrapped dataset.
D = TypeVar("D", bound=Dataset)


class DatasetImLocator(Generic[D]):
    """Locator class dedicated to index a dataset by its images.

    Indexing is done as if pandas indexing methods were used on
    ``dataset.images``, and annotations are filtered accordingly so that only the
    ones whose ``image_id`` belongs to the selected images are kept.

    Usually used in the context of :meth:`.Dataset.loc` and :meth:`.Dataset.iloc`
    """

    def __init__(self, dataset: D, mode: Literal["loc", "iloc"] = "loc") -> None:
        """Constructor

        Args:
            dataset: Dataset object to index
            mode: whether to use ``dataset.images.loc`` or
                ``dataset.images.iloc``. Any value other than "loc" results in
                ``iloc`` being used. Defaults to "loc"
        """
        self.dataset = dataset
        self.mode = mode

    def __getitem__(self, index: Any) -> D:
        """Index the dataset with an index applied on ``dataset.images``

        Args:
            index: index object expected to be compatible with
                ``dataset.images.loc`` or ``dataset.images.iloc``.

        Returns:
            Indexed sub-dataset, built with ``dataset.from_template`` from the
            selected images and the annotations attached to them.
        """
        images = self.dataset.images
        if self.mode == "loc":
            new_images = images.loc[index]
        else:
            new_images = images.iloc[index]

        if isinstance(new_images, pd.Series):
            # The Series object indicates only one row. Convert it back to a
            # one-row frame and restore the original column dtypes, which are
            # lost when the row goes through a Series.
            new_images = new_images.to_frame().T.astype(images.dtypes)
            # The transposition builds a brand new, unnamed index. Restore the
            # name so that a single-row selection is labelled exactly like a
            # multi-row selection would be.
            new_images.index.name = images.index.name

        # Only keep annotations that belong to one of the selected images
        annotations = self.dataset.annotations
        kept_annotations = annotations["image_id"].isin(new_images.index)
        new_annotations = annotations.loc[kept_annotations]

        return self.dataset.from_template(
            images=new_images,
            annotations=new_annotations,
            reset_booleanized=False,
        )
class DatasetAnnotLocator(Generic[D]):
    """Locator class dedicated to index a dataset by its annotations.

    Indexing is done as if pandas indexing methods were used on
    ``dataset.annotations``, and images left without annotations by the
    selection can optionally be removed.

    Usually used in the context of :meth:`.Dataset.loc_annot` and
    :meth:`.Dataset.iloc_annot`
    """

    def __init__(
        self,
        dataset: D,
        mode: Literal["loc", "iloc"] = "loc",
        remove_emptied_images: bool = False,
    ) -> None:
        """Constructor

        Args:
            dataset: Dataset object to index
            mode: whether to use ``dataset.annotations.loc`` or
                ``dataset.annotations.iloc``. Any value other than "loc" results
                in ``iloc`` being used. Defaults to "loc"
            remove_emptied_images: If set to True, will remove images that no
                longer have annotations, but will keep images that were already
                empty before. Defaults to False
        """
        self.dataset = dataset
        self.mode = mode
        self.remove_emptied_images = remove_emptied_images

    def __getitem__(self, index: Any) -> D:
        """Index the dataset with an index applied on ``dataset.annotations``

        Args:
            index: index object expected to be compatible with
                ``dataset.annotations.loc`` or ``dataset.annotations.iloc``.

        Returns:
            Indexed sub-dataset, built with ``dataset.from_template`` from the
            selected annotations and either all the images or, when
            ``remove_emptied_images`` is set, only the images that were already
            empty or that still have at least one selected annotation.
        """
        images = self.dataset.images
        annotations = self.dataset.annotations

        if self.mode == "loc":
            new_annotations = annotations.loc[index]
        else:
            new_annotations = annotations.iloc[index]

        if isinstance(new_annotations, pd.Series):
            # A Series means a single annotation row was selected. Convert it
            # back to a one-row frame and restore the original column dtypes.
            new_annotations = new_annotations.to_frame().T.astype(annotations.dtypes)
            # The transposition builds a brand new, unnamed index. Restore the
            # name so that a single-row selection is labelled exactly like a
            # multi-row selection would be.
            new_annotations.index.name = annotations.index.name

        if not self.remove_emptied_images:
            new_images = images
        else:
            # Images without any annotation in the ORIGINAL dataset are kept
            # on purpose: they were not emptied by this selection.
            already_empty_images = ~images.index.isin(annotations["image_id"])
            already_empty_images_ids = images.index[already_empty_images].tolist()
            # Images that still have at least one annotation after selection
            remaining_images = new_annotations["image_id"].unique().tolist()
            new_images = images.loc[[*already_empty_images_ids, *remaining_images]]

        return self.dataset.from_template(
            images=new_images,
            annotations=new_annotations,
            reset_booleanized=False,
        )