# Source code for lours.dataset.io.pascalvoc

from collections.abc import Iterable
from itertools import combinations
from pathlib import Path

import pandas as pd
import xmltodict
from tqdm import tqdm

from lours.dataset import Dataset
from lours.utils.bbox_converter import import_bbox



# [docs]
def from_pascalVOC_generic(
    annotations_root: Path | str,
    images_root: Path | str | None = None,
    split_folder: Path | str | None = None,
    split_values: Iterable[str] | str = ("train", "val"),
) -> Dataset:
    """Load a dataset in pascalVOC format

    See `specifications`__ (only Object detection)

        .. __: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html

    See Also:
        :func:`.from_pascalVOC_detection`

    Notes:
        - This has been tested against PascalVOC2012
        - If loading official detection splits, not all images will be assigned a split
          value.
        - Annotation files are processed in sorted path order, so that image and
          annotation ids do not depend on the file system listing order.
        - Images whose annotation file contains no ``object`` entry are kept in the
          images table and simply contribute no annotation.
        - For objects with "parts" (like hands for persons), a new object will be
          created and a ``body_id`` column will link to the corresponding root object.
          The same is done for objects with a "point" entry, which becomes a zero-area
          box named "person of interest". The ``body_id`` column is only present when
          at least one part or point has been found.
        - ``actions.<value>`` columns are converted to boolean and included in the
          booleanized column ``actions``.
        - ``difficult``, ``truncated`` and ``occluded`` columns are converted to
          boolean.

    Args:
        annotations_root: Folder containing the xml files containing annotations.
            It is searched recursively.
        images_root: Folder containing the image files. Path to images are given in the
            corresponding annotation files, relative to this folder. If set to None,
            will be assumed to be the same as ``annotations_root``. Defaults to None.
        split_folder: Folder containing txt file for each split. Files are named
            ``<split>.txt`` and contain all the file name without extension contained
            in that split. If set to None, will be assumed to be the same as
            ``annotations_root``. Defaults to None.
        split_values: split value or list of split values to read. Following the
            aforementioned syntax, will try to open the corresponding split files and
            assign this split value to the corresponding images. A missing split file
            is treated as an empty split. Note that split values
            need to be exclusive to each other. For example, you cannot load both
            "train" and "trainval" splits. Defaults to ("train", "val").

    Raises:
        AssertionError: if two of the requested splits share at least one file name.

    Returns:
        Loaded dataset with split values assigned
    """
    annotations_root = Path(annotations_root)
    images_root = annotations_root if images_root is None else Path(images_root)
    split_folder = annotations_root if split_folder is None else Path(split_folder)
    if isinstance(split_values, str):
        split_values = [split_values]
    # glob order depends on the file system: sort to get reproducible ids.
    xml_files = sorted(annotations_root.glob("**/*.xml"))

    def image_set(split: str) -> set[str]:
        """Read the file names listed in ``<split>.txt`` (empty if missing)."""
        try:
            content = (split_folder / f"{split}.txt").read_text()
        except FileNotFoundError:
            return set()
        # Skip blank lines so that an empty file gives an empty set instead of
        # {""}, which would make two empty splits look like they overlap.
        stripped_lines = (line.strip() for line in content.splitlines())
        return {line for line in stripped_lines if line}

    split_files: dict[str, set[str]] = {
        value: image_set(value) for value in split_values
    }

    for s1, s2 in combinations(split_files, 2):
        overlap = split_files[s1].intersection(split_files[s2])
        if overlap:
            # Raised explicitly (rather than with ``assert``) so that the check
            # is still performed when python runs with the -O flag.
            raise AssertionError(
                f"Splits {s1} and {s2} have non null overlap : {overlap}"
            )

    # file name (without extension) -> split name
    reversed_splits = {
        file_name: split_name
        for split_name, split_set in split_files.items()
        for file_name in split_set
    }

    annotations_dicts = []
    image_data_list = {}
    current_object_id = 0

    for image_id, xml_file in enumerate(tqdm(xml_files)):
        annotation_dict = xmltodict.parse(xml_file.read_text())["annotation"]

        image_data_list[image_id] = {
            "relative_path": annotation_dict["filename"],
            "width": int(annotation_dict["size"]["width"]),
            "height": int(annotation_dict["size"]["height"]),
            "split": reversed_splits.get(xml_file.stem),
        }

        # An image without any annotated object has no "object" entry at all,
        # and xmltodict gives a bare dict instead of a list for a single object.
        objects = annotation_dict.get("object") or []
        if isinstance(objects, dict):
            objects = [objects]

        # Root objects get their ids first, parts of this image come right after.
        object_parts = []
        for obj in objects:
            obj["id"] = current_object_id
            obj["image_id"] = image_id
            if "part" in obj:
                parts = obj.pop("part")
                if isinstance(parts, dict):
                    parts = [parts]
                for p in parts:
                    p["body_id"] = current_object_id
                object_parts.extend(parts)
            if "point" in obj:
                # A point is stored as a zero-area box linked to its root object.
                point = obj.pop("point")
                x, y = point["x"], point["y"]
                object_parts.append(
                    {
                        "bndbox": {"xmax": x, "xmin": x, "ymax": y, "ymin": y},
                        "image_id": image_id,
                        "body_id": current_object_id,
                        "name": "person of interest",
                    }
                )
            current_object_id += 1
        for part in object_parts:
            part["id"] = current_object_id
            part["image_id"] = image_id
            current_object_id += 1
        annotations_dicts.extend(objects)
        annotations_dicts.extend(object_parts)

    images_df = pd.DataFrame.from_dict(image_data_list, orient="index")
    images_df.index = images_df.index.rename("id")
    # Nested entries are flattened to dotted columns, e.g. "bndbox.xmin".
    annotations_df = pd.json_normalize(annotations_dicts).set_index("id")

    # ``astype`` with a dict raises KeyError on a missing column, even with
    # errors="ignore", so only convert when parts or points were found.
    if "body_id" in annotations_df.columns:
        annotations_df["body_id"] = annotations_df["body_id"].astype(pd.Int64Dtype())

    action_columns = [
        name for name in annotations_df.columns if name.startswith("actions.")
    ]
    if action_columns:
        # Objects without a given action have NaN in its column: count as False.
        annotations_df[action_columns] = (
            annotations_df[action_columns]
            .replace({"0": False, "1": True})
            .fillna(False)
            .astype(bool)
        )

    to_boolean = [
        name
        for name in ("difficult", "occluded", "truncated")
        if name in annotations_df.columns
    ]
    if to_boolean:
        # Nullable boolean: parts do not carry these flags and must stay missing.
        annotations_df[to_boolean] = (
            annotations_df[to_boolean]
            .replace({"0": False, "1": True})
            .astype(pd.BooleanDtype())
        )

    bbox_columns = ["bndbox.xmin", "bndbox.xmax", "bndbox.ymin", "bndbox.ymax"]
    bounding_boxes = import_bbox(
        annotations_df[bbox_columns].astype(float).to_numpy(),
        images_df,
        image_ids=annotations_df["image_id"],
        input_format="XXYY",
    )

    annotations_df = pd.concat(
        [annotations_df.drop(bbox_columns, axis=1), bounding_boxes],
        axis=1,
    )

    # Category ids follow the order of first appearance of each name.
    label_map = dict(enumerate(annotations_df["name"].unique()))
    reverse_label_map = {v: k for k, v in label_map.items()}
    # ``map`` is a plain lookup, it does not rely on ``replace`` downcasting.
    annotations_df["category_id"] = annotations_df["name"].map(reverse_label_map)
    annotations_df = annotations_df.rename(columns={"name": "category_str"})

    dataset = Dataset(
        images_root=images_root,
        images=images_df,
        annotations=annotations_df,
        label_map=label_map,
    )

    dataset.booleanized_columns["annotations"] = {"actions"}

    return dataset




# [docs]
def from_pascalVOC_detection(input_folder: Path | str) -> Dataset:
    """Load a pascalVOC detection dataset that follows the official structure.

    Folder is assumed to contain three sub-folders:

    - "Annotations" containing the annotation xml files
    - "JPEGImages" containing the images files
    - "ImageSets/Main" containing the detection split files

    See `specifications`__

        .. __: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html

    See Also:
        :func:`.from_pascalVOC_generic`

    Notes:
        - This has been tested against PascalVOC2012
        - If loading official detection splits, not all images will be assigned a split
          value.
        - For objects with "parts" (like hands for persons), a new object will be
          created and a ``body_id`` column will link to the corresponding root object.
        - ``actions.<value>`` columns are converted to boolean and included in the
          booleanized column ``actions``.
        - ``difficult``, ``truncated`` and ``occluded`` columns are converted to
          boolean.
        - The dataset will remove images without split. If you wish to load all images
          with available annotation, use :func:`.from_pascalVOC_generic`.

    Args:
        input_folder: Folder containing annotations, images, and split folders.
            Can be given as a :class:`~pathlib.Path` or as a string.

    Returns:
        Loaded dataset, restricted to the images that have a split value
    """
    # Accept plain strings, like from_pascalVOC_generic does.
    input_folder = Path(input_folder)
    pascal_dataset = from_pascalVOC_generic(
        annotations_root=input_folder / "Annotations",
        images_root=input_folder / "JPEGImages",
        split_folder=input_folder / "ImageSets" / "Main",
    )
    # Remove images that are neither in the train nor in the val split
    return pascal_dataset.loc[pascal_dataset.images["split"].notna()]