Source code for lours.dataset.io.darknet

import json
import shutil
from collections.abc import Iterable, Mapping
from os.path import relpath
from pathlib import Path
from typing import Any

import pandas as pd
import yaml
from imageio.v3 import imread, imwrite
from tqdm.auto import tqdm

from lours.utils.bbox_converter import column_names_from_format_string, export_bbox
from lours.utils.testing import assert_images_valid

from ..dataset import Dataset
from .common import (
    convert_str,
    get_image_info,
    get_images_from_folder,
    get_relative_image_path,
    to_dataset_object,
)

# Names of the box columns used by the readers and writers of this module, as
# returned by ``column_names_from_format_string`` for the "cxwcyh" format string.
# They are unpacked in the order of that string (presumably center x, width,
# center y, height - confirm against ``lours.utils.bbox_converter``).
CX_COLUMN, W_COLUMN, CY_COLUMN, H_COLUMN = column_names_from_format_string("cxwcyh")



[docs]
def bbox_to_txt(bbox_df: pd.DataFrame) -> list[str]:
    """Convert a dataframe of yolo bboxes to text lines readable by darknet.

    Each row becomes one line ``<category_id> <cx> <cy> <w> <h>`` terminated by a
    newline character.

    Args:
        bbox_df: bboxes dataframe. Must contain a ``category_id`` column along
            with the four box columns of the ``cxwcyh`` format.

    Returns:
        list of strings describing bboxes according to darknet format, one per row
        of ``bbox_df`` and in the same order.
    """
    # Iterate column-wise rather than with ``DataFrame.iterrows``: iterrows builds
    # one Series per row, which upcasts integer category ids to float as soon as
    # every column is numeric, and would then write "0.0" instead of "0".
    columns = (
        bbox_df["category_id"],
        bbox_df[CX_COLUMN],
        bbox_df[CY_COLUMN],
        bbox_df[W_COLUMN],
        bbox_df[H_COLUMN],
    )
    return [
        f"{category_id} {cx} {cy} {width} {height}\n"
        for category_id, cx, cy, width, height in zip(*columns)
    ]




[docs]
def txt_to_bbox(text_lines: Iterable[str], image_id: int) -> list[dict[str, Any]]:
    """Read lines coming from a darknet txt annotation file and convert them
    to a list of annotation dictionaries

    Each non-blank line is expected to hold at least five whitespace separated
    fields: category id, x center, y center, width and height. An optional sixth
    field is stored as the ``confidence`` of the box.

    Args:
        text_lines: text lines of a single annotation file, i.e. all given lines
            are assumed to come from the same image
        image_id: image index of corresponding image

    Raises:
        ValueError: when a non-blank line has fewer than five fields.

    Returns:
        list of annotations for the particular image. bboxes are in the darknet format,
        i.e. with relative coordinates
    """
    bboxes = []
    for line in text_lines:
        fields = line.split()
        if not fields:
            # Blank or whitespace-only line (e.g. a lone "\r" left over from a file
            # with Windows line endings): there is no box to parse.
            continue
        cat_id, x, y, width, height, *rest = map(convert_str, fields)
        bbox = {
            "category_id": cat_id,
            CX_COLUMN: x,
            CY_COLUMN: y,
            W_COLUMN: width,
            H_COLUMN: height,
            "image_id": image_id,
        }
        if rest:
            # Prediction files carry the confidence right after the box
            bbox["confidence"] = rest[0]
        bboxes.append(bbox)
    return bboxes




[docs]
def open_data_file(data_file: Path) -> dict[str, str | int | float]:
    """Open a ``.data`` file typically used in darknet.

    Every non-blank line is read as ``key = value``. The line is split on its first
    ``=`` sign and surrounding whitespace is removed from both sides, so that
    ``key=value``, ``key = value`` and values that themselves contain ``=`` are all
    understood. A line without value (or without ``=`` at all) gives an empty
    value. Values are then converted with ``convert_str``.

    Args:
        data_file: path to ``.data`` file

    Returns:
        dictionary of values contained in the .data file
    """
    data_dict = {}
    with open(data_file) as f:
        for raw_line in f:
            # Stripping also gets rid of the line ending, whatever its flavour
            line = raw_line.strip()
            if not line:
                continue
            key, _, value = line.partition("=")
            data_dict[key.strip()] = convert_str(value.strip())
    return data_dict




[docs]
def write_data_file(data: dict[str, str | int], data_file: Path) -> None:
    """Save a dictionary to a ``.data`` file typically used in darknet.
    Note that only the typical keys expected by darknet are considered,
    i.e. classes, train, valid, names and backup

    Args:
        data: content to save to the ``.data`` file
        data_file: path where to save the given data dictionary
    """
    keys = ["classes", "train", "valid", "names", "backup"]
    with open(data_file, "w") as f:
        for key in keys:
            f.write(f"{key} = {data.get(key, '')}\n")




[docs]
def yolov5_img_path_to_label_path(img_path: Path) -> Path:
    """Deduce the label file path matching an image path, the yolov5 way

    The extension is replaced by "txt" and, when one of the path components is
    named "images", the last such component is renamed to "labels".

    See `Yolov5's image2labels <https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py#L424>`__ .

    Args:
        img_path: path to an image file

    Returns:
        corresponding label file as it would have been searched for by yolov5
    """  # noqa: E501
    label_path = img_path.with_suffix(".txt")
    components = label_path.parts
    # Walk the components backwards so that only the deepest "images" folder is
    # swapped when the name occurs several times
    for position in range(len(components) - 1, -1, -1):
        if components[position] == "images":
            return Path(
                *components[:position], "labels", *components[position + 1 :]
            )
    return label_path




[docs]
def deduce_data_file_and_dataset_folder(
    dataset_path: Path | str | None = None,
    data_file: Path | str | None = None,
    default_data_file: str = "train_job.data",
) -> tuple[Path, Path]:
    """Get dataset path and data file, deduce one from another if needed."""
    if dataset_path is None and data_file is None:
        raise ValueError("dataset_path and data_file cannot be both None !")
    if dataset_path is not None:
        dataset_path = Path(dataset_path)
        if not dataset_path.is_dir():
            raise ValueError("Dataset path must be a path to valid directory")
        if data_file is None:
            data_file = dataset_path / default_data_file
            if not data_file.is_file():
                raise ValueError(f"Data file {data_file} does not exist")
    if data_file is not None:
        data_file = Path(data_file)
        if not data_file.is_file():
            raise ValueError(f"Data file {data_file} does not exist")
        if dataset_path is None:
            dataset_path = data_file.parent

    # Do a final assert, otherwise pyright will complain
    assert dataset_path is not None and data_file is not None
    return dataset_path, data_file




[docs]
def from_darknet(
    dataset_path: Path | str | None = None,
    data_file: Path | str | None = None,
    ids_map: dict[int, dict[str, Any]] | str | Path | None = None,
    image_info: pd.DataFrame | None = None,
) -> Dataset:
    """Creates dataset object from a darknet dataset. Note that category ids and
    image ids are not given in the dataset format and thus can only be sequential
    As such, if we want to convert the dataset back to another format that keeps track
    of image and category ids, we need to give image_info and class mapping from
    an external source. Here we expect it to be contained in a json label map for
    annotations and a DataFrame with similar columns as in the final dataset's images
    DataFrame for images

    The ``valid`` list of the data file is always loaded, with the split name
    "valid". The ``train`` list is loaded as well, with the split name "train",
    when the data file gives one and it is not empty.

    Args:
        dataset_path: folder containing the dataset, from which the relative path
            are given. If not set, will use ``data_file``'s parent directory.
            Defaults to None
        data_file: data file containing info about names, lists of train and
            validation images. Can be either a .data file or a .yml file (for yolov5).
            If not set, will use the file ``train_job.data`` at the root of
            ``dataset_path``. Defaults to None.
        ids_map: Optional dictionary containing the id_remapping that was initially
            applied to create the darknet dataset. Will reverse it to get back to the
            original class mapping. The dictionary must have darknet dataset's category
            ids (in sequential order then) as keys and with corresponding values that
            are dictionaries containing ``name`` and ``id`` keys relative to this
            class. Note that this can also be a path to a json file containing the
            dictionary. Defaults to None.
        image_info: Optional DataFrame containing image information. Must contain at
            least the following columns : ``relative_path``, ``id``, ``width``,
            ``height``. Defaults to None

    Raises:
        ValueError: Errors when neither ``dataset_path`` nor ``data_file`` is specified

    Returns:
        Loaded dataset object
    """
    dataset_path, data_file = deduce_data_file_and_dataset_folder(
        dataset_path, data_file
    )

    # Yaml data files follow the yolov5 convention and have a dedicated loader
    if data_file.suffix in [".yml", ".yaml"]:
        return from_darknet_yolov5(
            dataset_path, data_file, ids_map=ids_map, image_info=image_info
        )

    if isinstance(ids_map, Path | str):
        with open(ids_map) as f:
            ids_map = json.load(f)
        assert isinstance(ids_map, dict)

    data_dict = open_data_file(data_file)
    # The train list is optional (e.g. data files only meant for evaluation)
    train_file = data_dict.get("train", "")
    valid_file = data_dict["valid"]
    names_file = data_dict["names"]
    assert isinstance(train_file, str)
    assert isinstance(valid_file, str)
    assert isinstance(names_file, str)

    def read_non_blank_lines(relative_file: str) -> list[str]:
        # Blank lines are dismissed, especially the one at the end of the file.
        # A list is returned rather than a lazy ``filter`` object, because the
        # result is read several times (names) and tested for emptiness (train).
        with open(dataset_path / relative_file) as f:
            return [line for line in f.read().split("\n") if line]

    name_list = read_non_blank_lines(names_file)
    train_list = read_non_blank_lines(train_file) if train_file else []
    valid_list = read_non_blank_lines(valid_file)

    dataset = from_darknet_generic(
        dataset_path,
        dataset_path,
        name_list,
        valid_list,
        "valid",
        ids_map=ids_map,
        image_info=image_info,
    )
    if train_list:
        dataset += from_darknet_generic(
            dataset_path,
            dataset_path,
            name_list,
            train_list,
            "train",
            ids_map=ids_map,
            image_info=image_info,
        )
    return dataset




[docs]
def from_darknet_yolov5(
    dataset_path: Path | str | None = None,
    data_file: Path | str | None = None,
    splits: Iterable[str] | None = None,
    split_name_mapping: Mapping[str, str] | None = None,
    ids_map: dict[int, dict[str, Any]] | str | Path | None = None,
    image_info: pd.DataFrame | None = None,
) -> Dataset:
    """Creates dataset object from a darknet dataset. Note that category ids and
    image ids are not given in the dataset format and thus can only be sequential
    As such, if we want to convert the dataset back to another format that keeps track
    of image and category ids, we need to give image_info and class mapping from
    an external source. Here we expect it to be contained in a json label map for
    annotations and a DataFrame with similar columns as in the final dataset's images
    DataFrame for images

    Args:
        dataset_path: folder containing the dataset, from which the relative path
            are given. If not set, will use ``data_file``'s parent directory.
            Defaults to None
        data_file: data file containing info about names, lists of train and
            validation images. Must be a valid path to a Yaml file
            (either .yml or .yaml). If set to None, will use the file
            ``data.yaml`` at the root of ``dataset_path``. Defaults to None.
            Its ``names`` entry can be either a list of class names or a mapping
            from class index to class name.
        splits: name of splits to load. if set to None, will consider every key in
            ``data_file`` that is neither ``names`` nor ``path`` to be a split,
            provided its value is a string.
            Every value in said keys must be either a valid folder or a valid text file,
            both relative to ``dataset_path``. Defaults to None.
        split_name_mapping: mapping dict to replace split names to other ones. split
            names not present in mapping will not be modified. If set to None,
            will apply yolov5 conventional mapping, i.e. 'val' -> 'valid' and
            'test' -> 'eval'. Defaults to None
        ids_map: Optional dictionary containing the id_remapping that was initially
            applied to create the darknet dataset. Will reverse it to get back to the
            original class mapping. The dictionary must have darknet dataset's category
            ids (in sequential order then) as keys and with corresponding values that
            are dictionaries containing ``name`` and ``id`` keys relative to this
            class. Note that this can also be a path to a json file containing the
            dictionary. Defaults to None.
        image_info: Optional DataFrame containing image information. Must contain at
            least the following columns : ``relative_path``, ``id``, ``width``,
            ``height``. Defaults to None

    Raises:
        ValueError: Errors when neither ``dataset_path`` nor ``data_file`` is
            specified, when the yaml file does not contain a mapping, when no split
            is found or when a split is neither a folder nor a text file.

    Returns:
        Loaded dataset object
    """

    def get_folders_and_file(
        root_path: Path, data_path: str
    ) -> tuple[Path, list[str] | None]:
        # A folder split is globbed later on, while a text file split lists its
        # images relatively to the dataset root
        full_path = root_path / data_path
        if full_path.is_dir():
            return full_path, None
        elif full_path.is_file():
            with open(full_path) as f:
                file_list = list(filter(bool, f.read().split("\n")))
            return root_path, file_list
        else:
            raise ValueError(f"Error, {full_path} is neither a folder nor a text file")

    if split_name_mapping is None:
        split_name_mapping = {"val": "valid", "test": "eval"}
    dataset_path, data_file = deduce_data_file_and_dataset_folder(
        dataset_path, data_file, default_data_file="data.yaml"
    )
    assert data_file.suffix in [".yml", ".yaml"]

    with open(data_file) as f:
        data_dict = yaml.safe_load(f)
    if not isinstance(data_dict, dict):
        # e.g. an empty file, for which safe_load gives None
        raise ValueError(f"Data file {data_file} does not contain a yaml mapping")
    # ``path`` may be absent or left empty in the yaml file
    dataset_root = Path(data_dict.get("path") or "")
    if not dataset_root.is_absolute():
        dataset_root = dataset_path / dataset_root

    names = data_dict["names"]
    if isinstance(names, Mapping):
        # Recent yolov5 yaml files give names as an ``{index: name}`` mapping.
        # Order the names by index so that enumerating them gives the ids back
        # (exact as long as the indices are sequential and start at 0).
        names = [names[index] for index in sorted(names)]

    # Load the json file once rather than once per split
    if isinstance(ids_map, Path | str):
        with open(ids_map) as f:
            ids_map = json.load(f)
        assert isinstance(ids_map, dict)

    split_datasets = []
    if splits is None:
        splits = [
            name
            for name, value in data_dict.items()
            if name not in ["path", "names"] and isinstance(value, str)
        ]
    else:
        # Materialize, so that a one-shot iterator can be tested for emptiness
        splits = list(splits)

    if not splits:
        raise ValueError("Not split was found or specified")

    for split_name in splits:
        split_path = data_dict[split_name]
        split_folder, split_files = get_folders_and_file(dataset_root, split_path)
        split_dataset = from_darknet_generic(
            split_folder,
            split_folder,
            names,
            split_files,
            split_name_mapping.get(split_name, split_name),
            ids_map,
            image_info,
        )
        split_datasets.append(split_dataset)
    return sum(split_datasets)  # pyright: ignore




[docs]
def from_darknet_generic(
    images_root: Path | str,
    labels_root: Path | str,
    names: Iterable[str],
    image_files_list: Iterable[str | Path] | None = None,
    split: str | None = None,
    ids_map: dict[int, dict[str, Any]] | str | Path | None = None,
    image_info: pd.DataFrame | None = None,
) -> Dataset:
    """Load a darknet like dataset from folders, class names and optionally a list
    of image files, without needing a data file.

    Note:
        Unlike the darknet and yolov5 loaders, this function only accepts a single
        split value.

    Note:
        If no file list is given, the images are taken from what
        ``get_images_from_folder`` finds in ``images_root``.

    Args:
        images_root: Folder containing the image files to load
        labels_root: Folder containing the txt annotations files. For each image,
            the annotation file is first searched at the same relative path as the
            image with a txt extension, and then at the path given by the yolov5
            convention (last "images" folder renamed to "labels"). One of the two
            must exist.
        names: list of classes used in the label map when saving this darknet dataset.
            Category ids are the positions in this list.
        image_files_list: list of images to read. Their path must be relative to
            ``images_root``. If set to None, will use the images found in
            ``images_root``
        split: Split name of the constructed dataset. Defaults to None.
        ids_map: Optional dictionary containing the id_remapping that was initially
            applied to create the darknet dataset. Will reverse it to get back to the
            original class mapping. The dictionary must have darknet dataset's category
            ids (in sequential order then) as keys and with corresponding values that
            are dictionaries containing ``name`` and ``id`` keys relative to this
            class. Note that this can also be a path to a json file containing the
            dictionary. Defaults to None.
        image_info: Optional DataFrame containing image information. Must contain at
            least the following columns : ``relative_path``, ``id``, ``width``,
            ``height``. Defaults to None

    Raises:
        FileNotFoundError: when the annotation file corresponding to a particular image
            file can not be found, be it using usual darknet convention or yolov5 one.

    Returns:
        Loaded dataset object
    """
    images_dir = Path(images_root)
    labels_dir = Path(labels_root)
    if image_files_list is not None:
        image_files_list = [Path(file_name) for file_name in image_files_list]
    else:
        image_files_list = get_images_from_folder(images_dir)

    if isinstance(ids_map, Path | str):
        with open(ids_map) as f:
            ids_map = json.load(f)
        assert isinstance(ids_map, dict)

    image_rows = []
    annotation_rows = []
    for image_number, image_path in enumerate(tqdm(image_files_list)):
        image_row = get_image_info(
            image_number=image_number,
            relative_path=image_path,
            absolute_path=images_dir / image_path,
            image_info=image_info,
        )
        image_row["split"] = split
        image_rows.append(image_row)

        # Plain darknet layout is tried first, yolov5 layout second
        simple_candidate = labels_dir / image_path.with_suffix(".txt")
        yolov5_candidate = labels_dir / yolov5_img_path_to_label_path(image_path)
        label_file = next(
            (
                candidate
                for candidate in (simple_candidate, yolov5_candidate)
                if candidate.is_file()
            ),
            None,
        )
        if label_file is None:
            raise FileNotFoundError(
                f"Annotation could not be found. Neither {simple_candidate} nor"
                f" {yolov5_candidate} are files"
            )

        with open(label_file) as f:
            label_text = f.read()
        non_blank_lines = (line for line in label_text.split("\n") if line)
        annotation_rows.extend(txt_to_bbox(non_blank_lines, image_row["id"]))

    label_map = dict(enumerate(names))
    return to_dataset_object(
        images_dir,
        label_map,
        image_rows,
        annotation_rows,
        "cxwcyh",
        ids_map,
    )




[docs]
def from_darknet_json(
    dataset_path: Path,
    json_path: Path,
    ids_map: dict[int, dict[str, Any]],
    image_info: pd.DataFrame | None,
    split_name: str = "eval",
) -> Dataset:
    """Same as from_darknet, except the data file is replaced with a json file
    containing directly annotations information. This is typically the format of
    predictions done by darknet's detector.

    Args:
        dataset_path: folder containing the dataset, from which
            the relative path are given
        json_path: json file containing a list of predictions as dictionaries. Each
            dictionary will have bbox info as well as image path, which will be used
            to retrieve the original image id thanks to image_info DataFrame
        ids_map: dictionary containing the id_remapping that was initially applied to
            create the darknet dataset. Will reverse it to get back to the original
            class mapping. The dictionary must have darknet dataset's category ids
            (in sequential order then) as keys and with corresponding values that are
            dictionaries containing ``name`` and ``id`` keys relative to this class.
        image_info: DataFrame containing image information. Must contain at least the
            following columns : ``relative_path``, ``id``, ``width``, ``height``.
            Rows whose index is not found in the loaded images are appended to the
            images of the resulting dataset.
        split_name: Name of the split that will be assigned to the ``split`` column of
            the resulting dataset's images dataframe.

    Returns:
        Loaded dataset object
    """
    images = []
    annotations = []
    with open(json_path) as f:
        json_list = json.load(f)
    for image_number, frame in enumerate(json_list):
        current_image = {}
        image_path = Path(frame["filename"])
        relative_path = get_relative_image_path(
            dataset_path=dataset_path, image_path=image_path
        )
        current_image["relative_path"] = relative_path
        current_image["type"] = image_path.suffix
        current_image["split"] = split_name
        absolute_path = dataset_path / relative_path
        current_image.update(
            **get_image_info(
                image_number=image_number,
                relative_path=relative_path,
                absolute_path=absolute_path,
                image_info=image_info,
            )
        )

        # One annotation per detected object of the frame
        bboxes = []
        for current_object in frame["objects"]:
            coords = current_object["relative_coordinates"]
            bbox = {
                CX_COLUMN: coords["center_x"],
                CY_COLUMN: coords["center_y"],
                W_COLUMN: coords["width"],
                H_COLUMN: coords["height"],
                "category_id": current_object["class_id"],
                "category_str": current_object["name"],
                "confidence": current_object["confidence"],
                "image_id": current_image["id"],
            }
            bboxes.append(bbox)
        annotations.extend(bboxes)
        images.append(current_image)

    dataset = to_dataset_object(
        dataset_path,
        None,
        images,
        annotations,
        "cxwcyh",
        ids_map,
    )

    if image_info is not None:
        # Complete the loaded images with the known images that are absent from
        # the json file. The loaded images must be the first operand: concatenating
        # ``image_info`` with its own subset would discard them and duplicate rows.
        # NOTE(review): like the index comparison itself, this assumes that
        # ``image_info`` is indexed by image id - confirm against callers.
        images_without_box = image_info[~image_info.index.isin(dataset.images.index)]
        dataset.images = pd.concat([dataset.images, images_without_box])
    dataset.images["split"] = split_name

    return dataset




[docs]
def dataset_to_darknet(
    dataset: Dataset,
    output_path: Path | str,
    copy_images: bool = False,
    overwrite_images: bool = True,
    overwrite_labels: bool = True,
    yolo_version: int = 1,
    data_yaml_name: str = "data.yaml",
    split_name_mapping: dict[str, str] | None = None,
    create_split_folder: bool = False,
) -> None:
    """Save given dataset to darknet. Will recreate folder structure
    of dataset_path + image relative path. Will also create metadata files needed
    by darknet, i.e. names file, json label maps, list of train, validation and
    evaluation images (depending on available splits) and train and eval data files

    Args:
        dataset: dataset object to save as darknet
        output_path: root folder where images, corresponding text files
            describing bboxes will be saved along metadata files described above
        copy_images: If set to False, will create a symbolic link instead of copying.
            Much faster, but needs to keep original images in the same relative path.
            Images that are not jpeg files are always re-encoded to jpeg.
            Defaults to False.
        overwrite_images: if set to False, will skip images that are already copied.
            Defaults to True.
        overwrite_labels: if set to False, will skip annotation that are already
            created. Defaults to True.
        yolo_version: version number for yolo. Officially supported versions are
            ``1`` to ``4`` (regular darknet), ``5`` (Ultralytics), ``7``
            (Chien-Yao Wang). Versions ``6`` and ``8`` are NOT officially supported, and
            will be assumed to all be version ``5``.
        data_yaml_name: if ``yolo_version`` is > 4, will be used as filename to save
            the yaml file at the root of ``output_path``
        split_name_mapping: mapping dict to replace split names to other ones. split
            names not present in mapping will not be modified. If set to None,
            will apply yolov5 conventional mapping,
            i.e. ``{'valid': 'val', 'validation': 'val', 'eval': 'test'}``.
            Defaults to None
        create_split_folder: if set to True, will create a dedicated folder for each
            split and will save images in it. Image paths in {split}.txt will be changed
            accordingly. Note that this changes the dataset structure. Defaults to False

    Raises:
        ValueError: when images of the dataset are missing.
        AssertionError: when ``yolo_version`` is 4 or less and the dataset has a train
            split but no validation split.
    """
    try:
        assert_images_valid(dataset, load_images=False)
    except AssertionError as e:
        raise ValueError(
            "Dataset images are missing, check that the images root folder is the"
            " right one"
        ) from e
    if split_name_mapping is None:
        split_name_mapping = {"valid": "val", "validation": "val", "eval": "test"}

    output_path = Path(output_path)
    output_path.mkdir(exist_ok=True, parents=True)
    id_remapping = {
        i: {"id": cat_id, "name": cat_name}
        for i, (cat_id, cat_name) in enumerate(dataset.label_map.items())
    }

    # Remap classes so that category ids are now sequential
    sorted_dataset = dataset.remap_classes(
        {value["id"]: i for i, value in id_remapping.items()}
    )
    name_list = [*sorted_dataset.label_map.values()]
    name_list_path = output_path / "classes.names"
    with open(name_list_path, "w") as f:
        f.writelines(f"{name}\n" for name in name_list)
    with open(output_path / "ids_map.json", "w") as f:
        json.dump(id_remapping, f, indent=2)
    with open(output_path / "label_map.json", "w") as f:
        json.dump(dataset.label_map, f, indent=2)

    valid_splits = ["train", "valid", "eval"]

    # The mask has as many elements as there are images: its values must be
    # tested, not its length
    if not dataset.images["split"].isin(valid_splits).all():
        print(
            f"Darknet saver only takes these splits : {', '.join(valid_splits)}. "
            "Other values will be ignored"
        )

    remapped_splits = dataset.images["split"].replace(split_name_mapping)
    if create_split_folder:
        output_relative_path = pd.concat(
            [dataset.images["relative_path"], remapped_splits], axis=1
        ).apply(lambda x: x["split"] / x["relative_path"].with_suffix(".jpg"), axis=1)
    else:
        output_relative_path = dataset.images["relative_path"].apply(
            lambda x: x.with_suffix(".jpg")
        )
    split_path_lists = {
        split_name: path_list
        for split_name, path_list in output_relative_path.groupby(remapped_splits)
    }

    yaml_data = {
        "nc": len(name_list),
        "names": name_list,
    }
    for split_name, split_paths in split_path_lists.items():
        list_name = f"{split_name}.txt"
        list_path = output_path / list_name
        if yolo_version == 7:
            # For yolov7, paths need to be absolute, for both the txt file and the
            # image paths inside of it.
            # If possible, avoid saving to yolov7 format because it's hard to move
            # elsewhere afterwards

            # For the following pyright ignore, see the bug issue here
            # https://github.com/pandas-dev/pandas-stubs/issues/706
            # TODO : remove it when it is solved
            yaml_data[split_name] = str(list_path.absolute())  # pyright: ignore
            with open(list_path, "w") as f:
                f.writelines(
                    f"{output_path.absolute() / sample}\n" for sample in split_paths
                )
        else:
            yaml_data[split_name] = list_name  # pyright: ignore
            with open(list_path, "w") as f:
                f.writelines(f"{sample}\n" for sample in split_paths)

    if yolo_version <= 4:

        def list_name_for(*candidate_splits: str) -> str:
            # Name of the list file of the first candidate split that was actually
            # exported, empty string when there is none
            for candidate in candidate_splits:
                if len(split_path_lists.get(candidate, [])) > 0:  # pyright: ignore
                    return yaml_data[candidate]  # pyright: ignore
            return ""

        # Splits were renamed with ``split_name_mapping`` before being exported
        # (e.g. "eval" is exported as "test" by default), so each darknet role is
        # searched under its conventional name first and its remapped name second.
        train_list_name = list_name_for(
            "train", split_name_mapping.get("train", "train")
        )
        val_list_name = list_name_for("val", split_name_mapping.get("valid", "valid"))
        eval_list_name = list_name_for("eval", split_name_mapping.get("eval", "eval"))

        if train_list_name:
            assert val_list_name, "A validation split is needed along the train split"
        if train_list_name or val_list_name:
            train_data_path = output_path / "train.data"
            write_data_file(
                {
                    "classes": len(name_list),
                    "train": train_list_name,
                    "valid": val_list_name,
                    "names": name_list_path.name,
                },
                train_data_path,
            )

        if eval_list_name:
            eval_data_path = output_path / "eval.data"
            write_data_file(
                {
                    "classes": len(name_list),
                    "valid": eval_list_name,
                    "names": name_list_path.name,
                },
                eval_data_path,
            )
    else:
        with open(output_path / data_yaml_name, "w") as f:
            yaml.safe_dump(yaml_data, f)

    bboxes_yolo = export_bbox(
        sorted_dataset.annotations, sorted_dataset.images, output_format="cxwcyh"
    )
    yolo_annotations = pd.concat([sorted_dataset.annotations, bboxes_yolo], axis=1)
    # Group annotations once, instead of filtering the whole dataframe per image
    annotations_by_image = {
        image_id: image_annotations
        for image_id, image_annotations in yolo_annotations.groupby(
            "image_id", sort=False
        )
    }
    no_annotations = yolo_annotations.iloc[:0]
    output_resolved = output_path.resolve()
    for image_id, image_data in tqdm(
        dataset.images.iterrows(), total=len(dataset.images)
    ):
        instances = annotations_by_image.get(image_id, no_annotations)
        output_image_relative_path = output_relative_path.loc[
            image_id
        ]  # pyright: ignore
        input_image_path = (dataset.images_root / image_data["relative_path"]).resolve()
        output_image_path = output_resolved / output_image_relative_path
        output_image_path.parent.mkdir(exist_ok=True, parents=True)
        if yolo_version > 4:
            output_txt_path = yolov5_img_path_to_label_path(output_image_path)
        else:
            output_txt_path = output_image_path.with_suffix(".txt")
        # When the dataset is saved in its own images folder, the image is already
        # in place: unlinking it to create a symbolic link would destroy it, and
        # copying it onto itself would fail.
        image_in_place = output_image_path == input_image_path
        if not image_in_place and (
            not output_image_path.is_file() or overwrite_images
        ):
            if image_data["type"].lower() not in [".jpg", ".jpeg"]:
                # Output images always have the jpg extension: re-encode
                image = imread(input_image_path)
                imwrite(output_image_path, image)
            elif copy_images:
                shutil.copy(input_image_path, output_image_path)
            else:
                output_image_path.unlink(missing_ok=True)
                output_image_path.symlink_to(
                    relpath(input_image_path, output_image_path.parent)
                )
        if not output_txt_path.is_file() or overwrite_labels:
            output_txt_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_txt_path, "w") as f:
                f.writelines(bbox_to_txt(instances))