# Source code for lours.utils.bbox_converter

"""Set of functions to import, export or simply convert bounding box numpy arrays
or dataframe depending on the convention used.

14 supported formats (12 box formats plus the 2 point formats ``xy`` / ``XY``), compatible with

- cAIpy JSON
- COCO
- darknet
- etc...
"""

from collections.abc import Sequence
from copy import deepcopy

import numpy as np
import pandas as pd
from numpy import ndarray

# Base conventions, described with four fields:
#   center: first coordinate of each axis is the box center (else the minimum)
#   size:   second coordinate of each axis is a size (else the maximum)
#   point:  only one coordinate per axis is present
#   order:  positions of x1, x2, y1, y2 (or x1, y1 for points) in the input
OPTIONS = {
    "cxwcyh": dict(center=True, size=True, point=False, order=[0, 1, 2, 3]),
    "xwyh": dict(center=False, size=True, point=False, order=[0, 1, 2, 3]),
    "xxyy": dict(center=False, size=False, point=False, order=[0, 1, 2, 3]),
    "xy": dict(center=True, size=False, point=True, order=[0, 1]),
}

# Interleaved variants ({x1}{y1}{x2}{y2}): same semantics as their base
# convention, only the column order differs.
OPTIONS.update(
    cxcywh={**OPTIONS["cxwcyh"], "order": [0, 2, 1, 3]},
    xywh={**OPTIONS["xwyh"], "order": [0, 2, 1, 3]},
    xyxy={**OPTIONS["xxyy"], "order": [0, 2, 1, 3]},
)

# Every convention exists twice: lowercase means relative coordinates,
# uppercase means pixel coordinates.
FORMAT_TO_OPTIONS = {}
for name, options in OPTIONS.items():
    FORMAT_TO_OPTIONS.update(
        {
            name: {**options, "relative": True},
            name.upper(): {**options, "relative": False},
        }
    )


def parse_format_string(format_string: str) -> tuple[dict[str, bool], list[int]]:
    """Translate a bbox format string into boolean options and a column order.

    The returned dictionary has the following keys:

    - "center": whether the first x or y coordinate is the center of the box
      rather than the minimum value (left for x, top for y)
    - "size": whether the second coordinate is the size of the box (width for
      x, height for y) rather than the maximum value (right for x, bottom
      for y)
    - "point": whether the format only holds one coordinate per axis
    - "relative": whether the coordinates are normalized with the image size
      (image width for x, image height for y) rather than given in pixels

    The returned order list gives the positions at which x1, x2, y1, y2 must
    be read in the input array of bounding boxes, e.g. ``[0, 1, 2, 3]`` for
    ``xxyy``, ``[0, 2, 1, 3]`` for ``xyxy`` or ``[0, 1]`` for ``xy``.

    Args:
        format_string: format string describing the convention used for box
            coordinates, e.g. ``XYWH`` (cAIpy / COCO format) or ``cxwcyh``
            (darknet format). See documentation in :func:`.import_bbox` and
            :func:`.export_bbox` for a breakdown of the syntax.

    Raises:
        ValueError: raised when ``format_string`` is not one of the keys of
            ``FORMAT_TO_OPTIONS``.

    Returns:
        tuple containing two elements

        - dictionary of options (see detailed keys above)
        - list of column positions for x1, x2, y1, y2 in the bbox array
    """
    known_formats = [*FORMAT_TO_OPTIONS]
    if format_string in known_formats:
        # Work on a copy so that popping "order" leaves the shared table intact.
        options = deepcopy(FORMAT_TO_OPTIONS[format_string])
        column_order = options.pop("order")
        return options, column_order
    raise ValueError(
        "invalid format string for bbox. "
        f"Valid formats are {known_formats}, got {format_string}"
    )
def column_names_from_options(
    relative: bool, center: bool, size: bool, point: bool
) -> list[str]:
    """Build the dataframe column names matching a set of format options.

    The options are the ones produced by :func:`.parse_format_string`. The
    result tells which columns to read from an input dataframe when importing
    bounding boxes, and how to name the columns of an exported dataframe.

    Args:
        relative: Whether the coordinates are relative to the image size
            (``_relative`` is then appended to every name) or in pixels.
        center: Whether the first coordinate of each axis is the box center
            instead of its minimum value.
        size: Whether the second coordinate of each axis is the box size
            (width / height) instead of its maximum value. Ignored when
            ``center`` is True, since center formats always come with sizes.
        point: Whether only one coordinate per axis is present. Takes
            precedence over ``center`` and ``size``.

    Returns:
        list of column names, in ``x1, x2, y1, y2`` order (``x, y`` for points)
    """
    if point:
        base_names = ("box_x_min", "box_y_min")
    elif center:
        base_names = ("box_x_center", "box_width", "box_y_center", "box_height")
    elif size:
        base_names = ("box_x_min", "box_width", "box_y_min", "box_height")
    else:
        base_names = ("box_x_min", "box_x_max", "box_y_min", "box_y_max")

    suffix = "_relative" if relative else ""
    return [base + suffix for base in base_names]
def column_names_from_format_string(format_string: str) -> list[str]:
    """Return the verbose column names matching a format string.

    This chains :func:`.parse_format_string` and
    :func:`.column_names_from_options`, then arranges the names following the
    order list returned by the parser. It is handy when these columns need to
    be dropped from the annotations dataframe of a Dataset object.

    Args:
        format_string: short acronym describing the box format

    Returns:
        list[str]: List of verbose column names associated to given format.
    """
    options, positions = parse_format_string(format_string)
    canonical_names = column_names_from_options(**options)
    # Pick the canonical names at the positions listed for this format.
    return list(map(canonical_names.__getitem__, positions))
def import_bbox(
    bounding_boxes: pd.DataFrame | ndarray,
    images_df: pd.DataFrame,
    image_ids: Sequence[int] | pd.Series | np.ndarray | None = None,
    input_format: str = "XYWH",
) -> pd.DataFrame:
    """Convert bounding boxes from a particular format to cAIpy/COCO reference.

    Essentially, this will convert bounding box coordinates to pixel coordinates
    with box size.

    ``input_format`` is a format string following a particular syntax to quickly
    let the function figure what conversion operation to do.

    - ``cx`` stands for x-coordinate of the center of the box
    - first ``x`` stands for x-coordinate of the leftmost point of the box
      (i.e. the minimum x value)
    - second ``x`` stands for x-coordinate of the rightmost point of the box
      (i.e. the maximum x value)
    - ``w`` stands for the box width
    - ``cy`` stands for the y-coordinate of the center of the box
    - first ``y`` stands for y-coordinate of the upper point of the box
      (i.e. the minimum y value)
    - second ``y`` stands for y-coordinate of the bottom point of the box
      (i.e. the maximum y value)
    - ``h`` stands for the box height

    In addition, letters in uppercase indicate that the coordinates are in
    pixels, while lowercase indicate that they are relative to the image size,
    i.e. 1 is the full width or height of the image.

    Example:
        cAIpy / COCO::

            "XYWH"  # x left, y top, width, height, all in pixels

        darknet::

            "cxwcyh"  # x center, width, y center, height, all in relative

    Note:
        - x-coordinates and y-coordinates must follow the same convention.
          As such, examples like ``xxYH`` are not allowed
        - order can be either ``{x1}{x2}{y1}{y2}`` or ``{x1}{y1}{x2}{y2}``.
          If ``bounding_boxes`` is a dataframe, it should not change anything
          as columns are named, but order is important if it is a numpy array
          instead
        - center coordinate can only be used with box size. ``cxxcyy`` is thus
          not allowed
        - Finally, only the following box formats are allowed:
          ``cxwcyh``, ``xwyh``, ``xxyy``, ``cxcywh``, ``xywh``, ``xyxy``,
          ``CXWCYH``, ``XWYH``, ``XXYY``, ``CXCYWH``, ``XYWH``, ``XYXY``,
          along with the point formats ``xy`` and ``XY``, which are imported
          as boxes of zero width and height.

    In the case ``bounding_boxes`` is a dataframe, columns must correspond to
    the given format string. The convention is the following:

    - center coordinates are named ``box_{x/y}_center``
    - extremal coordinates are named ``box_{x/y}_{min/max}``
    - box sizes are named ``box_{width/height}``
    - If coordinates are relative, names get appended the ``_relative`` suffix

    Example:
        cAIpy / COCO::

            "XYWH"  # ["box_x_min", "box_y_min", "box_width", "box_height"]

        darknet::

            "cxwcyh"  # ["box_x_center_relative", "box_width_relative",
                      #  "box_y_center_relative", "box_height_relative"]

    Args:
        bounding_boxes: array or dataframe containing the bbox coordinates

            - If it's an array, ``image_ids`` must be given, and its shape
              must be (N, 4). For point formats, a (N, 2) array is accepted as
              well; with a (N, 4) array only the first two columns are read.
            - If it's a dataframe, corresponding columns of ``input_format``
              must be present (see example above)
        images_df: dataframe containing image data, and especially image sizes
            in its ``width`` and ``height`` columns, indexed by image id.
        image_ids: optional list to link every bounding box to its
            corresponding image with its id. As such, this must be the same
            length as ``bounding_boxes``. When it is None and
            ``bounding_boxes`` is a dataframe with an ``image_id`` column,
            that column is used instead. Defaults to None.
        input_format: string defining the expected input format.
            Defaults to "XYWH".

    Raises:
        ValueError: This error is raised if the input format string is not well
            formatted, if no image ids can be found, or if bounding boxes and
            image ids shapes are incompatible

    Returns:
        DataFrame containing the reference cAIpy format, i.e. with x left,
        y top, width and height, in pixels. Index is the same as
        ``bounding_boxes`` when it is a dataframe, and columns are
        ``box_x_min``, ``box_y_min``, ``box_width`` and ``box_height``.
        This is the format expected in the
        :class:`Dataset <lours.dataset.Dataset>` or
        :class:`Evaluator <lours.evaluation.Evaluator>`
    """
    input_format_options, order = parse_format_string(input_format)
    is_point = input_format_options["point"]

    if isinstance(bounding_boxes, ndarray):
        # Box formats read 4 columns, point formats only 2. A 4-column array
        # stays accepted for points to remain backward compatible.
        n_expected = len(order)
        if bounding_boxes.ndim != 2 or bounding_boxes.shape[1] not in (
            4,
            n_expected,
        ):
            raise ValueError(
                "Error with input bounding_boxes array shape. "
                f"Expected (N, {n_expected}), got {bounding_boxes.shape} instead"
            )
        # Reorder columns to the canonical x1, x2, y1, y2 layout.
        bbox_array = bounding_boxes[:, order]
        index = None
    else:
        index = bounding_boxes.index
        # Named columns are already selected in canonical order.
        bbox_array = bounding_boxes[
            column_names_from_options(**input_format_options)
        ].to_numpy()
        if "image_id" in bounding_boxes and image_ids is None:
            image_ids = bounding_boxes["image_id"]

    if image_ids is None:
        raise ValueError(
            "Expected image_ids to be either given in bounding_boxes "
            "dataframe within the 'image_id' column or in image_ids option"
        )
    if bbox_array.shape[0] != len(image_ids):
        raise ValueError("Expected as many image_ids as bbox_array rows")

    if is_point:
        if bbox_array.shape[1] != 2:
            raise ValueError("Expected 2 values per row in input array for points")
        x1, y1 = bbox_array.T
        # Points are represented as boxes of zero width and height.
        x2, y2 = np.zeros_like(x1), np.zeros_like(y1)
    else:
        if bbox_array.shape[1] != 4:
            raise ValueError(
                "Expected 4 values per row in input array for bounding boxes"
            )
        x1, x2, y1, y2 = bbox_array.T

    if input_format_options["relative"]:
        im_width = (
            images_df.loc[image_ids, "width"]  # pyright: ignore
            .to_numpy()
            .astype(float)
        )
        im_height = (
            images_df.loc[image_ids, "height"]  # pyright: ignore
            .to_numpy()
            .astype(float)
        )
        # Out-of-place on purpose: x1..y2 are views of ``bbox_array``, which
        # may be read-only (pandas Copy-on-Write) or of integer dtype, and
        # in-place multiplication fails in both cases.
        x1 = x1 * im_width
        y1 = y1 * im_height
        if not is_point:
            x2 = x2 * im_width
            y2 = y2 * im_height

    if input_format_options["center"]:
        xmin, width = x1 - x2 / 2, x2
        ymin, height = y1 - y2 / 2, y2
    elif input_format_options["size"]:
        xmin, width, ymin, height = x1, x2, y1, y2
    else:
        xmin, width = x1, x2 - x1
        ymin, height = y1, y2 - y1

    converted_bbox_array = np.stack([xmin, ymin, width, height], axis=1)
    bbox_df = pd.DataFrame(
        converted_bbox_array,
        columns=column_names_from_format_string("XYWH"),
    )
    if index is not None:
        bbox_df.index = index
    return bbox_df
def export_bbox(
    annotations_df: pd.DataFrame,
    images_df: pd.DataFrame,
    input_format: str = "XYWH",
    output_format: str = "XYWH",
) -> pd.DataFrame:
    """Convert bounding boxes in Lours's reference to a desired format.

    ``output_format`` is a format string following the same syntax as
    ``input_format`` in :func:`.import_bbox`.

    - ``cx`` stands for x-coordinate of the center of the box
    - first ``x`` stands for x-coordinate of the leftmost point of the box
      (i.e. the minimum x value)
    - second ``x`` stands for x-coordinate of the rightmost point of the box
      (i.e. the maximum x value)
    - ``w`` stands for the box width
    - ``cy`` stands for the y-coordinate of the center of the box
    - first ``y`` stands for y-coordinate of the upper point of the box
      (i.e. the minimum y value)
    - second ``y`` stands for y-coordinate of the bottom point of the box
      (i.e. the maximum y value)
    - ``h`` stands for the box height

    In addition, letters in uppercase indicate that the coordinates are in
    pixels, while lowercase indicate that they are relative to the image size,
    i.e. 1 is the full width or height of the image.

    Example:
        cAIpy / COCO::

            "XYWH"  # x left, y top, width, height, all in pixels

        darknet::

            "cxwcyh"  # x center, width, y center, height, all in relative

    Note:
        - x-coordinates and y-coordinates must follow the same convention.
          As such, examples like ``xxYH`` are not allowed
        - order can be either ``{x1}{x2}{y1}{y2}`` or ``{x1}{y1}{x2}{y2}``.
          Order is important if the output dataframe is expected to be
          converted to numpy after calling this function
        - center coordinate can only be used with box size. ``cxxcyy`` is thus
          not allowed
        - Finally, only the following box formats are allowed:
          ``cxwcyh``, ``xwyh``, ``xxyy``, ``cxcywh``, ``xywh``, ``xyxy``,
          ``CXWCYH``, ``XWYH``, ``XXYY``, ``CXCYWH``, ``XYWH``, ``XYXY``,
          along with the point formats ``xy`` and ``XY``.

    The created dataframe will have the following column names

    - center coordinates are named ``box_{x/y}_center``
    - extremal coordinates are named ``box_{x/y}_{min/max}``
    - box sizes are named ``box_{width/height}``
    - If coordinates are relative, names get appended the ``_relative`` suffix

    Example:
        cAIpy / COCO::

            "XYWH"  # ["box_x_min", "box_y_min", "box_width", "box_height"]

        darknet::

            "cxwcyh"  # ["box_x_center_relative", "box_width_relative",
                      #  "box_y_center_relative", "box_height_relative"]

    Args:
        annotations_df: dataframe containing the annotations. Must have the
            box columns matching ``input_format`` (``box_x_min``,
            ``box_y_min``, ``box_width`` and ``box_height`` for the default
            "XYWH"). An ``image_id`` column is required when ``input_format``
            is not "XYWH" or when ``output_format`` is relative.
        images_df: dataframe containing the image sizes in Lours format. Must
            have at least ``width`` and ``height`` columns, and its index must
            correspond to the ``image_id`` column of the annotations dataframe.
        input_format: string defining the expected input format.
            Defaults to "XYWH".
        output_format: string describing the desired bounding box format
            (see above). Defaults to "XYWH".

    Returns:
        dataframe containing the bounding box coordinates with the
        corresponding column names. It will have the same index as
        ``annotations_df``
    """
    output_format_options, order = parse_format_string(output_format)
    input_format_options, _ = parse_format_string(input_format)

    # Keep a handle on the caller's frame: the frame returned by import_bbox
    # only holds the four box columns, so "image_id" must be read from here.
    source_df = annotations_df
    if input_format != "XYWH":
        annotations_df = import_bbox(
            source_df, images_df, input_format=input_format
        )

    if input_format_options["point"]:
        xmin, ymin = annotations_df[column_names_from_format_string("XY")].to_numpy().T
        width, height = xmin * 0, ymin * 0
    else:
        xmin, ymin, width, height = (
            annotations_df[column_names_from_format_string("XYWH")].to_numpy().T
        ).astype(float)

    if output_format_options["relative"]:
        image_ids = source_df["image_id"]
        im_width = images_df.loc[image_ids, "width"].to_numpy()
        im_height = images_df.loc[image_ids, "height"].to_numpy()
        # Out-of-place on purpose: in the point branch the coordinates are
        # views of the dataframe values, which may be read-only or integer.
        xmin = xmin / im_width
        width = width / im_width
        ymin = ymin / im_height
        height = height / im_height

    if output_format_options["center"]:
        x_center = xmin + width / 2
        y_center = ymin + height / 2
        if output_format_options["point"]:
            bbox_array = np.stack([x_center, y_center], axis=1)
        else:
            bbox_array = np.stack([x_center, width, y_center, height], axis=1)
    elif output_format_options["size"]:
        bbox_array = np.stack([xmin, width, ymin, height], axis=1)
    else:
        xmax = xmin + width
        ymax = ymin + height
        bbox_array = np.stack([xmin, xmax, ymin, ymax], axis=1)

    bbox_df = pd.DataFrame(
        bbox_array,
        columns=column_names_from_options(**output_format_options),
        index=annotations_df.index,
    )
    new_column_order = [
        # order needs to be reversed to construct the dataframe the right way
        bbox_df.columns[order.index(i)]
        for i in range(len(order))
    ]
    return bbox_df[new_column_order]
def convert_bbox(
    bounding_boxes: ndarray | pd.DataFrame,
    images_df: pd.DataFrame,
    image_ids: Sequence[int] | pd.Series | np.ndarray | None = None,
    input_format: str = "XYWH",
    output_format: str = "cxwcyh",
) -> pd.DataFrame:
    """Convert bounding boxes from a particular format to another.

    This is a composition of :func:`.import_bbox` and :func:`.export_bbox`.
    See their documentation for an explanation of format strings.

    Args:
        bounding_boxes: array or dataframe containing the bbox coordinates.

            - If it's an array, ``image_ids`` must be given.
            - If it's a dataframe, corresponding columns of ``input_format``
              must be present
        images_df: dataframe containing image data, and especially image sizes.
        image_ids: optional list to link every bounding box to its
            corresponding image with its id. As such, this must be the same
            length as ``bounding_boxes``. Note that if ``bounding_boxes`` is a
            dataframe and has an ``image_id`` column, this option is not used.
            Defaults to None.
        input_format: string defining the expected input format.
            Defaults to "XYWH".
        output_format: string describing the desired bounding box format.
            Defaults to "cxwcyh".

    Raises:
        ValueError: This error is raised if the input format string is not well
            formatted or if bounding boxes and image ids shapes are incompatible

    Returns:
        dataframe containing the bounding box coordinates with the
        corresponding column names. It will have the same index as
        ``bounding_boxes`` if it is a dataframe.
    """
    # Resolve the image ids once so that both conversion legs look up the same
    # image sizes; the dataframe column takes precedence, as documented above.
    if (
        isinstance(bounding_boxes, pd.DataFrame)
        and "image_id" in bounding_boxes.columns
    ):
        image_ids = bounding_boxes["image_id"]

    imported_bbox = import_bbox(bounding_boxes, images_df, image_ids, input_format)
    # Assign by position: a Series would otherwise be aligned on its index,
    # which may differ from the index of the imported boxes.
    imported_bbox["image_id"] = np.asarray(image_ids)
    return export_bbox(
        imported_bbox,
        images_df,
        "XYWH",
        output_format,
    )