Source code for lours.evaluation.detection.util

import warnings
from collections.abc import Iterable

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn

from ...utils import BBOX_COLUMN_NAMES



[docs]
def construct_matches_df(
    predictions_targets_df: pd.DataFrame, min_iou: float = 0
) -> pd.DataFrame:
    """Build the list of prediction/target match pairs for one group of boxes.

    The input dataframe holds targets and predictions concatenated together.
    All its boxes are assumed to belong to the same image and the same category:
    the caller is responsible for grouping beforehand. Predictions or targets
    left without a partner get a <NA> match id.

    Args:
        predictions_targets_df: DataFrame with both targets and predictions.
            The following columns are read:

             - ``groundtruth`` : boolean, True for a target, False for a prediction
             - ``box_x_min``, ``box_y_min``, ``box_width``, ``box_height``: Bounding box
               information used to compute IoU
             - ``confidence`` : read for the prediction rows, to prioritize
               predictions during matching

        min_iou: IoU above which a detection is considered valid.
            The bound is exclusive. Defaults to 0.

    Returns:
        DataFrame of matches with ``prediction_id``, ``groundtruth_id`` and ``iou``
        columns. Index is irrelevant. Every prediction id and every target id is
        expected to appear exactly once, so in the worst case (no match at all)
        there are N+M rows, N being the number of predictions and M the number of
        targets.
    """
    is_target = predictions_targets_df["groundtruth"]
    targets = predictions_targets_df[is_target]
    detections = predictions_targets_df[~is_target]

    iou_matrix = get_ious(targets, detections)
    per_prediction, per_target = get_matches(
        iou_matrix, detections["confidence"], min_iou
    )

    # One row per prediction, matched or not
    prediction_rows = per_prediction.reset_index(names="prediction_id")
    prediction_rows = prediction_rows.rename(columns={"match_id": "groundtruth_id"})

    # Targets that no prediction claimed; matched targets are already covered by
    # the prediction rows above
    missed = per_target[per_target["match_id"].isna()]
    missed_rows = missed.reset_index(names="groundtruth_id")
    missed_rows = missed_rows.rename(columns={"match_id": "prediction_id"})

    return pd.concat([prediction_rows, missed_rows])




[docs]
def get_ious(groundtruth: pd.DataFrame, predictions: pd.DataFrame) -> pd.DataFrame:
    """Compute the IoU between every prediction box and every target box.

    The result is an N x M matrix, N being the number of predictions and M the
    number of targets, wrapped in a dataframe whose rows are labelled with the
    prediction ids and whose columns are labelled with the target ids. Rows and
    columns follow the order of the input dataframes.

    Note that this does not check the category_id, only the bounding box coordinates.

    Args:
        groundtruth: DataFrame comprising bounding box targets data.
            Must include the columns listed in ``BBOX_COLUMN_NAMES``, which are
            unpacked here as x min, y min, width and height.
        predictions: DataFrame comprising bounding box prediction data.
            Must include the same bounding box columns as groundtruth.

    Returns:
        DataFrame comprising iou values between groundtruth and predictions. Index is
        prediction id, column name is target id
    """
    # Target coordinates: four arrays of shape [M], which broadcast as [1, M]
    gt_left, gt_top, gt_width, gt_height = groundtruth[BBOX_COLUMN_NAMES].to_numpy().T
    # Prediction coordinates get a trailing axis: four arrays of shape [N, 1]
    pred_left, pred_top, pred_width, pred_height = (
        predictions[BBOX_COLUMN_NAMES].to_numpy().T[..., np.newaxis]
    )

    # Edges of the intersection rectangle; broadcasting makes each array [N, M]
    inter_left = np.maximum(gt_left, pred_left)
    inter_right = np.minimum(gt_left + gt_width, pred_left + pred_width)
    inter_top = np.maximum(gt_top, pred_top)
    inter_bottom = np.minimum(gt_top + gt_height, pred_top + pred_height)

    intersection = (inter_right - inter_left) * (inter_bottom - inter_top)
    # Boxes that do not overlap along at least one axis have no intersection,
    # even when the product of two negative extents happens to be positive
    disjoint = (inter_right < inter_left) | (inter_bottom < inter_top)
    intersection[disjoint] = 0

    union = gt_width * gt_height + pred_width * pred_height - intersection
    return pd.DataFrame(
        intersection / union,
        index=predictions.index,
        columns=groundtruth.index,
    )




[docs]
def get_matches(
    iou_df: pd.DataFrame, confidence: pd.Series | None = None, min_iou: float = 0
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Get the best matching target for every prediction and
    return matching target (if any) for every prediction and
    matching prediction (if any) for every target
    Prediction are either reordered by confidence, or assumed already ordered in the
    first place.


    Args:
        iou_df: IoU values matrix encapsulated in a dataframe to index
            rows with prediction ids and columns with target ids
        confidence: series with the number of rows as iou_df, will
            be used to reorder iou_df's rows in descending order. If not given, will
            assume iou_df is already ordered.
        min_iou: Minimum IoU value above which a match is considered
            valid.

    Returns:
        dataframes of matching ids with corresponding
        ious. First df is indexed by prediction ids, second df is indexed by target id
    """
    if confidence is not None:
        ious = iou_df.reindex(confidence.sort_values(ascending=False).index)
    else:
        ious = iou_df.copy()
    # Note that we use the Int64 type, which is the regular int64 + NA value, which is
    # used here to designate the absence of match
    # Both matches dataframes are initialized to have zero match and will be iteratively
    # updated
    detection_matches = pd.DataFrame(
        np.zeros((len(ious), 2)),
        index=ious.index,
        columns=["iou", "match_id"],
    )
    groundtruth_matches = pd.DataFrame(
        np.zeros((len(ious.columns), 2)),
        index=ious.columns,
        columns=["iou", "match_id"],
    )
    detection_matches["match_id"] = pd.NA
    groundtruth_matches["match_id"] = pd.NA

    match_dtypes = {"iou": float, "match_id": "Int64"}
    detection_matches = detection_matches.astype(match_dtypes)
    groundtruth_matches = groundtruth_matches.astype(match_dtypes)

    # Iterative vectorize matching algorithm
    # 1 - Get best target match of each prediction
    # 2 - Remove every prediction and corresponding target until the first duplicate
    # 3 - Update aforementioned match dataframes accordingly
    # 4 - Repeat with this new subset
    # Note that we don't need to compute best target match each time (only until the
    # first duplicate), but that fact that it is vectorized across the iou matrix
    # makes it basically free.
    while len(ious) > 0:
        best_iou = ious.max(axis=1)
        valid = best_iou > min_iou
        ious = ious[valid]
        best_iou = best_iou[valid]
        if len(ious) == 0:
            break
        best_matches = ious.idxmax(axis=1)
        duplicated = best_matches.duplicated()
        if not duplicated.max():
            # No duplicate (max is False), perfect matching
            first_duplicated = len(duplicated)
        else:
            # Get first occurrence of duplicated == True
            first_duplicated = duplicated.argmax()

        # Partition between matched and not matched yet
        matched = best_matches.iloc[:first_duplicated]
        matched_iou = best_iou.iloc[:first_duplicated]
        not_matched = best_matches.iloc[first_duplicated:]

        ious = ious.loc[not_matched.index].drop(pd.Index(matched.values), axis=1)

        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore", category=FutureWarning)
            detection_matches.loc[matched.index, "match_id"] = matched
            groundtruth_matches.loc[matched, "match_id"] = matched.index.to_numpy()

        # Get corresponding iou values
        detection_matches.loc[matched.index, "iou"] = matched_iou
        groundtruth_matches.loc[matched, "iou"] = matched_iou

    return detection_matches, groundtruth_matches




[docs]
def pr_curve(
    results: pd.DataFrame,
    min_iou: float = 0,
    betas: Iterable[float] = (1,),
    reindex_series: pd.Series | None = None,
) -> pd.DataFrame:
    r"""Construct Precision Recall curve from results dataframe and minimum iou below
    which detection is considered invalid

    Additionally, computes F-score with different :math:`\beta` values with the
    following equation.

    .. math:
        F_\beta = \frac{(1 + \beta^2)(\text{precision} \times \text{recall})}
                       {\text{precision} \times \beta^2 + \text{recall}}

    Args:
        results: Dataframe modelling detections, with corresponding
            confidence and groundtruth (whether this detection would be True positive or
            a False positive). Should include the columns ``groundtruth``, ``iou`` and
            ``confidence``, and rows should be sorted so that confidence values are
            sorted.
        min_iou: Value below which the detection is considered
            invalid. In other words, the groundtruth becomes ``False``. The prediction
            becomes a False Positive, and the corresponding groundtruth is a False
            negative. Defaults to 0.
        betas: beta values to compute the F-Score with. Must be an iterable of floats.
            Defaults to ``(1,)``
        reindex_series: Recall bins to reindex the curve. before returning it.

    Returns:
        Precision Recall curve dataframe. Columns are ``precision``,
        ``recall``, ``f{beta}_score`` and ``confidence_threshold``, where betas are the
        given :math:`\beta` values in ``betas`` (see equation above).
        Index is irrelevant.
    """
    results = results.sort_values("confidence", ascending=False)
    ntargets = results["groundtruth"].sum()
    confidence = results["confidence"].to_numpy()
    distinct_value_indices = np.diff(confidence).astype(bool)
    distinct_value_indices = np.append(distinct_value_indices, True)
    confidence = confidence[distinct_value_indices]
    # Cumulative sum of true positives, from which we only extract the maximum for
    # distinct confidence value
    tp_count = (results["groundtruth"] * (results["iou"] > min_iou)).to_numpy().cumsum()
    tp_count = tp_count[distinct_value_indices]

    # Precision and recall
    # Precision is true positive / number of positive predictions
    # Recall is true positive / number of total targets (even the ones with IOU of zero)
    precision = tp_count / (1 + distinct_value_indices.nonzero()[0])
    # In the degenerate case of no targets to be detected, the recall cannot be computed
    # Hence the NaN
    recall = tp_count / ntargets if ntargets > 0 else tp_count * np.NaN

    # Add 2 points for each extreme
    # Precision will not be above first value,
    # no matter how  high the confidence threshold is
    # Recall will not be above last value,
    # no matter how low the confidence threshold is
    # We still add the extremal points with recall = 1 and precision = 0 and
    # precision = 1 and recall = 0 for completeness
    # Wen this curve is reindexed by precision or recall
    # (which is the case for pycocotools).

    # Note pyright ignore flags to be removed as soon as we get the pre-commit hook
    # pyright 1.1.206
    # See https://github.com/microsoft/pyright/issues/2809
    precision = np.concatenate([[1], precision[:1], precision, [0, 0]])
    recall = np.concatenate([[0, 0], recall, recall[-1:], [1]])
    confidence = np.concatenate([[1, 1], confidence, [0, 0]])

    # Make sure the precision is only decreasing.
    # The rationale is that the true precision recall curve (thus with infinite number
    # of points) is only decreasing.
    # But the way it is constructed with a finite dataset makes precision drop when
    # A false positive occurs, and increase again at the next true positive.
    # Most conservative way of constructing a realistic curve is to make points of
    # dropping precision equal to the next highest precision.
    # For that we use numpy's universal function, and more specifically the accumulate
    # feature
    # see https://numpy.org/doc/stable/reference/generated/numpy.ufunc.accumulate.html
    precision = np.maximum.accumulate(precision[::-1])[::-1]
    f_scores = {}
    for beta in betas:
        # See https://en.wikipedia.org/wiki/F-score for formula
        f_scores[f"f{beta}_score"] = (
            (1 + beta**2) * (precision * recall) / (precision * beta**2 + recall + 1e-5)
        )
    result = pd.DataFrame(
        np.stack([precision, recall, confidence, *f_scores.values()], axis=1),
        columns=["precision", "recall", "confidence_threshold", *f_scores.keys()],
    ).fillna(0)

    # Remove points which are not useful to draw the curve or compute the mAP, ie the
    # points  that are on a straight line
    constant_precision = (result["precision"].diff() == 0) & (
        result["precision"].diff(-1) == 0
    )
    result = result[~constant_precision]

    constant_recall = (result["recall"].diff() == 0) & (result["recall"].diff(-1) == 0)
    result = result[~constant_recall]

    if reindex_series is not None:
        result = result.set_index(reindex_series.name)
        # Remove duplicated index values, otherwise reindex will error
        result = result[~result.index.duplicated(keep="last")]
        result = result.fillna(0)
        result = result.reindex(reindex_series, method="ffill").reset_index()
    return result




[docs]
def compute_average_precision(pr_curve: pd.DataFrame) -> float:
    """Average the precision of a PR curve over its recall values.

    Note:
        This is a right Riemann sum: on each recall interval, only the precision
        at the right end of the interval is taken into account.

    Args:
        pr_curve: Dataframe with ``precision`` and ``recall`` columns.

    Returns:
        Average precision for this particular PR curve
    """
    ordered = pr_curve.sort_values(by="recall")
    # Width of each recall interval. The first point has no left neighbour, so its
    # NaN width is turned into 0, which discards the first precision value.
    recall_steps = ordered["recall"].diff().fillna(0)
    weighted_precision = ordered["precision"].mul(recall_steps)
    return weighted_precision.sum()




[docs]
def resample_count(
    original_confidences: Iterable[float], new_confidences: Iterable[float]
) -> pd.Series:
    """Resample a sequence of confidence values into cumulative object counts.

    Each original confidence value stands for one detected object. For each value
    in ``new_confidences``, the result is the number of original confidence values
    greater than or equal to it, i.e. the number of objects that would have been
    detected at that threshold.

    Note:
        ``new_confidences`` must be sorted unique values.

    Args:
        original_confidences: Original set of confidence value. Each confidence value
            corresponds to one detected object.
        new_confidences: New set of confidence values to resample the number of detected
            objects from. Usually, a range of N elements, from 0 to 1.

    Returns:
        Series with the same length as ``new_confidences``, whose index is
        ``new_confidences`` (named ``confidence``) and whose values are the counts
        corresponding to the confidence threshold given in the index. Thresholds
        above every original confidence get a count of 0.
    """
    occurrences = pd.Series(list(original_confidences)).value_counts()
    # Walking the confidence values from highest to lowest, the running total is
    # the number of objects at or above each original confidence value
    cumulative = occurrences.sort_index(ascending=False).cumsum()

    thresholds = pd.Index(new_confidences, name="confidence")
    # Forward fill along the descending index: a threshold falling between two
    # original values takes the count of the next higher original value
    return cumulative.reindex(thresholds, method="ffill").fillna(0)




[docs]
def confusion_matrix(matches: pd.DataFrame) -> pd.DataFrame:
    """Compute the normalized confusion matrix of a matches DataFrame.

    Missing labels (unmatched predictions or groundtruths) are gathered under
    a ``"None"`` class, which is always placed last. The matrix is normalized
    over the groundtruth labels (``normalize="true"``).

    Args:
        matches: DataFrame containing the matches between groundtruth and predictions
            in which we expect to have the following columns :

            - ``prediction_label``
            - ``groundtruth_label``

            corresponding to the predicted and groundtruth labels,
            respectively, in order to compute the confusion matrix.

    Returns:
        A confusion matrix as DataFrame with class names as column names and row ids.
        Rows are groundtruth labels, columns are prediction labels.
    """
    predicted = matches["prediction_label"].fillna("None").astype(str)
    expected = matches["groundtruth_label"].fillna("None").astype(str)

    # Every class seen on either side, in alphabetical order, with the
    # placeholder class for missing labels moved to the end
    observed = set(predicted).union(expected)
    labels = sorted(observed - {"None"})
    if "None" in observed:
        labels.append("None")

    normalized = confusion_matrix_sklearn(
        expected, predicted, labels=labels, normalize="true"
    )
    row_ids = pd.Index(labels, name="label")
    return pd.DataFrame(normalized, index=row_ids, columns=labels)




[docs]
def display_confusion_matrix(confusion_matrix: pd.DataFrame, title: str = ""):
    """Display a ConfusionMatrixDisplay object for a given Dataframe.

    Cell values are rounded to 2 decimals, and font sizes are reduced when there
    are many labels or when labels are long.

    Args:
        confusion_matrix: Dataframe containing the confusion matrix data
            as computed by :func:`.confusion_matrix`
        title: Confusion matrix's title

    Raises:
        ImportError: if matplotlib or scikit-learn's plotting helper cannot be
            imported.
    """
    try:
        import matplotlib.pyplot as plt
        from sklearn.metrics import ConfusionMatrixDisplay
    except ImportError as e:
        raise ImportError(
            "Plotting libraries could not be loaded, make sure you have installed"
            " Lours with the 'plot-utils' extra"
        ) from e

    display_labels = confusion_matrix.columns.to_list()

    # Scaling text inside the matrix cells.
    # Text is not scaled according to the number of labels.
    # We need to make the text smaller if the matrix cells are getting smaller as well.
    # Somehow, the size of the cell is both inversely proportional to the number of
    # labels, and also gets smaller if the longest label is very long.
    # This heuristic tries to find the right font size, from xx-small to regular
    text_kw = {}
    n_labels = len(display_labels)
    max_label_length = max(map(len, display_labels))
    n_labels += max_label_length / 3
    if n_labels > 15:
        text_kw["fontsize"] = "xx-small"
    elif n_labels > 11:
        text_kw["fontsize"] = "x-small"
    elif n_labels > 9:
        text_kw["fontsize"] = "small"

    plot = ConfusionMatrixDisplay(
        confusion_matrix.values.round(2),
        display_labels=display_labels,
    ).plot(text_kw=text_kw)
    # Use matplotlib's tick labels function so that we can rotate around the tick
    # and not around the label center. Otherwise, long labels might end up overlapping
    # the next labels
    plot.ax_.set_xticklabels(
        display_labels,
        rotation=45,
        ha="right",
        rotation_mode="anchor",
        fontsize="small" if max_label_length > 10 else "medium",
    )
    if max_label_length > 10:
        plot.ax_.set_yticklabels(
            display_labels,
            fontsize="small",
        )
    plt.title(title)
    plt.xlabel("Prediction label")
    # Axis label previously read "Grounthruth label" (typo)
    plt.ylabel("Groundtruth label")
    plt.tight_layout()