Source code for lours.evaluation.detection.crowd_detection_evaluator

from collections.abc import Iterable
from functools import partial
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from ...utils.grouper import (
    get_group_names,
    group_list,
    group_relational_data,
    groups_to_list,
)
from .detection_evaluator_base import DetectionEvaluatorBase
from .util import resample_count

if TYPE_CHECKING:
    pass


class CrowdDetectionEvaluator(DetectionEvaluatorBase):
    """Class specialization for crowd detection and counting tasks.

    Note that the constructor is the same as the base Evaluator

    See Also:
        :ref:`related tutorial </notebooks/4_demo_evaluation_crowd.ipynb>`
    """

    def compute_count_error(
        self,
        groups: group_list = "category_id",
        quantiles: Iterable[float] = (0.25, 0.5, 0.75),
        confidence_index: Iterable[float] | None = None,
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Compute Count error metrics, both absolute (in number of objects found)
        and relative (with respect to groundtruth number of objects) with respect
        to confidence threshold.

        Along with these metrics, it computes standard deviation of
        absolute/relative error and quantiles for error values.

        See Also:
            :ref:`related tutorial </notebooks/4_demo_evaluation_crowd.ipynb>`

        Computed metrics:

        - Mean Absolute Error (MAE)
        - Root of Mean Square Error (RMSE)
        - Mean Relative Error (MRE)
        - Root of Mean Square Relative Error (RMSRE)

        Args:
            groups: Groups of image or annotation attributes to use to partition
                evaluation results, so that one set of error curves is computed
                per group. Must be a :obj:`.group_list` .
                Defaults to "category_id".
            quantiles: quantile values to get with respect to confidence
                threshold, aggregated per image. Must contain the median value
                (i.e. 0.5). Each quantile ``y`` produces a column named
                ``f"q{y:0.2f}"``. One-shot iterables are accepted: the values
                are materialized once and reused for every prediction set.
                Defaults to (0.25, 0.5, 0.75).
            confidence_index: sequence of confidence thresholds to compute the
                metric on. If set to None, will be 101 equidistant points, from
                0 to 1. One-shot iterators are materialized once and reused for
                every prediction set. Defaults to None.

        Returns:
            A pair of DataFrames.

            - a DataFrame with computed metrics, with multiindex columns, for
              absolute and relative metrics with respect to confidence
            - a DataFrame with detailed error values with respect to confidence
              for each image, in order to compute statistics manually.

        Example:
            >>> from lours.utils.doc_utils import dummy_dataset
            >>> groundtruth = dummy_dataset(
            ...     10, 1000, label_map={0: "person", 1: "car"}, keypoints_share=1
            ... )
            >>> predictions = dummy_dataset(
            ...     10,
            ...     10000,
            ...     label_map=groundtruth.label_map,
            ...     images=groundtruth.images,
            ...     keypoints_share=1,
            ...     add_confidence=True,
            ... )
            >>> evaluator = CrowdDetectionEvaluator(
            ...     groundtruth=groundtruth, predictions=predictions
            ... )
            >>> errors, detailed = evaluator.compute_count_error()
            >>> errors
                                   absolute              ...  relative
                                        MAE        RMSE  ...     q0.75        model
            category_id confidence                       ...
            0           0.00          452.0  452.378824  ...  9.913239  predictions
                        0.01          447.8  448.170057  ...  9.779314  predictions
                        0.02          442.3  442.670645  ...  9.655792  predictions
                        0.03          437.6  437.945887  ...  9.569740  predictions
                        0.04          433.5  433.856774  ...  9.412411  predictions
            ...                         ...         ...  ...       ...          ...
            1           0.96           34.2   35.883144  ... -0.540094  predictions
                        0.97           38.0   39.549968  ... -0.630391  predictions
                        0.98           42.6   43.395852  ... -0.768797  predictions
                        0.99           46.3   46.764303  ... -0.911782  predictions
                        1.00           50.7   51.073476  ... -1.000000  predictions
            <BLANKLINE>
            [202 rows x 14 columns]

            Get the confidence threshold where the Mean Absolute Error is the
            lowest, and show the corresponding rows (one per category).

            >>> mae = errors[("absolute", "MAE")]
            >>> mae
            category_id  confidence
            0            0.00          452.0
                         0.01          447.8
                         0.02          442.3
                         0.03          437.6
                         0.04          433.5
                                       ...
            1            0.96           34.2
                         0.97           38.0
                         0.98           42.6
                         0.99           46.3
                         1.00           50.7
            Name: (absolute, MAE), Length: 202, dtype: float64
            >>> best_mae = errors.loc[mae.groupby(level=0).idxmin()]
            >>> best_mae
                                   absolute             ...  relative
                                        MAE       RMSE  ...     q0.75        model
            category_id confidence                      ...
            0           0.89            5.4   7.655064  ...  0.116153  predictions
            1           0.88           10.9  13.939153  ...  0.310000  predictions
            <BLANKLINE>
            [2 rows x 14 columns]
            >>> best_mae.reset_index().iloc[0]
            category_id                      0
            confidence                    0.89
            absolute     MAE               5.4
                         RMSE         7.655064
                         std          7.788881
                         q0.25             0.0
                         q0.50             2.5
                         q0.75             5.5
                         model     predictions
            relative     MRE           0.10748
                         RMSRE        0.145579
                         std          0.145805
                         q0.25             0.0
                         q0.50        0.055717
                         q0.75        0.116153
                         model     predictions
            Name: 0, dtype: object
        """

        def add_image_id(g: list) -> list:
            # Counts are always computed per image, whatever groups were asked.
            if "image_id" not in g:
                return [*g, "image_id"]
            return g

        def q_at(y):
            # Build a quantile aggregator whose ``__name__`` becomes the label of
            # the resulting column (e.g. ``q0.25``).
            def q(x):
                return x.quantile(y)

            q.__name__ = f"q{y:0.2f}"
            return q

        # ``quantiles`` and ``confidence_index`` are typed as ``Iterable``: a
        # generator would be exhausted after the first prediction set, silently
        # giving different (or empty) results for the following ones. Materialize
        # them once, before looping over the prediction sets.
        stat_agg_functions = ["std", *[q_at(q) for q in tuple(quantiles)]]

        if confidence_index is None:
            confidence_values = np.linspace(0, 1, 101)
        elif iter(confidence_index) is confidence_index:
            # One-shot iterator: freeze its values so every model sees them.
            confidence_values = list(confidence_index)
        else:
            # Re-iterable container (list, tuple, ndarray, ...): pass it as is.
            confidence_values = confidence_index
        count_at_confidences = partial(
            resample_count, new_confidences=confidence_values
        )

        groups = groups_to_list(groups)
        group_names = get_group_names(groups)

        # Number of groundtruth objects per (group..., image).
        gt_group_dict, *_ = group_relational_data(self.groundtruth, groups, self.images)
        gt_pandas_groups = [gt_group_dict[name] for name in group_names]
        gt_count = (
            self.groundtruth.groupby(add_image_id(gt_pandas_groups))
            .size()
            .rename("gt_count")  # pyright: ignore
        )

        # Registers ``progress_apply`` on pandas objects; once is enough.
        tqdm.pandas()

        mae_curves = []
        detailed_error_counts = []
        for (
            current_predictions_name,
            current_predictions_frame,
        ) in self.predictions_dictionary.items():
            group_dict, *_ = group_relational_data(
                current_predictions_frame, groups, self.images
            )
            pandas_groups = [group_dict[name] for name in group_names]

            # Predicted object count per (group..., image), resampled on the
            # requested confidence thresholds.
            prediction_counts = (
                current_predictions_frame.groupby(add_image_id(pandas_groups))[
                    "confidence"
                ]
                .progress_apply(count_at_confidences)  # pyright: ignore
                .rename("count")
                .to_frame()
            )

            # Left join (pandas default): rows are driven by the predictions;
            # a missing groundtruth count is replaced by 0.
            prediction_counts = prediction_counts.join(gt_count).fillna(0)
            prediction_counts["error"] = (
                prediction_counts["count"] - prediction_counts["gt_count"]
            )
            # NOTE: ``gt_count`` can be 0 after the ``fillna`` above, in which
            # case this division yields inf/NaN rather than raising.
            prediction_counts["rel_error"] = (
                prediction_counts["error"] / prediction_counts["gt_count"]
            )
            prediction_counts["abs_error"] = prediction_counts["error"].abs()
            prediction_counts["abs_rel_error"] = prediction_counts["rel_error"].abs()
            prediction_counts["sq_error"] = prediction_counts["error"].pow(2)
            prediction_counts["sq_rel_error"] = prediction_counts["rel_error"].pow(2)
            prediction_counts["model"] = current_predictions_name
            detailed_error_counts.append(prediction_counts)

            # Aggregate per-image errors for each group and confidence threshold.
            grouped = prediction_counts.groupby([*group_names, "confidence"])
            mae = grouped["abs_error"].mean().rename("MAE")
            rmse = np.sqrt(grouped["sq_error"].mean()).rename("RMSE")
            mre = grouped["abs_rel_error"].mean().rename("MRE")
            rmsre = np.sqrt(grouped["sq_rel_error"].mean()).rename("RMSRE")

            stats = grouped["error"].agg(stat_agg_functions)
            rel_stats = grouped["rel_error"].agg(stat_agg_functions)

            absolute_result = pd.concat([mae, rmse, stats], axis=1)
            relative_result = pd.concat([mre, rmsre, rel_stats], axis=1)
            absolute_result["model"] = relative_result["model"] = (
                current_predictions_name
            )
            result = pd.concat(
                [absolute_result, relative_result],
                axis=1,
                keys=["absolute", "relative"],
            )
            mae_curves.append(result)

        mae_curves = pd.concat(mae_curves)
        detailed_error_counts = pd.concat(detailed_error_counts)
        return mae_curves, detailed_error_counts

    def compute_normalized_precision_recall(self) -> pd.DataFrame:
        """Compute nAP between detected points and ground truth according to the
        algorithm proposed in [Ref]_

        .. [Ref] Song, Q., Wang, C., Jiang, Z., Wang, Y., Tai, Y., Wang, C. & Wu, Y.
           Rethinking counting and localization in crowds: A purely point-based
           framework. 2021 IEEE/CVF International Conference on Computer Vision
           (pp. 3365-3374).
           https://openaccess.thecvf.com/content/ICCV2021/html/Song_Rethinking_Counting_and_Localization_in_Crowds_A_Purely_Point-Based_Framework_ICCV_2021_paper.html

        Raises:
            NotImplementedError: always, this metric is not implemented yet.
        """
        raise NotImplementedError