Source code for monai.metrics.froc

# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Any, cast

import numpy as np
import torch

from monai.config import NdarrayOrTensor


def compute_fp_tp_probs_nd(
    probs: NdarrayOrTensor,
    coords: NdarrayOrTensor,
    evaluation_mask: NdarrayOrTensor,
    labels_to_exclude: list | None = None,
) -> tuple[NdarrayOrTensor, NdarrayOrTensor, int]:
    """
    This function is modified from the official evaluation code of
    `CAMELYON 16 Challenge <https://camelyon16.grand-challenge.org/>`_, and used to distinguish
    true positive and false positive predictions. A true positive prediction is defined when
    the detection point is within the annotated ground truth region.

    Args:
        probs: an array with shape (n,) that represents the probabilities of the detections,
            where n is the number of predicted detections.
        coords: an array with shape (n, n_dim) that represents the coordinates of the detections.
            The dimensions must be in the same order as in `evaluation_mask`.
        evaluation_mask: the ground truth mask for evaluation.
        labels_to_exclude: labels in this list will not be counted for metric calculation.

    Returns:
        fp_probs: an array that contains the probabilities of the false positive detections.
        tp_probs: an array that contains the probabilities of the true positive detections.
        num_targets: the total number of targets (excluding `labels_to_exclude`) for all images under evaluation.

    """
    if len(probs) != len(coords):
        raise ValueError(f"the length of probs {probs.shape} should be the same as that of coords {coords.shape}.")
    if not (len(coords.shape) > 1 and coords.shape[1] == len(evaluation_mask.shape)):
        raise ValueError(
            f"coords {coords.shape} need to represent the same number of dimensions as mask {evaluation_mask.shape}."
        )

    if isinstance(probs, torch.Tensor):
        probs = probs.detach().cpu().numpy()
    if isinstance(coords, torch.Tensor):
        coords = coords.detach().cpu().numpy()
    if isinstance(evaluation_mask, torch.Tensor):
        evaluation_mask = evaluation_mask.detach().cpu().numpy()

    if labels_to_exclude is None:
        labels_to_exclude = []

    max_label = np.max(evaluation_mask)
    tp_probs = np.zeros((max_label,), dtype=np.float32)

    # look up the mask label at each detection coordinate; label 0 is background, i.e. no target was hit
    hit_labels = evaluation_mask[tuple(coords.T)]
    fp_probs = probs[np.where(hit_labels == 0)]
    # for each (non-excluded) target label, keep the highest-probability detection that hit it
    for i in range(1, max_label + 1):
        if i not in labels_to_exclude and i in hit_labels:
            tp_probs[i - 1] = probs[np.where(hit_labels == i)].max()

    num_targets = max_label - len(labels_to_exclude)
    return fp_probs, tp_probs, cast(int, num_targets)
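
# A minimal usage sketch, not part of the upstream module: it builds a toy 2-D
# evaluation mask with two labeled targets and classifies three detections.
# Every name prefixed with `_demo` is hypothetical and exists only for illustration.
def _demo_compute_fp_tp_probs_nd() -> None:
    _demo_mask = np.zeros((8, 8), dtype=np.int32)
    _demo_mask[1:3, 1:3] = 1  # target labeled 1
    _demo_mask[5:7, 5:7] = 2  # target labeled 2
    _demo_probs = np.array([0.9, 0.4, 0.8], dtype=np.float32)
    _demo_coords = np.array([[1, 1], [0, 7], [6, 6]])  # hits target 1, misses, hits target 2
    fp_probs, tp_probs, num_targets = compute_fp_tp_probs_nd(_demo_probs, _demo_coords, _demo_mask)
    # expected: fp_probs == [0.4], tp_probs == [0.9, 0.8], num_targets == 2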


def compute_fp_tp_probs(
    probs: NdarrayOrTensor,
    y_coord: NdarrayOrTensor,
    x_coord: NdarrayOrTensor,
    evaluation_mask: NdarrayOrTensor,
    labels_to_exclude: list | None = None,
    resolution_level: int = 0,
) -> tuple[NdarrayOrTensor, NdarrayOrTensor, int]:
    """
    This function is modified from the official evaluation code of
    `CAMELYON 16 Challenge <https://camelyon16.grand-challenge.org/>`_, and used to distinguish
    true positive and false positive predictions. A true positive prediction is defined when
    the detection point is within the annotated ground truth region.

    Args:
        probs: an array with shape (n,) that represents the probabilities of the detections,
            where n is the number of predicted detections.
        y_coord: an array with shape (n,) that represents the Y-coordinates of the detections.
        x_coord: an array with shape (n,) that represents the X-coordinates of the detections.
        evaluation_mask: the ground truth mask for evaluation.
        labels_to_exclude: labels in this list will not be counted for metric calculation.
        resolution_level: the level at which the evaluation mask is made.

    Returns:
        fp_probs: an array that contains the probabilities of the false positive detections.
        tp_probs: an array that contains the probabilities of the true positive detections.
        num_targets: the total number of targets (excluding `labels_to_exclude`) for all images under evaluation.

    """
    if isinstance(y_coord, torch.Tensor):
        y_coord = y_coord.detach().cpu().numpy()
    if isinstance(x_coord, torch.Tensor):
        x_coord = x_coord.detach().cpu().numpy()

    # downsample the level-0 coordinates to the resolution level of the evaluation mask
    y_coord = (y_coord / pow(2, resolution_level)).astype(int)
    x_coord = (x_coord / pow(2, resolution_level)).astype(int)
    stacked = np.stack([y_coord, x_coord], axis=1)
    return compute_fp_tp_probs_nd(
        probs=probs, coords=stacked, evaluation_mask=evaluation_mask, labels_to_exclude=labels_to_exclude
    )
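
# A hedged sketch of the 2-D wrapper above, illustrative only: detections reported in
# level-0 (full-resolution) coordinates are mapped onto a mask made at level 2, i.e.
# divided by 2**2 = 4 and truncated. The `_demo`-prefixed names are hypothetical.
def _demo_compute_fp_tp_probs() -> None:
    _demo_mask = np.zeros((4, 4), dtype=np.int32)
    _demo_mask[1, 1] = 1  # a single target at mask coordinate (1, 1)
    fp_probs, tp_probs, num_targets = compute_fp_tp_probs(
        probs=np.array([0.7], dtype=np.float32),
        y_coord=np.array([5]),  # 5 / 4 -> mask row 1
        x_coord=np.array([6]),  # 6 / 4 -> mask column 1
        evaluation_mask=_demo_mask,
        resolution_level=2,
    )
    # expected: fp_probs is empty, tp_probs == [0.7], num_targets == 1
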
def compute_froc_curve_data(
    fp_probs: np.ndarray | torch.Tensor, tp_probs: np.ndarray | torch.Tensor, num_targets: int, num_images: int
) -> tuple[np.ndarray, np.ndarray]:
    """
    This function is modified from the official evaluation code of
    `CAMELYON 16 Challenge <https://camelyon16.grand-challenge.org/>`_, and used to compute
    the required data for plotting the Free Response Operating Characteristic (FROC) curve.

    Args:
        fp_probs: an array that contains the probabilities of the false positive detections
            for all images under evaluation.
        tp_probs: an array that contains the probabilities of the true positive detections
            for all images under evaluation.
        num_targets: the total number of targets (excluding `labels_to_exclude`) for all images under evaluation.
        num_images: the number of images under evaluation.

    """
    if not isinstance(fp_probs, type(tp_probs)):
        raise AssertionError("fp and tp probs should have the same type.")
    if isinstance(fp_probs, torch.Tensor):
        fp_probs = fp_probs.detach().cpu().numpy()
    if isinstance(tp_probs, torch.Tensor):
        tp_probs = tp_probs.detach().cpu().numpy()

    # sweep a threshold over all unique detection probabilities and count FPs/TPs at each
    total_fps, total_tps = [], []
    all_probs = sorted(set(list(fp_probs) + list(tp_probs)))
    for thresh in all_probs[1:]:
        total_fps.append((fp_probs >= thresh).sum())
        total_tps.append((tp_probs >= thresh).sum())
    total_fps.append(0)
    total_tps.append(0)
    fps_per_image = np.asarray(total_fps) / float(num_images)
    total_sensitivity = np.asarray(total_tps) / float(num_targets)
    return fps_per_image, total_sensitivity
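
# An illustrative sketch, assumed rather than upstream code: the curve data is a
# threshold sweep over the pooled detection probabilities of two images whose
# masks contain three targets in total. The `_demo`-prefixed names are hypothetical.
def _demo_compute_froc_curve_data() -> None:
    _demo_fp_probs = np.array([0.2, 0.6], dtype=np.float32)  # false positive probabilities
    _demo_tp_probs = np.array([0.9, 0.8], dtype=np.float32)  # true positive probabilities
    fps_per_image, total_sensitivity = compute_froc_curve_data(
        fp_probs=_demo_fp_probs, tp_probs=_demo_tp_probs, num_targets=3, num_images=2
    )
    # thresholds swept: 0.6, 0.8, 0.9 (the lowest probability is skipped), plus a final (0, 0) point
    # expected: fps_per_image == [0.5, 0.0, 0.0, 0.0], total_sensitivity == [2/3, 2/3, 1/3, 0.0]
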
def compute_froc_score(
    fps_per_image: np.ndarray, total_sensitivity: np.ndarray, eval_thresholds: tuple = (0.25, 0.5, 1, 2, 4, 8)
) -> Any:
    """
    This function is modified from the official evaluation code of
    `CAMELYON 16 Challenge <https://camelyon16.grand-challenge.org/>`_, and used to compute
    the challenge's second evaluation metric, which is defined as the average sensitivity at
    the predefined false positive rates per whole slide image.

    Args:
        fps_per_image: the average number of false positives per image for different thresholds.
        total_sensitivity: sensitivities (true positive rates) for different thresholds.
        eval_thresholds: the false positive rates for calculating the average sensitivity.
            Defaults to (0.25, 0.5, 1, 2, 4, 8) which is the same as the CAMELYON 16 Challenge.

    """
    # np.interp expects increasing x values, so reverse the (decreasing) FROC curve arrays
    interp_sens = np.interp(eval_thresholds, fps_per_image[::-1], total_sensitivity[::-1])
    return np.mean(interp_sens)
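
# An end-to-end sketch, assumed usage rather than upstream code: the FROC score is the
# mean of the sensitivities interpolated at the evaluation thresholds. A custom
# `eval_thresholds` is passed because this toy curve only reaches 4 false positives per image.
def _demo_compute_froc_score() -> None:
    _demo_fps = np.array([4.0, 2.0, 1.0, 0.5, 0.0])  # decreasing, as returned by the curve function
    _demo_sens = np.array([1.0, 0.9, 0.8, 0.6, 0.0])
    score = compute_froc_score(_demo_fps, _demo_sens, eval_thresholds=(0.25, 0.5, 1, 2, 4))
    # interpolated sensitivities: 0.3, 0.6, 0.8, 0.9, 1.0 -> score == 0.72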