Source code for monai.losses.image_dissimilarity

# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import torch
from torch.nn import functional as F
from torch.nn.modules.loss import _Loss

from monai.networks.layers import gaussian_1d, separable_filtering
from monai.utils import LossReduction
from monai.utils.module import look_up_option


def make_rectangular_kernel(kernel_size: int) -> torch.Tensor:
    """
    Build a 1-d rectangular (box) window.

    Args:
        kernel_size: number of taps in the window.

    Returns:
        A 1-d tensor of length ``kernel_size`` in which every entry is one.
    """
    window = torch.ones(kernel_size)
    return window


def make_triangular_kernel(kernel_size: int) -> torch.Tensor:
    """
    Build a 1-d triangular window.

    The triangle is produced by convolving a normalized box filter with itself.
    The zero padding of that convolution is chosen so that, for an odd
    ``kernel_size``, the flattened result has exactly ``kernel_size`` taps.

    Args:
        kernel_size: requested number of taps.

    Returns:
        A 1-d float tensor holding the triangular window.
    """
    half = (kernel_size + 1) // 2
    # the box filter needs an odd length, so shrink an even candidate by one
    box_len = half if half % 2 else half - 1
    box = torch.ones((1, 1, box_len), dtype=torch.float) / box_len
    pad = (kernel_size - box_len) // 2 + box_len // 2
    triangle = F.conv1d(box, box, padding=pad)
    return triangle.reshape(-1)


def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
    """
    Build a 1-d gaussian window with ``sigma = kernel_size / 3``.

    The window is sampled by ``gaussian_1d`` (``approx="sampled"``,
    ``normalize=False``, ``truncated=kernel_size // 2``), rescaled by
    ``2.5066282 * sigma`` and cut to at most ``kernel_size`` leading taps.

    Args:
        kernel_size: requested number of taps.

    Returns:
        A 1-d tensor with at most ``kernel_size`` entries.
    """
    sigma = torch.tensor(kernel_size / 3.0)
    sampled = gaussian_1d(sigma=sigma, truncated=kernel_size // 2, approx="sampled", normalize=False)
    # 2.5066282 is approximately sqrt(2 * pi).
    # NOTE(review): presumably this undoes a 1 / (sigma * sqrt(2 * pi)) factor applied by
    # ``gaussian_1d`` so the peak value becomes 1 -- confirm against ``gaussian_1d``.
    rescale = 2.5066282 * sigma
    kernel = sampled * rescale
    # NOTE(review): the slice keeps the *leading* taps; whether the result stays centred
    # depends on the length ``gaussian_1d`` returns -- confirm for kernel_size > 3.
    return kernel[:kernel_size]


# Registry mapping each supported ``kernel_type`` name to the factory that
# builds the corresponding 1-d window from a kernel size.
kernel_dict = dict(
    rectangular=make_rectangular_kernel,
    triangular=make_triangular_kernel,
    gaussian=make_gaussian_kernel,
)


class LocalNormalizedCrossCorrelationLoss(_Loss):
    """
    Local squared zero-normalized cross-correlation.

    The loss is based on a moving kernel/window over the y_true/y_pred,
    within the window the square of zncc is calculated.
    The kernel can be a rectangular / triangular / gaussian window.
    With the default ``"mean"`` reduction the final loss is the negated average over all windows.

    Adapted from:
        https://github.com/voxelmorph/voxelmorph/blob/legacy/src/losses.py
        DeepReg (https://github.com/DeepRegNet/DeepReg)
    """

    def __init__(
        self,
        spatial_dims: int = 3,
        kernel_size: int = 3,
        kernel_type: str = "rectangular",
        reduction: LossReduction | str = LossReduction.MEAN,
        smooth_nr: float = 0.0,
        smooth_dr: float = 1e-5,
    ) -> None:
        """
        Args:
            spatial_dims: number of spatial dimensions, {``1``, ``2``, ``3``}. Defaults to 3.
            kernel_size: kernel spatial size, must be odd.
            kernel_type: {``"rectangular"``, ``"triangular"``, ``"gaussian"``}. Defaults to ``"rectangular"``.
            reduction: {``"none"``, ``"mean"``, ``"sum"``}
                Specifies the reduction to apply to the output. Defaults to ``"mean"``.

                - ``"none"``: no reduction will be applied.
                - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
                - ``"sum"``: the output will be summed.
            smooth_nr: a small constant added to the numerator (the squared cross term) to avoid nan.
            smooth_dr: a small constant used as the lower bound of each local variance term, so that
                the denominator (the product of the two variances) cannot reach zero.

        Raises:
            ValueError: When ``spatial_dims`` is not 1, 2 or 3.
            ValueError: When ``kernel_size`` is even.
        """
        super().__init__(reduction=LossReduction(reduction).value)

        self.ndim = spatial_dims
        if self.ndim not in {1, 2, 3}:
            raise ValueError(f"Unsupported ndim: {self.ndim}-d, only 1-d, 2-d, and 3-d inputs are supported")

        self.kernel_size = kernel_size
        if self.kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be odd, got {self.kernel_size}")

        _kernel = look_up_option(kernel_type, kernel_dict)
        self.kernel = _kernel(self.kernel_size)
        # The window is a fixed filter, not a learnable parameter. The autograd flag is spelled
        # ``requires_grad``; any other attribute name would only attach an unused Python attribute.
        self.kernel.requires_grad = False
        self.kernel_vol = self.get_kernel_vol()

        self.smooth_nr = float(smooth_nr)
        self.smooth_dr = float(smooth_dr)

    def get_kernel_vol(self):
        """
        Return the sum of all weights of the separable ``self.ndim``-dimensional window.

        The n-d window is the repeated outer product of the 1-d ``self.kernel`` with itself;
        the returned value is a 0-d tensor holding the sum of its entries.
        """
        vol = self.kernel
        for _ in range(self.ndim - 1):
            # outer product with the 1-d kernel appends one more spatial axis
            vol = torch.matmul(vol.unsqueeze(-1), self.kernel.unsqueeze(0))
        return torch.sum(vol)

    def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Args:
            pred: the shape should be BNH[WD].
            target: the shape should be BNH[WD].

        Returns:
            The negated squared local ncc: reduced to a scalar for ``"mean"`` / ``"sum"``,
            or the un-reduced per-window tensor for ``"none"``.

        Raises:
            ValueError: When ``pred`` does not have ``spatial_dims`` spatial dimensions.
            ValueError: When ``target`` and ``pred`` differ in shape.
            ValueError: When ``self.reduction`` is not one of ["mean", "sum", "none"].
        """
        if pred.ndim - 2 != self.ndim:
            raise ValueError(f"expecting pred with {self.ndim} spatial dimensions, got pred of shape {pred.shape}")
        if target.shape != pred.shape:
            raise ValueError(f"ground truth has differing shape ({target.shape}) from pred ({pred.shape})")

        t2, p2, tp = target * target, pred * pred, target * pred
        # move the window and its volume to the dtype/device of the input
        kernel, kernel_vol = self.kernel.to(pred), self.kernel_vol.to(pred)
        kernels = [kernel] * self.ndim

        # sum over kernel
        t_sum = separable_filtering(target, kernels=kernels)
        p_sum = separable_filtering(pred, kernels=kernels)
        t2_sum = separable_filtering(t2, kernels=kernels)
        p2_sum = separable_filtering(p2, kernels=kernels)
        tp_sum = separable_filtering(tp, kernels=kernels)

        # average over kernel
        t_avg = t_sum / kernel_vol
        p_avg = p_sum / kernel_vol

        # normalized cross correlation between t and p
        # sum[(t - mean[t]) * (p - mean[p])] / std[t] / std[p]
        # denoted by num / denom
        # assume we sum over N values
        # num = sum[t * p - mean[t] * p - t * mean[p] + mean[t] * mean[p]]
        #     = sum[t*p] - sum[t] * sum[p] / N * 2 + sum[t] * sum[p] / N
        #     = sum[t*p] - sum[t] * sum[p] / N
        #     = sum[t*p] - sum[t] * mean[p] = cross
        # the following is actually squared ncc
        cross = tp_sum - p_avg * t_sum
        # each variance term is floored at smooth_dr so the division below stays finite
        t_var = torch.max(
            t2_sum - t_avg * t_sum, torch.as_tensor(self.smooth_dr, dtype=t2_sum.dtype, device=t2_sum.device)
        )
        p_var = torch.max(
            p2_sum - p_avg * p_sum, torch.as_tensor(self.smooth_dr, dtype=p2_sum.dtype, device=p2_sum.device)
        )
        ncc: torch.Tensor = (cross * cross + self.smooth_nr) / (t_var * p_var)

        if self.reduction == LossReduction.SUM.value:
            return torch.sum(ncc).neg()  # sum over the batch, channel and spatial ndims
        if self.reduction == LossReduction.NONE.value:
            return ncc.neg()
        if self.reduction == LossReduction.MEAN.value:
            return torch.mean(ncc).neg()  # average over the batch, channel and spatial ndims
        raise ValueError(f'Unsupported reduction: {self.reduction}, available options are ["mean", "sum", "none"].')
class GlobalMutualInformationLoss(_Loss):
    """
    Differentiable global mutual information loss via Parzen windowing method.

    Reference:
        https://dspace.mit.edu/handle/1721.1/123142, Section 3.1, equation 3.1-3.5, Algorithm 1
    """

    def __init__(
        self,
        kernel_type: str = "gaussian",
        num_bins: int = 23,
        sigma_ratio: float = 0.5,
        reduction: LossReduction | str = LossReduction.MEAN,
        smooth_nr: float = 1e-7,
        smooth_dr: float = 1e-7,
    ) -> None:
        """
        Args:
            kernel_type: {``"gaussian"``, ``"b-spline"``}
                ``"gaussian"``: adapted from DeepReg
                Reference: https://dspace.mit.edu/handle/1721.1/123142, Section 3.1, equation 3.1-3.5, Algorithm 1.
                ``"b-spline"``: based on the method of Mattes et al [1,2] and adapted from ITK
                References:
                  [1] "Nonrigid multimodality image registration"
                      D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank
                      Medical Imaging 2001: Image Processing, 2001, pp. 1609-1620.
                  [2] "PET-CT Image Registration in the Chest Using Free-form Deformations"
                      D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank
                      IEEE Transactions in Medical Imaging. Vol.22, No.1,
                      January 2003. pp.120-128.

            num_bins: number of bins for intensity
            sigma_ratio: a hyper param for gaussian function
            reduction: {``"none"``, ``"mean"``, ``"sum"``}
                Specifies the reduction to apply to the output. Defaults to ``"mean"``.

                - ``"none"``: no reduction will be applied.
                - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
                - ``"sum"``: the output will be summed.
            smooth_nr: a small constant added to the numerator to avoid nan.
            smooth_dr: a small constant added to the denominator, and to the argument of the
                logarithm, to avoid nan.

        Raises:
            ValueError: When ``num_bins`` is not positive.
        """
        super().__init__(reduction=LossReduction(reduction).value)
        if num_bins <= 0:
            raise ValueError(f"num_bins must > 0, got {num_bins}")
        bin_centers = torch.linspace(0.0, 1.0, num_bins)  # (num_bins,)
        # gaussian width expressed as a fraction of the spacing between neighbouring bin centres
        sigma = torch.mean(bin_centers[1:] - bin_centers[:-1]) * sigma_ratio
        # validation only: the attribute keeps the caller-supplied string
        look_up_option(kernel_type, ["gaussian", "b-spline"])
        self.kernel_type = kernel_type
        self.num_bins = num_bins
        if self.kernel_type == "gaussian":
            self.preterm = 1 / (2 * sigma**2)
            self.bin_centers = bin_centers[None, None, ...]
        self.smooth_nr = float(smooth_nr)
        self.smooth_dr = float(smooth_dr)

    def parzen_windowing(
        self, pred: torch.Tensor, target: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Compute Parzen-window weights and marginal probabilities for both images,
        using the kernel selected by ``self.kernel_type``.

        Args:
            pred: the shape should be B[NDHW].
            target: the shape should be B[NDHW].

        Returns:
            ``(pred_weight, pred_probability, target_weight, target_probability)``.

        Raises:
            ValueError: When ``self.kernel_type`` is neither ``"gaussian"`` nor ``"b-spline"``.
        """
        if self.kernel_type == "gaussian":
            pred_weight, pred_probability = self.parzen_windowing_gaussian(pred)
            target_weight, target_probability = self.parzen_windowing_gaussian(target)
        elif self.kernel_type == "b-spline":
            # a third order BSpline kernel is used for the pred image intensity PDF.
            pred_weight, pred_probability = self.parzen_windowing_b_spline(pred, order=3)
            # a zero order (box car) BSpline kernel is used for the target image intensity PDF.
            target_weight, target_probability = self.parzen_windowing_b_spline(target, order=0)
        else:
            raise ValueError(
                f'Unsupported kernel_type: {self.kernel_type}, available options are ["gaussian", "b-spline"].'
            )
        return pred_weight, pred_probability, target_weight, target_probability

    def parzen_windowing_b_spline(self, img: torch.Tensor, order: int) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Parzen windowing with b-spline kernel (adapted from ITK)

        Args:
            img: the shape should be B[NDHW].
            order: int. Only ``0`` and ``3`` are supported.

        Returns:
            ``(weight, probability)`` with shapes (batch, num_sample, num_bins) and (batch, 1, num_bins).

        Raises:
            ValueError: When ``order`` is neither 0 nor 3.
        """

        # Compute binsize for the histograms.
        #
        # The binsize for the image intensities needs to be adjusted so that
        # we can avoid dealing with boundary conditions using the cubic
        # spline as the Parzen window.  We do this by increasing the size
        # of the bins so that the joint histogram becomes "padded" at the
        # borders. Because we are changing the binsize,
        # we also need to shift the minimum by the padded amount in order to
        # avoid minimum values filling in our padded region.
        #
        # Note that there can still be non-zero bin values in the padded region,
        # it's just that these bins will never be a central bin for the Parzen
        # window.
        _max, _min = torch.max(img), torch.min(img)
        padding = 2
        # NOTE(review): the divisor below is only positive when num_bins > 2 * padding, and
        # bin_size is zero for a constant image; neither case is validated here.
        bin_size = (_max - _min) / (self.num_bins - 2 * padding)
        norm_min = torch.div(_min, bin_size) - padding

        # assign bin/window index to each voxel
        window_term = torch.div(img, bin_size) - norm_min  # B[NDHW]
        # make sure the extreme values are in valid (non-padded) bins
        window_term = torch.clamp(window_term, padding, self.num_bins - padding - 1)  # B[NDHW]
        window_term = window_term.reshape(window_term.shape[0], -1, 1)  # (batch, num_sample, 1)
        bins = torch.arange(self.num_bins, device=window_term.device).reshape(1, 1, -1)  # (1, 1, num_bins)
        sample_bin_matrix = torch.abs(bins - window_term)  # (batch, num_sample, num_bins)

        # b-spline kernel
        # (4 - 6 * abs ** 2 + 3 * abs ** 3) / 6 when 0 <= abs < 1
        # (2 - abs) ** 3 / 6 when 1 <= abs < 2
        weight = torch.zeros_like(sample_bin_matrix, dtype=torch.float)  # (batch, num_sample, num_bins)
        if order == 0:
            # box car: full weight strictly inside the bin, half weight exactly on the edge
            weight = weight + (sample_bin_matrix < 0.5) + (sample_bin_matrix == 0.5) * 0.5
        elif order == 3:
            weight = (
                weight + (4 - 6 * sample_bin_matrix**2 + 3 * sample_bin_matrix**3) * (sample_bin_matrix < 1) / 6
            )
            weight = weight + (2 - sample_bin_matrix) ** 3 * (sample_bin_matrix >= 1) * (sample_bin_matrix < 2) / 6
        else:
            raise ValueError(f"Do not support b-spline {order}-order parzen windowing")

        weight = weight / torch.sum(weight, dim=-1, keepdim=True)  # (batch, num_sample, num_bins)
        probability = torch.mean(weight, dim=-2, keepdim=True)  # (batch, 1, num_bins)
        return weight, probability

    def parzen_windowing_gaussian(self, img: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Parzen windowing with gaussian kernel (adapted from DeepReg implementation)
        Note: the input is expected to range between 0 and 1; values outside are clamped.

        Args:
            img: the shape should be B[NDHW].

        Returns:
            ``(weight, probability)`` with shapes (batch, num_sample, num_bin) and (batch, 1, num_bin).
        """
        img = torch.clamp(img, 0, 1)
        img = img.reshape(img.shape[0], -1, 1)  # (batch, num_sample, 1)
        weight = torch.exp(
            -self.preterm.to(img) * (img - self.bin_centers.to(img)) ** 2
        )  # (batch, num_sample, num_bin)
        weight = weight / torch.sum(weight, dim=-1, keepdim=True)  # (batch, num_sample, num_bin)
        probability = torch.mean(weight, dim=-2, keepdim=True)  # (batch, 1, num_bin)
        return weight, probability

    def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Args:
            pred: the shape should be B[NDHW].
            target: the shape should be same as the pred shape.

        Returns:
            The negated mutual information: a scalar for ``"mean"`` / ``"sum"``,
            or one value per batch item for ``"none"``.

        Raises:
            ValueError: When ``target`` and ``pred`` differ in shape.
            ValueError: When ``self.reduction`` is not one of ["mean", "sum", "none"].
        """
        if target.shape != pred.shape:
            raise ValueError(f"ground truth has differing shape ({target.shape}) from pred ({pred.shape})")
        wa, pa, wb, pb = self.parzen_windowing(pred, target)  # (batch, num_sample, num_bin), (batch, 1, num_bin)

        # joint distribution: per-sample weight products averaged over the samples
        pab = torch.bmm(wa.permute(0, 2, 1), wb.to(wa)).div(wa.shape[1])  # (batch, num_bins, num_bins)
        # product of the two marginals
        papb = torch.bmm(pa.permute(0, 2, 1), pb.to(pa))  # (batch, num_bins, num_bins)
        mi = torch.sum(
            pab * torch.log((pab + self.smooth_nr) / (papb + self.smooth_dr) + self.smooth_dr), dim=(1, 2)
        )  # (batch)

        if self.reduction == LossReduction.SUM.value:
            return torch.sum(mi).neg()  # sum over the batch and channel ndims
        if self.reduction == LossReduction.NONE.value:
            return mi.neg()
        if self.reduction == LossReduction.MEAN.value:
            return torch.mean(mi).neg()  # average over the batch and channel ndims
        raise ValueError(f'Unsupported reduction: {self.reduction}, available options are ["mean", "sum", "none"].')