# Source code for monai.losses.spectral_loss

# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import torch
import torch.nn.functional as F
from torch.fft import fftn
from torch.nn.modules.loss import _Loss

from monai.utils import LossReduction



# [docs] -- Sphinx viewcode link marker for JukeboxLoss (documentation-page artifact, not code)
class JukeboxLoss(_Loss):
    """
    Calculate spectral component based on the magnitude of Fast Fourier Transform (FFT).

    The loss is the squared difference between the FFT amplitude spectra of the input and of the target;
    the phase of the frequency components is not used.

    Based on:
        Dhariwal, et al. 'Jukebox: A generative model for music.' https://arxiv.org/abs/2005.00341

    Args:
        spatial_dims: number of spatial dimensions. The FFT is taken over dimensions ``1`` to
            ``spatial_dims + 1`` of the tensors, i.e. every dimension except the first one.
        fft_signal_size: signal size in the transformed dimensions. When given, it is passed as ``s`` to
            torch.fft.fftn() together with the ``spatial_dims + 1`` transformed dimensions, so it needs one
            entry per transformed dimension. See torch.fft.fftn() for more information.
        fft_norm: {``"forward"``, ``"backward"``, ``"ortho"``} Specifies the normalization mode in the fft. See
            torch.fft.fftn() for more information.

        reduction: {``"none"``, ``"mean"``, ``"sum"``}
            Specifies the reduction to apply to the output. Defaults to ``"mean"``.

            - ``"none"``: no reduction will be applied.
            - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
            - ``"sum"``: the output will be summed.
    """

    def __init__(
        self,
        spatial_dims: int,
        fft_signal_size: tuple[int, ...] | None = None,
        fft_norm: str = "ortho",
        reduction: LossReduction | str = LossReduction.MEAN,
    ) -> None:
        # ``LossReduction(reduction)`` normalises the argument to the enum, so ``self.reduction`` always
        # holds the plain string value that ``forward`` compares against.
        super().__init__(reduction=LossReduction(reduction).value)

        self.spatial_dims = spatial_dims
        self.fft_signal_size = fft_signal_size
        # Transform every dimension except dimension 0.
        self.fft_dim = tuple(range(1, spatial_dims + 2))
        self.fft_norm = fft_norm

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the spectral loss between ``input`` and ``target``.

        Args:
            input: the tensor to evaluate (presumably the model prediction).
            target: the reference tensor the input is compared with.

        Returns:
            A scalar tensor for ``"mean"`` and ``"sum"`` reduction, otherwise the element-wise squared
            difference of the two amplitude spectra.
        """
        input_amplitude = self._get_fft_amplitude(input)
        target_amplitude = self._get_fft_amplitude(target)

        # Compute distance between amplitude of frequency components
        # See Section 3.3 from https://arxiv.org/abs/2005.00341
        loss = F.mse_loss(input_amplitude, target_amplitude, reduction="none")

        if self.reduction == LossReduction.MEAN.value:
            loss = loss.mean()
        elif self.reduction == LossReduction.SUM.value:
            loss = loss.sum()
        # Any other value (i.e. ``"none"``) leaves the element-wise loss untouched.

        return loss

    def _get_fft_amplitude(self, images: torch.Tensor) -> torch.Tensor:
        """
        Calculate the amplitude of the fourier transformations representation of the images

        Args:
            images: Images that are to undergo fftn

        Returns:
            fourier transformation amplitude
        """
        img_fft = fftn(images, s=self.fft_signal_size, dim=self.fft_dim, norm=self.fft_norm)

        # ``torch.abs`` on a complex tensor is the magnitude sqrt(real^2 + imag^2). Unlike the explicit
        # square root it does not produce NaN gradients for frequency components that are exactly zero,
        # and it does not overflow/underflow when squaring extreme values.
        return torch.abs(img_fft)