# Source code for monai.losses.spectral_loss
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import torch
import torch.nn.functional as F
from torch.fft import fftn
from torch.nn.modules.loss import _Loss
from monai.utils import LossReduction
# [docs]
class JukeboxLoss(_Loss):
    """
    Calculate spectral component based on the magnitude of Fast Fourier Transform (FFT).

    Based on:
        Dhariwal, et al. 'Jukebox: A generative model for music.' https://arxiv.org/abs/2005.00341

    The loss is the squared difference between the FFT amplitudes of ``input`` and ``target``.
    The transform is taken over dimensions ``1`` through ``spatial_dims + 1`` (inclusive) of each tensor;
    dimension ``0`` is never transformed.

    Args:
        spatial_dims: number of spatial dimensions.
        fft_signal_size: signal size in the transformed dimensions. It is forwarded unchanged as the ``s``
            argument of torch.fft.fftn(); see that function for more information.
        fft_norm: {``"forward"``, ``"backward"``, ``"ortho"``} Specifies the normalization mode in the fft. See
            torch.fft.fftn() for more information.
        reduction: {``"none"``, ``"mean"``, ``"sum"``}
            Specifies the reduction to apply to the output. Defaults to ``"mean"``.

            - ``"none"``: no reduction will be applied.
            - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
            - ``"sum"``: the output will be summed.
    """

    def __init__(
        self,
        spatial_dims: int,
        fft_signal_size: tuple[int, ...] | None = None,
        fft_norm: str = "ortho",
        reduction: LossReduction | str = LossReduction.MEAN,
    ) -> None:
        # LossReduction(...) validates the value; the base class stores the plain string.
        super().__init__(reduction=LossReduction(reduction).value)
        self.spatial_dims = spatial_dims
        self.fft_signal_size = fft_signal_size
        # Transform every dimension after the first one: 1, 2, ..., spatial_dims + 1.
        self.fft_dim = tuple(range(1, spatial_dims + 2))
        self.fft_norm = fft_norm

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the spectral loss between ``input`` and ``target``.

        Args:
            input: tensor to evaluate (presumably shaped batch, channel, spatial... - confirm with callers).
            target: reference tensor to compare against.

        Returns:
            A scalar tensor for ``"mean"`` / ``"sum"`` reduction, otherwise the element-wise squared
            difference of the two amplitude spectra.
        """
        input_amplitude = self._get_fft_amplitude(input)
        target_amplitude = self._get_fft_amplitude(target)

        # Compute distance between amplitude of frequency components
        # See Section 3.3 from https://arxiv.org/abs/2005.00341
        loss = F.mse_loss(input_amplitude, target_amplitude, reduction="none")

        if self.reduction == LossReduction.MEAN.value:
            return loss.mean()
        if self.reduction == LossReduction.SUM.value:
            return loss.sum()
        # Any other setting (i.e. "none") leaves the element-wise loss untouched.
        return loss

    def _get_fft_amplitude(self, images: torch.Tensor) -> torch.Tensor:
        """
        Calculate the amplitude of the fourier transformations representation of the images

        Args:
            images: Images that are to undergo fftn

        Returns:
            fourier transformation amplitude
        """
        img_fft = fftn(images, s=self.fft_signal_size, dim=self.fft_dim, norm=self.fft_norm)
        # torch.abs on a complex tensor yields the magnitude. Unlike an explicit
        # sqrt(real**2 + imag**2) it does not produce NaN gradients where a frequency
        # component is exactly zero, and it avoids overflow/underflow of the squared terms.
        return torch.abs(img_fft)