python source code of multitask_gaussian

gpytorch-master
- .github
  - ISSUE_TEMPLATE
    - ---documentation-examples.md
    - ---bug-report.md
    - ---feature-request.md
    - ---refactor.md
- environment.yml
- examples
  - 04_Variational_and_Approximate_GPs
    - SVGP_Multitask_GP_Regression.ipynb
    - GP_Regression_with_Uncertain_Inputs.ipynb
    - README.rst
    - Non_Gaussian_Likelihoods.ipynb
    - index.rst
    - Modifying_the_variational_strategy_and_distribution.ipynb
    - Approximate_GP_Objective_Functions.ipynb
    - SVGP_Regression_CUDA.ipynb
  - 06_PyTorch_NN_Integration_DKL
    - KISSGP_Deep_Kernel_Regression_CUDA.ipynb
    - Deep_Kernel_Learning_DenseNet_CIFAR_Tutorial.ipynb
    - README.rst
    - densenet.py
    - index.rst
    - .gitignore
  - LBFGS.py
  - 05_Deep_Gaussian_Processes
    - Deep_Gaussian_Processes.ipynb
    - README.rst
    - index.rst
  - 07_Pyro_Integration
    - Cox_Process_Example.ipynb
    - Pyro_GPyTorch_High_Level.ipynb
    - README.rst
    - Clustered_Multitask_GP_Regression.ipynb
    - index.rst
    - Pyro_GPyTorch_Low_Level.ipynb
  - README.rst
  - 00_Basic_Usage
    - Hyperparameters.ipynb
    - README.rst
    - Saving_and_Loading_Models.ipynb
    - index.rst
  - 02_Scalable_Exact_GPs
    - Simple_GP_Regression_CUDA.ipynb
    - KeOps_GP_Regression.ipynb
    - Scalable_Kernel_Interpolation_for_Products_CUDA.ipynb
    - README.rst
    - Simple_MultiGPU_GP_Regression.ipynb
    - Exact_GP_Posterior_Sampling_with_CIQ.ipynb
    - Simple_GP_Regression_With_LOVE_Fast_Variances_and_Sampling.ipynb
    - index.rst
    - KISSGP_Regression.ipynb
    - SGPR_Regression_CUDA.ipynb
    - Grid_GP_Regression.ipynb
  - 01_Exact_GPs
    - GP_Regression_DistributionalKernel.ipynb
    - Spectral_Mixture_GP_Regression.ipynb
    - Simple_GP_Regression.ipynb
    - README.rst
    - index.rst
    - GP_Regression_Fully_Bayesian.ipynb
  - 03_Multitask_Exact_GPs
    - Hadamard_Multitask_GP_Regression.ipynb
    - Multitask_GP_Regression.ipynb
    - ModelList_GP_Regression.ipynb
    - README.rst
    - index.rst
    - Batch_Independent_Multioutput_GP.ipynb
  - index.rst
  - .gitignore
  - 08_Advanced_Usage
    - Simple_GP_Regression_Derivative_Information_2d.ipynb
    - TorchScript_Exact_Models.ipynb
    - README.rst
    - TorchScript_Variational_Models.ipynb
    - Simple_Batch_Mode_GP_Regression.ipynb
    - index.rst
    - Simple_GP_Regression_Derivative_Information_1d.ipynb
- readthedocs.yml
- .pre-commit-config.yaml
- LICENSE
- test
  - lazy
    - test_sum_lazy_tensor.py
    - test_psd_sum_lazy_tensor.py
    - test_cached_cg_lazy_tensor.py
    - test_kronecker_product_lazy_tensor.py
    - test_block_diag_lazy_tensor.py
    - test_sum_batch_lazy_tensor.py
    - test_lazy_evaluated_kernel_tensor.py
    - test_chol_lazy_tensor.py
    - test_constant_mul_lazy_tensor.py
    - test_interpolated_lazy_tensor.py
    - test_added_diag_lazy_tensor.py
    - test_cat_lazy_tensor.py
    - test_mul_lazy_tensor.py
    - test_root_lazy_tensor.py
    - test_block_interleaved_lazy_tensor.py
    - test_non_lazy_tensor.py
    - test_toeplitz_lazy_tensor.py
    - __init__.py
    - test_diag_lazy_tensor.py
    - test_matmul_lazy_tensor.py
    - test_batch_repeat_lazy_tensor.py
    - test_zero_lazy_tensor.py
  - functions
    - test_rbf_covariance.py
    - test_inv_quad_log_det.py
    - test_matern_covariance.py
    - test_log_normal_cdf.py
    - test_inv_matmul.py
    - test_inv_quad.py
    - __init__.py
    - test_matmul.py
    - test_dsmm.py
    - test_root_decomposition.py
  - utils
    - test_grid.py
    - test_sparse.py
    - test_getitem.py
    - test_pivoted_cholesky.py
    - test_quadrature.py
    - test_interpolation.py
    - test_toeplitz.py
    - test_fft.py
    - __init__.py
    - test_linear_cg.py
    - test_minres.py
    - test_lanczos.py
    - test_cholesky.py
  - kernels
    - test_gaussian_symmetrized_kl_kernel.py
    - test_additive_kernel.py
    - test_rff_kernel.py
    - test_rq_kernel.py
    - test_newton_girard_additive_kernel.py
    - test_scale_kernel.py
    - test_cosine_kernel.py
    - test_matern_kernel.py
    - test_cylindrical_kernel.py
    - test_polynomial_kernel_grad.py
    - test_linear_kernel.py
    - keops
      - test_matern_kernel.py
      - test_rbf_kernel.py
      - __init__.py
    - test_polynomial_kernel.py
    - test_grid_interpolation_kernel.py
    - test_rbf_kernel.py
    - test_arc_kernel.py
    - __init__.py
    - test_spectral_mixture_kernel.py
    - test_periodic_kernel.py
    - test_grid_kernel.py
    - test_rbf_kernel_grad.py
  - variational
    - test_independent_multitask_variational_strategy.py
    - test_batch_decoupled_variational_strategy.py
    - test_unwhitened_variational_strategy.py
    - test_whitened_variational_strategy.py
    - __init__.py
    - test_lmc_variational_strategy.py
    - test_orthogonally_decoupled_variational_strategy.py
    - test_variational_strategy.py
    - test_grid_interpolation_variational_strategy.py
  - priors
    - test_gamma_prior.py
    - test_horseshoe_prior.py
    - test_lkj_prior.py
    - test_multivariate_normal_prior.py
    - __init__.py
    - test_smoothed_box_prior.py
    - test_normal_prior.py
  - examples
    - test_spectral_mixture_gp_regression.py
    - test_pyro_integration.py
    - test_kronecker_multitask_ski_gp_regression.py
    - test_sgpr_regression.py
    - test_white_noise_regression.py
    - test_batch_multitask_gp_regression.py
    - test_decoupled_svgp_regression.py
    - test_simple_gp_classification.py
    - test_kronecker_multitask_gp_regression.py
    - test_svgp_gp_regression.py
    - test_batch_svgp_gp_regression.py
    - test_unwhitened_svgp_regression.py
    - test_svgp_gp_classification.py
    - test_hadamard_multitask_gp_regression.py
    - test_kissgp_dkl_regression.py
    - test_batch_decoupled_ppgpr_regression.py
    - test_batch_gp_regression.py
    - test_kissgp_white_noise_regression.py
    - test_simple_gp_regression.py
    - test_kissgp_kronecker_product_regression.py
    - old_variational_strategy_model.pth
    - test_kissgp_additive_regression.py
    - test_fixed_noise_fanatasy_updates.py
    - test_kissgp_gp_classification.py
    - test_lcm_kernel_regression.py
    - test_kissgp_additive_classification.py
    - __init__.py
    - test_kissgp_multiplicative_regression.py
    - test_lmc_svgp_regression.py
    - test_kissgp_variational_regression.py
    - test_independent_multitask_gp_regression.py
    - test_kissgp_gp_regression.py
    - test_grid_gp_regression.py
    - test_kissgp_kronecker_product_classification.py
    - test_model_list_gp_regression.py
  - distributions
    - test_multitask_multivariate_normal.py
    - test_multivariate_normal.py
    - test_delta.py
    - __init__.py
  - constraints
    - test_constraints.py
    - __init__.py
  - models
    - test_exact_gp.py
    - test_variational_gp.py
    - test_model_list.py
    - __init__.py
  - likelihoods
    - test_softmax_likelihood.py
    - test_general_multitask_gaussian_likelihood.py
    - test_bernoulli_likelihood.py
    - test_multitask_gaussian_likelihood.py
    - test_gaussian_likelihood.py
    - __init__.py
  - __init__.py
  - means
    - test_zero_mean.py
    - test_multitask_mean.py
    - test_constant_mean_grad.py
    - test_linear_mean.py
    - __init__.py
    - test_constant_mean.py
- .conda
  - meta.yaml
- gpytorch
  - lazy
    - added_diag_lazy_tensor.py
    - non_lazy_tensor.py
    - mul_lazy_tensor.py
    - cat_lazy_tensor.py
    - psd_sum_lazy_tensor.py
    - block_diag_lazy_tensor.py
    - interpolated_lazy_tensor.py
    - block_interleaved_lazy_tensor.py
    - matmul_lazy_tensor.py
    - zero_lazy_tensor.py
    - root_lazy_tensor.py
    - cached_cg_lazy_tensor.py
    - lazy_tensor_representation_tree.py
    - batch_repeat_lazy_tensor.py
    - kronecker_product_lazy_tensor.py
    - diag_lazy_tensor.py
    - __init__.py
    - lazy_tensor.py
    - block_lazy_tensor.py
    - keops_lazy_tensor.py
    - chol_lazy_tensor.py
    - toeplitz_lazy_tensor.py
    - constant_mul_lazy_tensor.py
    - lazy_evaluated_kernel_tensor.py
    - sum_batch_lazy_tensor.py
    - sum_lazy_tensor.py
  - module.py
  - functions
    - _inv_quad_log_det.py
    - _matmul.py
    - rbf_covariance.py
    - _sqrt_inv_matmul.py
    - matern_covariance.py
    - _dsmm.py
    - _inv_quad.py
    - _inv_matmul.py
    - __init__.py
    - _log_normal_cdf.py
    - _root_decomposition.py
  - utils
    - linear_cg.py
    - getitem.py
    - interpolation.py
    - contour_integral_quad.py
    - fft.py
    - grid.py
    - minres.py
    - deprecation.py
    - lanczos.py
    - __init__.py
    - warnings.py
    - stochastic_lq.py
    - errors.py
    - toeplitz.py
    - cholesky.py
    - sparse.py
    - transforms.py
    - memoize.py
    - pivoted_cholesky.py
    - broadcasting.py
    - quadrature.py
  - kernels
    - rq_kernel.py
    - lcm_kernel.py
    - spectral_mixture_kernel.py
    - matern_kernel.py
    - multi_device_kernel.py
    - multitask_kernel.py
    - additive_structure_kernel.py
    - newton_girard_additive_kernel.py
    - grid_interpolation_kernel.py
    - rbf_kernel.py
    - polynomial_kernel.py
    - grid_kernel.py
    - linear_kernel.py
    - kernel.py
    - product_structure_kernel.py
    - keops
      - matern_kernel.py
      - rbf_kernel.py
      - keops_kernel.py
      - __init__.py
    - gaussian_symmetrized_kl_kernel.py
    - index_kernel.py
    - cylindrical_kernel.py
    - cosine_kernel.py
    - rbf_kernel_grad.py
    - __init__.py
    - rff_kernel.py
    - scale_kernel.py
    - distributional_input_kernel.py
    - inducing_point_kernel.py
    - polynomial_kernel_grad.py
    - periodic_kernel.py
    - arc_kernel.py
  - variational
    - grid_interpolation_variational_strategy.py
    - batch_decoupled_variational_strategy.py
    - variational_strategy.py
    - independent_multitask_variational_strategy.py
    - _variational_distribution.py
    - additive_grid_interpolation_variational_strategy.py
    - mean_field_variational_distribution.py
    - cholesky_variational_distribution.py
    - unwhitened_variational_strategy.py
    - whitened_variational_strategy.py
    - delta_variational_distribution.py
    - _variational_strategy.py
    - __init__.py
    - lmc_variational_strategy.py
    - orthogonally_decoupled_variational_strategy.py
  - priors
    - horseshoe_prior.py
    - torch_priors.py
    - lkj_prior.py
    - prior.py
    - smoothed_box_prior.py
    - wishart_prior.py
    - __init__.py
    - utils.py
  - distributions
    - multitask_multivariate_normal.py
    - multivariate_normal.py
    - delta.py
    - distribution.py
    - __init__.py
  - constraints
    - constraints.py
    - __init__.py
  - test
    - model_test_case.py
    - base_likelihood_test_case.py
    - base_kernel_test_case.py
    - variational_test_case.py
    - lazy_tensor_test_case.py
    - __init__.py
    - utils.py
    - base_test_case.py
    - base_mean_test_case.py
  - models
    - model_list.py
    - exact_prediction_strategies.py
    - pyro
      - pyro_gp.py
      - _pyro_mixin.py
      - __init__.py
    - __init__.py
    - approximate_gp.py
    - exact_gp.py
    - gp.py
    - deep_gps
      - deep_gp.py
      - __init__.py
  - likelihoods
    - gaussian_likelihood.py
    - bernoulli_likelihood.py
    - likelihood_list.py
    - noise_models.py
    - softmax_likelihood.py
    - __init__.py
    - likelihood.py
    - multitask_gaussian_likelihood.py
  - mlls
    - _approximate_mll.py
    - noise_model_added_loss_term.py
    - sum_marginal_log_likelihood.py
    - marginal_log_likelihood.py
    - inducing_point_kernel_added_loss_term.py
    - gamma_robust_variational_elbo.py
    - predictive_log_likelihood.py
    - exact_marginal_log_likelihood.py
    - __init__.py
    - deep_approximate_mll.py
    - variational_elbo.py
    - added_loss_term.py
  - __init__.py
  - means
    - linear_mean.py
    - constant_mean_grad.py
    - zero_mean.py
    - mean.py
    - __init__.py
    - multitask_mean.py
    - constant_mean.py
  - settings.py
  - beta_features.py
- .pylintrc
- setup.py
- setup.cfg
- .travis.yml
- README.md
- .isort.cfg
- requirements.txt
- .gitignore
- docs
  - Makefile
  - source
    - functions.rst
    - distributions.rst
    - module.rst
    - constraints.rst
    - marginal_log_likelihoods.rst
    - beta_features.rst
    - lazy.rst
    - likelihoods.rst
    - means.rst
    - kernels.rst
    - variational.rst
    - utils.rst
    - priors.rst
    - settings.rst
    - index.rst
    - conf.py
    - models.rst
    - .gitignore
    - _static
      - .gitkeep
  - requirements.txt

#!/usr/bin/env python3

import warnings
from typing import Any

import torch
from torch import Tensor

from ..constraints import GreaterThan
from ..distributions import base_distributions
from ..functions import add_diag
from ..lazy import (
    BlockDiagLazyTensor,
    DiagLazyTensor,
    KroneckerProductLazyTensor,
    MatmulLazyTensor,
    RootLazyTensor,
    lazify,
)
from ..likelihoods import Likelihood, _GaussianLikelihoodBase
from ..utils.warnings import OldVersionWarning
from .noise_models import MultitaskHomoskedasticNoise


class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase):
    """Base class for multi-task Gaussian Likelihoods, supporting general heteroskedastic noise models. """

    def __init__(self, num_tasks, noise_covar, rank=0, task_correlation_prior=None, batch_shape=torch.Size()):
        """
        Args:
            num_tasks (int):
                Number of tasks.
            noise_covar (:obj:`gpytorch.module.Module`):
                A model for the noise covariance. This can be a simple homoskedastic noise model, or a GP
                that is to be fitted on the observed measurement errors.
            rank (int):
                The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance
                matrix is fit.
            task_correlation_prior (:obj:`gpytorch.priors.Prior`):
                Prior to use over the task noise correlation matrix. Only used when `rank` > 0.
            batch_shape (torch.Size):
                Number of batches.

        Raises:
            ValueError: If `rank` exceeds `num_tasks`, or if `task_correlation_prior` is given while `rank` is 0.
        """
        super().__init__(noise_covar=noise_covar)
        if rank != 0:
            if rank > num_tasks:
                raise ValueError(f"Cannot have rank ({rank}) greater than num_tasks ({num_tasks})")
            # indices of the lower-triangular entries of a (num_tasks x rank) factor
            tidcs = torch.tril_indices(num_tasks, rank, dtype=torch.long)
            self.tidcs = tidcs[:, 1:]  # (1, 1) must be 1.0, no need to parameterize this
            task_noise_corr = torch.randn(*batch_shape, self.tidcs.size(-1))
            self.register_parameter("task_noise_corr", torch.nn.Parameter(task_noise_corr))
            if task_correlation_prior is not None:
                # The closure must *call* `_eval_corr_matrix` so that the prior is evaluated on the
                # correlation matrix itself rather than on the bound method object.
                self.register_prior(
                    "MultitaskErrorCorrelationPrior", task_correlation_prior, lambda: self._eval_corr_matrix()
                )
        elif task_correlation_prior is not None:
            raise ValueError("Can only specify task_correlation_prior if rank>0")
        self.num_tasks = num_tasks
        self.rank = rank
        # Handle deprecation of parameterization - TODO: Remove in future release
        self._register_load_state_dict_pre_hook(deprecate_task_noise_corr)

    def _eval_corr_matrix(self):
        """
        Build the task correlation matrix from the `task_noise_corr` parameter.

        The parameter values are written into the lower-triangular positions `self.tidcs` of a factor that
        starts out as the identity; the rows of the factor are then scaled to unit norm and the factor is
        multiplied with its own transpose.

        Returns:
            Tensor of shape `(*task_noise_corr.shape[:-1], num_tasks, num_tasks)`.
        """
        tnc = self.task_noise_corr
        fac_diag = torch.ones(*tnc.shape[:-1], self.num_tasks, device=tnc.device, dtype=tnc.dtype)
        Cfac = torch.diag_embed(fac_diag)
        Cfac[..., self.tidcs[0], self.tidcs[1]] = tnc
        # squared rows must sum to one for this to be a correlation matrix
        C = Cfac / Cfac.pow(2).sum(dim=-1, keepdim=True).sqrt()
        return C @ C.transpose(-1, -2)

    def _shaped_noise_covar(self, base_shape, *params, **kwargs):
        """
        Build the block-diagonal noise covariance for inputs of shape `base_shape`.

        Args:
            base_shape (torch.Size): Shape whose leading entries are treated as batch dimensions. If it has at
                least two entries the last one is dropped and the second-to-last is used as `n`; otherwise the
                last entry is `n`.
            *params: Positional arguments forwarded to `self.noise_covar`. When any are given, `shape=None`
                is passed to the noise model.
            **kwargs: Keyword arguments forwarded to `self.noise_covar`.

        Returns:
            :obj:`gpytorch.lazy.BlockDiagLazyTensor` built from the per-point task noise blocks.
        """
        if len(base_shape) >= 2:
            *batch_shape, n, _ = base_shape
        else:
            *batch_shape, n = base_shape

        # compute the noise covariance
        if len(params) > 0:
            shape = None
        else:
            shape = base_shape if len(base_shape) == 1 else base_shape[:-1]
        noise_covar = self.noise_covar(*params, shape=shape, **kwargs)

        if self.rank > 0:
            # if rank > 0, compute the task correlation matrix
            # TODO: This is inefficient, change repeat so it can repeat LazyTensors w/ multiple batch dimensions
            task_corr = self._eval_corr_matrix()
            exp_shape = torch.Size([*batch_shape, n]) + task_corr.shape[-2:]
            task_corr_exp = lazify(task_corr.unsqueeze(-3).expand(exp_shape))
            # scale the correlation matrix on both sides by the square root of the noise covariance
            noise_sem = noise_covar.sqrt()
            task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(noise_sem, task_corr_exp), noise_sem)
        else:
            # otherwise tasks are uncorrelated
            task_covar_blocks = noise_covar

        if len(batch_shape) == 1:
            # TODO: Properly support general batch shapes in BlockDiagLazyTensor (no shape arithmetic)
            tcb_eval = task_covar_blocks.evaluate()
            task_covar = BlockDiagLazyTensor(lazify(tcb_eval), block_dim=-3)
        else:
            task_covar = BlockDiagLazyTensor(task_covar_blocks)

        return task_covar

    def forward(self, function_samples: Tensor, *params: Any, **kwargs: Any) -> base_distributions.Independent:
        """
        Return the conditional distribution of the observations given `function_samples`.

        The diagonal of the shaped noise covariance is reshaped to the trailing two dimensions of
        `function_samples` and its square root is used as the scale of a Normal centred at the samples.
        The last dimension is reinterpreted as an event dimension via `Independent`.
        """
        noise = self._shaped_noise_covar(function_samples.shape, *params, **kwargs).diag()
        noise = noise.view(*noise.shape[:-1], *function_samples.shape[-2:])
        return base_distributions.Independent(base_distributions.Normal(function_samples, noise.sqrt()), 1)


class MultitaskGaussianLikelihood(_MultitaskGaussianLikelihoodBase):
    """
    A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows
    for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`.
    If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still
    allows for a different noise parameter for each task.). This likelihood assumes homoskedastic noise.

    In addition to the task noise model, a `noise` parameter with a trailing dimension of size 1 is added to the
    diagonal of the noise covariance.

    Like the Gaussian likelihood, this object can be used with exact inference.
    """

    def __init__(
        self,
        num_tasks,
        rank=0,
        task_correlation_prior=None,
        batch_shape=torch.Size(),
        noise_prior=None,
        noise_constraint=None,
    ):
        """
        Args:
            num_tasks (int): Number of tasks.

            rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0,
            then a diagonal covariance matrix is fit.

            task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlation matrix.
            Only used when `rank` > 0.

            batch_shape (torch.Size): Batch shape used for the task noise model and for the `raw_noise` parameter.

            noise_prior (:obj:`gpytorch.priors.Prior`): Prior passed on to the
            :obj:`MultitaskHomoskedasticNoise` task noise model.

            noise_constraint (:obj:`gpytorch.constraints.Interval`): Constraint passed on to the task noise model
            and also registered for `raw_noise`. Defaults to `GreaterThan(1e-4)`.

        """
        if noise_constraint is None:
            noise_constraint = GreaterThan(1e-4)

        noise_covar = MultitaskHomoskedasticNoise(
            num_tasks=num_tasks, noise_prior=noise_prior, noise_constraint=noise_constraint, batch_shape=batch_shape
        )
        super().__init__(
            num_tasks=num_tasks,
            noise_covar=noise_covar,
            rank=rank,
            task_correlation_prior=task_correlation_prior,
            batch_shape=batch_shape,
        )

        self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, 1)))
        self.register_constraint("raw_noise", noise_constraint)

    @property
    def noise(self):
        """The `raw_noise` parameter mapped through its constraint transform."""
        return self.raw_noise_constraint.transform(self.raw_noise)

    @noise.setter
    def noise(self, value):
        self._set_noise(value)

    def _set_noise(self, value):
        """Initialize `raw_noise` so that `noise` equals `value` (non-tensor values are converted first)."""
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_noise)
        self.initialize(raw_noise=self.raw_noise_constraint.inverse_transform(value))

    def _shaped_noise_covar(self, base_shape, *params, **kwargs):
        """
        Return the parent's shaped noise covariance with `noise` added to its diagonal.

        Keyword arguments are accepted and passed through to the parent implementation, since `forward`
        calls this method with `**kwargs`.
        """
        noise_covar = super()._shaped_noise_covar(base_shape, *params, **kwargs)
        noise = self.noise
        return noise_covar.add_diag(noise)


class MultitaskGaussianLikelihoodKronecker(_MultitaskGaussianLikelihoodBase):
    """
    A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows
    for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`.
    If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still
    allows for a different `noise` parameter for each task.)

    Like the Gaussian likelihood, this object can be used with exact inference.

    Note: This Likelihood is scheduled to be deprecated and replaced by an improved version of
    `MultitaskGaussianLikelihood`. Use this only for compatibility with batched Multitask models.
    """

    def __init__(
        self, num_tasks, rank=0, task_prior=None, batch_shape=torch.Size(), noise_prior=None, noise_constraint=None
    ):
        """
        Args:
            num_tasks (int): Number of tasks.

            rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0,
            then a diagonal covariance matrix is fit.

            task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix.
            Only allowed when `rank` > 0.

            batch_shape (torch.Size): Leading batch shape of the registered parameters.

            noise_prior: Accepted for signature compatibility; not used by this constructor.

            noise_constraint (:obj:`gpytorch.constraints.Interval`): Constraint registered for `raw_noise`
            (and also applied to the per-task noises in `marginal`). Defaults to `GreaterThan(1e-4)`.

        Raises:
            RuntimeError: If `task_prior` is given while `rank` is 0.
        """
        # Start the initializer chain *after* `Likelihood` in the MRO: the Gaussian likelihood base
        # initializers expect a `noise_covar` module, whereas this class registers its noise parameters directly.
        super(Likelihood, self).__init__()
        if noise_constraint is None:
            noise_constraint = GreaterThan(1e-4)
        self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, 1)))
        if rank == 0:
            self.register_parameter(
                name="raw_task_noises", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, num_tasks))
            )
            if task_prior is not None:
                raise RuntimeError("Cannot set a `task_prior` if rank=0")
        else:
            self.register_parameter(
                name="task_noise_covar_factor", parameter=torch.nn.Parameter(torch.randn(*batch_shape, num_tasks, rank))
            )
            if task_prior is not None:
                self.register_prior("MultitaskErrorCovariancePrior", task_prior, self._eval_covar_matrix)
        self.num_tasks = num_tasks
        self.rank = rank

        self.register_constraint("raw_noise", noise_constraint)

    @property
    def noise(self):
        """The `raw_noise` parameter mapped through its constraint transform."""
        return self.raw_noise_constraint.transform(self.raw_noise)

    @noise.setter
    def noise(self, value):
        self._set_noise(value)

    def _set_noise(self, value):
        """Initialize `raw_noise` so that `noise` equals `value` (non-tensor values are converted first)."""
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_noise)
        self.initialize(raw_noise=self.raw_noise_constraint.inverse_transform(value))

    def _eval_covar_matrix(self):
        """Return the task noise covariance: the factor times its transpose plus `noise` times the identity."""
        covar_factor = self.task_noise_covar_factor
        # `noise` has shape (*batch_shape, 1); add a trailing dim so it broadcasts against the
        # (num_tasks x num_tasks) identity as a per-batch scalar instead of along the row dimension.
        noise = self.noise.unsqueeze(-1)
        D = noise * torch.eye(self.num_tasks, dtype=noise.dtype, device=noise.device)
        return covar_factor.matmul(covar_factor.transpose(-1, -2)) + D

    def marginal(self, function_dist, *params, **kwargs):
        r"""
        Adds the task noises to the diagonal of the covariance matrix of the supplied
        :obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`,
        in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it.

        To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`,
        an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task
        noises :math:`D_{t}`.

        We also incorporate a shared `noise` parameter from the base
        :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.

        The final covariance matrix after this method is then :math:`K + I_{n} \otimes D_{t} + \sigma^{2}I_{nt}`.

        Args:
            function_dist (:obj:`gpytorch.distributions.MultitaskMultivariateNormal`): Random variable whose covariance
                matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment.
        Returns:
            :obj:`gpytorch.distributions.MultitaskMultivariateNormal`: A new random variable whose covariance
            matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`I_{n} \otimes D_{t}` and :math:`\sigma^{2}I_{nt}`
            added.
        """
        mean, covar = function_dist.mean, function_dist.lazy_covariance_matrix

        if self.rank == 0:
            # the per-task noises share the constraint registered for `raw_noise`
            task_noises = self.raw_noise_constraint.transform(self.raw_task_noises)
            task_var_lt = DiagLazyTensor(task_noises)
            dtype, device = task_noises.dtype, task_noises.device
        else:
            task_noise_covar_factor = self.task_noise_covar_factor
            task_var_lt = RootLazyTensor(task_noise_covar_factor)
            dtype, device = task_noise_covar_factor.dtype, task_noise_covar_factor.device

        # identity over the data points; its size is the covariance size divided by the number of tasks
        eye_lt = DiagLazyTensor(
            torch.ones(*covar.batch_shape, covar.size(-1) // self.num_tasks, dtype=dtype, device=device)
        )
        task_var_lt = task_var_lt.expand(*covar.batch_shape, *task_var_lt.matrix_shape)

        covar_kron_lt = KroneckerProductLazyTensor(eye_lt, task_var_lt)
        covar = covar + covar_kron_lt

        noise = self.noise
        covar = add_diag(covar, noise)
        return function_dist.__class__(mean, covar)


def deprecate_task_noise_corr(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
    """
    State-dict pre-hook that converts the old task-noise parameterization in place.

    If `state_dict` contains `<prefix>task_noise_corr_factor`, that entry and `<prefix>task_noise_corr_diag`
    are removed, a correlation matrix is rebuilt from them, and the below-first-entry lower-triangular
    values of its Cholesky factor are stored under `<prefix>task_noise_corr`. Otherwise nothing happens.
    Only `state_dict` and `prefix` are used; the remaining arguments belong to the hook signature.
    """
    factor_key = prefix + "task_noise_corr_factor"
    if factor_key not in state_dict:
        return

    # Remove after 1.0
    warnings.warn(
        "Loading a deprecated parameterization of _MultitaskGaussianLikelihoodBase. Consider re-saving your model.",
        OldVersionWarning,
    )

    # pull the old factor / diagonal out of the state dict (dropping a leading singleton dimension)
    old_factor = state_dict.pop(factor_key).squeeze(0)
    old_diag = state_dict.pop(prefix + "task_noise_corr_diag").squeeze(0)
    num_tasks, rank = old_factor.shape[-2:]

    # old covariance: factor @ factor^T with the extra diagonal term added
    covar = old_factor.matmul(old_factor.transpose(-1, -2))
    diag_idx = torch.arange(covar.shape[-1], dtype=torch.long, device=covar.device)
    covar[..., diag_idx, diag_idx] += old_diag

    # normalize by the outer product of inverse standard deviations to obtain a correlation matrix
    inv_std = 1 / torch.diagonal(covar, dim1=-2, dim2=-1).sqrt().unsqueeze(-1)
    corr = covar * inv_std.matmul(inv_std.transpose(-1, -2))

    # the new parameterization stores entries of the Cholesky factor, skipping the first index pair
    chol = torch.cholesky(corr)
    rows, cols = torch.tril_indices(num_tasks, rank)[:, 1:]
    state_dict[prefix + "task_noise_corr"] = chol[..., rows, cols]