Source code for loss

"""
This module provides loss functions frequently encountered in the literature
on high dimensional covariance matrix estimation.
"""

import numpy as np
from hfhd import hd


[docs]def prial(S_list, sigma_hat_list, sigma, loss_func=None): r""" The percentage relative improvement in average loss (PRIAL) over the sample covariance matrix. Parameters ---------- S_list : list of numpy.ndarray The sample covariance matrix. sigma_hat_list : list of numpy.ndarray The covariance matrix estimate using the estimator of interest. sigma : numpy.ndarray The (true) population covariance matrix. loss_func : function, defualt = None The loss function. If ``None`` the minimum variance loss function is used. Returns ------- prial : float The PRIAL. Notes ----- The percentage relative improvement in average loss (PRIL) over the sample covariance matrix is given by: .. math:: \mathrm{PRIAL}_{n}\left(\widehat{\Sigma}_{n}\right):= \frac{\mathbb{E}\left[\mathcal{L}_{n}\left(S_{n}, \Sigma_{n}\right)\right]-\mathbb{E}\left[\mathcal{L}_{n} \left(\widehat{\Sigma}_{n}, \Sigma_{n}\right)\right]} {\mathbb{E}\left[\mathcal{L}_{n}\left(S_{n}, \Sigma_{n}\right)\right]-\mathbb{E}\left[\mathcal{L}_{n} \left(S_{n}^{*}, \Sigma_{n}\right)\right]} \times 100 \% """ if loss_func is None: loss_func = loss_mv mean_loss_S = np.mean([loss_func(S, sigma) for S in S_list], axis=0) mean_loss_sigma_hat = np.mean([loss_func(sigma_hat, sigma) for sigma_hat in sigma_hat_list], axis=0) mean_loss_fsopt = np.mean([loss_func(hd.fsopt(S, sigma), sigma) for S in S_list], axis=0) denom = mean_loss_S - mean_loss_fsopt num = mean_loss_S - mean_loss_sigma_hat if denom != 0: prial = num / denom * 100 else: raise ValueError("""PRIAL not defined: The sample covariance attained the smallest possible loss.""") return prial
[docs]def loss_mv(sigma_hat, sigma): r""" The minimum variance loss function of Ledoit and Wolf (2018). Parameters ---------- sigma_hat : numpy.ndarray The covariance matrix estimate using the estimator of interest. sigma : numpy.ndarray The (true) population covariance matrix. Returns ------- out : float The minimum variance loss. Notes ----- The minimum variance (MV)-loss function is proposed by Engle et al. (2019) as a loss function that is appropriate for covariance matrix estimator evaluation for the problem of minimum variance portfolio allocations under a linear constraint and large-dimensional asymptotic theory. The loss function is given by: .. math:: \mathcal{L}_{n}^{\mathrm{MV}}\left(\widehat{\Sigma}_{n}, \Sigma_{n}\right):=\frac{\operatorname{Tr}\left(\widehat{\Sigma}_{n}^{-1} \Sigma_{n} \widehat{\Sigma}_{n}^{-1}\right) / p} {\left[\operatorname{Tr}\left(\widehat{\Sigma}_{n}^{-1}\right) /p\right]^{2}}-\frac{1}{\operatorname{Tr}\left(\Sigma_{n}^{-1}\right)/p}. It can be interpreted as the true variance of the minimum variance portfolio constructed from the estimated covariance matrix. """ p = sigma.shape[0] sigma_hat_inv = np.linalg.inv(sigma_hat) sigma_inv = np.linalg.inv(sigma) num = np.trace(sigma_hat_inv @ sigma @ sigma_hat_inv) / p denom = (np.trace(sigma_hat_inv) / p) ** 2 return num / denom - (np.trace(sigma_inv) / p)
[docs]def loss_fr(sigma_hat, sigma): r"""Squared Frobenius norm scaled by 1/p. Same as ``np.linalg.norm(sigma_hat - sigma, 'fro')**2 *1/p``. Parameters ---------- sigma_hat : numpy.ndarray The covariance matrix estimate using the estimator of interest. sigma : numpy.ndarray The (true) population covariance matrix. Returns ------- out : float The minimum variance loss. Notes ----- The loss function is given by: .. math:: \mathcal{L}_{n}^{\mathrm{FR}}\left(\widehat{\Sigma}_{n}, \Sigma_{n}\right):=\frac{1}{p} \operatorname{Tr}\left[\left(\widehat{\Sigma}_{n} -\Sigma_{n}\right)^{2}\right] """ p = sigma.shape[0] delta = sigma_hat - sigma return np.trace(delta @ delta) / p
[docs]def marchenko_pastur(x, c, sigma_sq): r""" The Marchenko-Pastur distribution. This is the pdf of eigenvalues of a sample covariance matrix estimate of the true covariance matrix, which is a``sigma_sq`` scaled identity matrix. It depends on the concentration ratio ``c``, which is the ratio of the dimension divided by the number of observations. Parameters ---------- x : float The value of the sample eigenvalue. c : float The concentration ratio. $c=p/n$. sigma_sq : float The value of population eigenvalues. Returns ------- p : float The value of the Marchenko-Pastur distribution at the sample eigenvalue ``x``. Notes ----- The Marchenko-Pastur law states that the limiting spectrum of the sample covariance matrix $S = {X 'X}/n$ of independent and identically distributed $p$-dimensional random vectors $\mathbf{X}=\left(x_{1}, \ldots, x_{n}\right)$ with mean $\mathbf{0}$ and covariance matrix $\mathbf{\Sigma}=\sigma^{2} \mathbf{I}_{p}$, has density \begin{equation} f_{c}(x)=\left\{\begin{array}{ll} \frac{1}{2 \pi x c \sigma^{2}} \sqrt{(b-x)(x-a)}, & a \leq x \leq b \\ 0, & \text { otherwise, } \end{array}\right. \end{equation} where the smallest and the largest eigenvalues are given by $a=\sigma^{2}(1-\sqrt{c})^{2}$ and $b=\sigma^{2}(1+\sqrt{c})^{2}$, respectively, as $p, n \rightarrow \infty$ with $p / n \rightarrow c>0$. References ---------- Marchenko, V. A. and Pastur, L. A. (1967). Distribution of eigenvalues for some sets of random matrices, Matematicheskii Sbornik 114(4): 507–536. """ a = sigma_sq*(1-np.sqrt(c))**2 b = sigma_sq*(1+np.sqrt(c))**2 if a <= x <= b: p = 1/(2*np.pi*x*c*sigma_sq)*np.sqrt((b-x)*(x-a)) else: p = 0 return p