Source code for hd

"""This module provides functions to estimate covariance matrices in
high dimension, i.e., when the concentration ratio is not small or even greater
than one. """

import numpy as np
import warnings
import numba
from hfhd import hf
import datetime


def fsopt(S, sigma): r""" The infeasible finite sample optimal rotation equivariant covariance matrix estimator of Ledoit and Wolf (2018). Parameters ---------- S : numpy.ndarray The sample covariance matrix. sigma : numpy.ndarray The (true) population covariance matrix. Returns ------- out : numpy.ndarray The finite sample optimal rotation equivariant covariance matrix estimate. Notes ----- This estimator is given by .. math:: S_{n}^{*}:=\sum_{i=1}^{p} d_{n, i}^{*} \cdot u_{n, i} u_{n, i}^{\prime} =\sum_{i=1}^{p}\left(u_{n, i}^{\prime} \Sigma_{n} u_{n, i}\right) \cdot u_{n, i} u_{n, i}^{\prime}, where $\left[u_{n, 1} \ldots u_{n, p}\right]$ are the sample eigenvectors. References ---------- Ledoit, O. and Wolf, M. (2018). Analytical nonlinear shrinkage of large-dimensional covariance matrices, University of Zurich, Department of Economics, Working Paper (264). """ lambd, u = np.linalg.eigh(S) d = np.einsum("ji, jk, ki -> i", u, sigma, u) return u @ np.diag(d) @ u.T
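A minimal usage sketch (all data simulated): since ``fsopt`` requires the true population covariance matrix ``sigma``, it is only useful as an infeasible benchmark in simulation studies.

# Sketch: fsopt as an infeasible benchmark when the population covariance
# matrix is known. All inputs below are simulated.
import numpy as np

np.random.seed(0)
p, n = 5, 20
sigma = np.diag(np.linspace(1., 5., p))            # known population covariance
X = np.random.multivariate_normal(np.zeros(p), sigma, n)
S = np.cov(X.T)                                    # sample covariance, shape (p, p)
S_star = fsopt(S, sigma)
# S_star keeps the sample eigenvectors of S but replaces the i-th eigenvalue
# with u_i' sigma u_i, as in the formula above.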
def linear_shrinkage(X): r""" The linear shrinkage estimator of Ledoit and Wolf (2004). The observations need to be synchronized and i.i.d. Parameters ---------- X : numpy.ndarray, shape = (p, n) A sample of synchronized log-returns. Returns ------- shrunk_cov : numpy.ndarray The linearly shrunk covariance matrix. Examples -------- >>> np.random.seed(0) >>> n = 5 >>> p = 5 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> cov = linear_shrinkage(X.T) >>> cov array([[1.1996234, 0. , 0. , 0. , 0. ], [0. , 1.1996234, 0. , 0. , 0. ], [0. , 0. , 1.1996234, 0. , 0. ], [0. , 0. , 0. , 1.1996234, 0. ], [0. , 0. , 0. , 0. , 1.1996234]]) Notes ----- The most ubiquitous estimator of the rotation equivariant type is perhaps the linear shrinkage estimator of Ledoit and Wolf (2004). These authors propose a weighted average of the sample covariance matrix and the identity (or some other highly structured) matrix that is scaled such that the trace remains the same. The weight, or shrinkage intensity, $\rho$, is chosen such that the squared Frobenius loss is minimized by inducing bias but reducing variance. In Theorem 1, the authors show that the optimal $\rho$ is given by \begin{equation} \rho=\beta^{2} /\left(\alpha^{2}+\beta^{2}\right)=\beta^{2} / \delta^{2}, \end{equation} where $\mu=\operatorname{tr}(\mathbf{\Sigma})/p$, $\alpha^{2}= \|\mathbf{\Sigma}-\mu \mathbf{I}_p\|_{F}^{2}$, $\beta^{2}= E\left[\|\mathbf{S}-\mathbf{\Sigma}\|_{F}^{2}\right]$, and $\delta^{2}=E\left[\|\mathbf{S}-\mu \mathbf{I}_p\|_{F}^{2}\right]$. $\beta^{2} / \delta^{2}$ can be interpreted as a normalized measure of the error of the sample covariance matrix. Shrinking the covariance matrix towards the $\mu$-scaled identity matrix has the effect of pulling the eigenvalues towards $\mu$. This reduces eigenvalue dispersion. In other words, the elements of the diagonal matrix in the rotation equivariant form are chosen as \begin{equation} \widehat{\mathbf{\delta}}^{(l, o)}:=\left(\widehat{d}_{1}^{(l, o)}, \ldots, \widehat{d}_{p}^{(l, o)}\right)=\left(\rho \mu+(1-\rho) \lambda_{1}, \ldots, \rho \mu+(1-\rho) \lambda_{p}\right). \end{equation} In the current form it is still an oracle estimator since it depends on unobservable quantities. To make it a feasible estimator, these quantities have to be estimated. To this end, define the grand mean as \begin{equation} \label{eqn: grand mean} \bar{\lambda}:=\frac{1}{p} \sum_{i=1}^{p} \lambda_{i}. \end{equation} The estimator for $\rho$ is given by \begin{equation} \widehat{\rho} := \frac{b^{2}}{d^{2}}, \end{equation} where $d^{2}=\left\|\mathbf{S}-\bar{\lambda} \mathbf{I}_{p} \right\|_{F}^{2}$ and $b^{2}=\min \left(\bar{b}^{2}, d^{2}\right)$ with $\bar{b}^{2}=\frac{1}{n^{2}} \sum_{k=1}^{n}\left\|\mathbf{x}_{k} \left(\mathbf{x}_{k}\right)^{\prime}-\mathbf{S}\right\|_{F}^{2}$, where $\mathbf{x}_{k}$ denotes the $k$th column of the observation matrix $\mathbf{X}$ for $k=1, \ldots, n$. In order for this estimator to be consistent, the assumption that $\mathbf{X}$ is i.i.d. with finite fourth moments must be satisfied. The feasible linear shrinkage estimator is then of the rotation equivariant form with the elements of the diagonal chosen as \begin{equation} \widehat{\mathbf{\delta}}^{(l)}:=\left(\widehat{d}_{1}^{(l)}, \ldots, \widehat{d}_{p}^{(l)}\right)=\left( \widehat{\rho} \bar{\lambda}+ (1- \widehat{\rho}) \lambda_{1}, \ldots, \widehat{\rho} \bar{\lambda}+ (1- \widehat{\rho}) \lambda_{p}\right).
\end{equation} Hence, the linear shrinkage estimator is given by \begin{equation} \widehat{\mathbf{S}}:=\sum_{i=1}^{p} \widehat{d}_{i}^{(l)} \mathbf{u}_{i} \mathbf{u}_{i}^{\prime} \end{equation} The result is a biased but well-conditioned covariance matrix estimate. References ---------- Ledoit, O. and Wolf, M. (2004). A well-conditioned estimator for large-dimensional covariance matrices, Journal of Multivariate Analysis 88(2): 365–411. """ S = np.cov(X) rho_hat = _linear_shrinkage_intensity(X, S) shrunk_cov = _linear_shrinkage_cov(S, rho_hat) return shrunk_cov
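Two properties described above can be checked directly in a small sketch (simulated i.i.d. data): the shrunk eigenvalues are less dispersed than the sample eigenvalues, and the trace is preserved.

# Sketch: linear shrinkage pulls the sample eigenvalues towards the grand
# mean while preserving the trace. Data are simulated.
import numpy as np

np.random.seed(1)
p, n = 10, 40
X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n).T   # shape (p, n)
S = np.cov(X)
shrunk = linear_shrinkage(X)
print(np.ptp(np.linalg.eigvalsh(shrunk)) <= np.ptp(np.linalg.eigvalsh(S)))  # True: less dispersion
print(np.isclose(np.trace(shrunk), np.trace(S)))                            # True: same trace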
def _linear_shrinkage_cov(S, rho=0.1): """ Linearly shrink a sample covariance matrix with shrinkage intensity ``rho``. Parameters ---------- S : numpy.ndarray, shape = (p, p) The sample covariance matrix. rho : float, 0 <= rho <= 1 The shrinkage intensity. Returns ------- shrunk_cov : numpy.ndarray The shrunk covariance matrix. Examples -------- >>> np.random.seed(0) >>> n = 5 >>> p = 5 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> S = np.cov(X.T) >>> cov = _linear_shrinkage_cov(S, 0.1) >>> cov array([[ 2.45653147, 0.02090578, 0.06479169, 1.47201982, -0.46265304], [ 0.02090578, 0.32973086, -0.13795285, -0.1922658 , -0.47424349], [ 0.06479169, -0.13795285, 0.42122102, 0.17390622, 0.535665 ], [ 1.47201982, -0.1922658 , 0.17390622, 1.25079368, 0.16427303], [-0.46265304, -0.47424349, 0.535665 , 0.16427303, 1.53983998]]) """ p = S.shape[0] lambda_bar = np.trace(S) / p shrunk_cov = rho * lambda_bar * np.eye(p) + (1 - rho) * S return shrunk_cov def _linear_shrinkage_intensity(X, S): """ Compute the optimal linear shrinkage intensity given a sample and a covariance matrix estimate according to Ledoit and Wolf (2004). Parameters ---------- X : numpy.ndarray, shape = (p, n) A sample of synchronized log-returns. S : numpy.ndarray, shape = (p, p) A sample covariance matrix of synchronized log-returns. Returns ------- rho_hat : numpy.ndarray The estimate of the optimal linear shrinkage intensity. Notes ----- Assumption: independent and identically distributed observations with finite fourth moments. """ p, n = X.shape evalues = np.linalg.eigvalsh(S) lambd = np.mean(evalues) temp = [np.linalg.norm(np.outer(X[:, i], X[:, i]) - S, 'fro')**2 for i in range(n)] b_bar_sq = np.sum(temp)/n**2 d_sq = np.linalg.norm(S - lambd*np.eye(p), 'fro')**2 b_sq = np.minimum(b_bar_sq, d_sq) rho_hat = b_sq/d_sq return rho_hat
def linear_shrink_target(cov, target, step=0.05, max_iter=100): r""" Linearly shrink a covariance matrix until a condition number target is reached. Useful for reducing the impact of outliers in :func:`nerive`. Parameters ---------- cov : numpy.ndarray, shape = (p, p) The covariance matrix. target : float > 1 The highest acceptable condition number. step : float > 0 The linear shrinkage parameter for each step. max_iter : int > 1 The maximum number of iterations until giving up. Returns ------- cov : numpy.ndarray, shape = (p, p) The linearly shrunk covariance matrix estimate. """ assert target > 1, "Target condition number must be greater than 1." for _ in range(max_iter): cond = np.linalg.cond(cov) if cond <= target: break cov = _linear_shrinkage_cov(cov, step) return cov
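A small sketch (synthetic, rank-deficient input) of how the condition number target is reached:

# Sketch: shrink an ill-conditioned covariance matrix until its condition
# number falls below the target. The input matrix is synthetic.
import numpy as np

np.random.seed(2)
A = np.random.randn(6, 3)
cov = A @ A.T                                    # rank 3, numerically ill-conditioned
well_conditioned = linear_shrink_target(cov, target=100., step=0.05)
print(np.linalg.cond(well_conditioned) <= 100.)  # True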
def nonlinear_shrinkage(X): r""" Compute the shrunk sample covariance matrix with the analytic nonlinear shrinkage formula of Ledoit and Wolf (2018). This estimator shrinks the sample covariance matrix with :func:`~_nonlinear_shrinkage_cov`. The code has been adapted from the Matlab implementation provided by the authors in Appendix D. Parameters ---------- X : numpy.ndarray, shape = (p, n) A sample of synchronized log-returns. Returns ------- sigmatilde : numpy.ndarray, shape = (p, p) The nonlinearly shrunk covariance matrix estimate. Examples -------- >>> np.random.seed(0) >>> n = 13 >>> p = 6 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> nonlinear_shrinkage(X.T) array([[ 1.50231589e+00, -2.49140874e-01, 2.68050353e-01, 2.69052962e-01, 3.42958216e-01, -1.51487901e-02], [-2.49140874e-01, 1.05011440e+00, -1.20681859e-03, -1.25414579e-01, -1.81604754e-01, 4.38535891e-02], [ 2.68050353e-01, -1.20681859e-03, 1.02797073e+00, 1.19235516e-01, 1.03335603e-01, 8.58533018e-02], [ 2.69052962e-01, -1.25414579e-01, 1.19235516e-01, 1.03290514e+00, 2.18096913e-01, 5.63011351e-02], [ 3.42958216e-01, -1.81604754e-01, 1.03335603e-01, 2.18096913e-01, 1.22086494e+00, 1.07255380e-01], [-1.51487901e-02, 4.38535891e-02, 8.58533018e-02, 5.63011351e-02, 1.07255380e-01, 1.07710975e+00]]) Notes ----- The idea of shrinkage covariance estimators is further developed by Ledoit and Wolf (2012), who argue that given a sample of size $\mathcal{O}(p^2)$, estimating $\mathcal{O}(1)$ parameters, as in linear shrinkage, is precise but too restrictive, while estimating $\mathcal{O}(p^2)$ parameters, as in the sample covariance matrix, is impossible. They argue that the optimal number of parameters to estimate is $\mathcal{O}(p)$. Their proposed non-linear shrinkage estimator uses exactly $p$ parameters, one for each eigenvalue, to regularize each eigenvalue with a specific shrinkage intensity individually. Linear shrinkage, in contrast, is restricted by a single shrinkage intensity with which all eigenvalues are shrunk uniformly. Nonlinear shrinkage enables a nonlinear fit of the shrunk eigenvalues, which is appropriate when there are clusters of eigenvalues. In this case, it may be optimal to pull a small eigenvalue (i.e., an eigenvalue that is below the grand mean) further downwards and hence further away from the grand mean. Linear shrinkage, in contrast, always pulls a small eigenvalue upwards. Ledoit and Wolf (2018) find an analytic formula based on the Hilbert transform that nonlinearly shrinks eigenvalues asymptotically optimally with respect to the MV-loss function (as well as the quadratic Frobenius loss). The shrinkage function via the Hilbert transform can be interpreted as a local attractor. Much like the gravitational field extended into space by massive objects, eigenvalue clusters exert an attraction force that increases with the mass (i.e., the number of eigenvalues in the cluster) and decreases with the distance. If an eigenvalue $\lambda_i$ has many neighboring eigenvalues slightly smaller than itself, the exerted force on $\lambda_i$ will have large magnitude and downward direction. If $\lambda_i$ has many neighboring eigenvalues slightly larger than itself, the exerted force on $\lambda_i$ will also have large magnitude but upward direction. If the neighboring eigenvalues are much larger or much smaller than $\lambda_i$, the magnitude of the force on $\lambda_i$ will be small. The nonlinear effect this has on the shrunk eigenvalues can be seen in the figure below.
The linearly shrunk eigenvalues, on the other hand, follow a line. Both approaches reduce the dispersion of eigenvalues and hence deserve the name shrinkage. .. image:: ../img/nls_fit.png The authors assume that there exists an $n \times p$ matrix $\mathbf{Z}$ of i.i.d. random variables with mean zero, variance one, and finite 16th moment such that the matrix of observations $\mathbf{X}:=\mathbf{Z} \mathbf{\Sigma}^{1/2}$. Neither $\mathbf{\Sigma}^{1/2}$ nor $\mathbf{Z}$ are observed on their own. This assumption might not be satisfied if the data generating process is a factor model. Use :func:`~nercome` or :func:`~nerive` if you believe the assumption is violated in your dataset. Theorem 3.1 of Ledoit and Wolf (2018) states that under their assumptions and general asymptotics the MV-loss is minimized by a rotation equivariant covariance estimator, where the elements of the diagonal matrix are \begin{equation} \label{eqn: oracle diag} \widehat{\mathbf{\delta}}^{(o, nl)}:=\left(\widehat{d}_{1}^{(o, nl)}, \ldots, \widehat{d}_{p}^{(o, nl)}\right):=\left(d^{(o, nl)}\left( \lambda_{1}\right), \ldots, d^{(o, nl)}\left(\lambda_{p}\right)\right). \end{equation} $d^{(o, nl)}(x)$ denotes the oracle nonlinear shrinkage function and it is defined as \begin{equation} \forall x \in \operatorname{Supp}(F) \quad d^{(o, nl)}(x):=\frac{x}{ [\pi c x f(x)]^{2}+\left[1-c-\pi c x \mathcal{H}_{f}(x)\right]^{2}}, \end{equation} where $\mathcal{H}_{g}(x)$ is the Hilbert transform. As per Definition 2 of Ledoit and Wolf (2018), it is defined as \begin{equation} \forall x \in \mathbb{R} \quad \mathcal{H}_{g}(x):=\frac{1}{\pi} P V \int_{-\infty}^{+\infty} g(t) \frac{d t}{t-x}, \end{equation} which uses the Cauchy Principal Value, denoted as $PV$, to evaluate the singular integral in the following way \begin{equation} P V \int_{-\infty}^{+\infty} g(t) \frac{d t}{t-x}:=\lim _{\varepsilon \rightarrow 0^{+}}\left[\int_{-\infty}^{x-\varepsilon} g(t) \frac{d t} {t-x}+\int_{x+\varepsilon}^{+\infty} g(t) \frac{d t}{t-x}\right]. \end{equation} It is an oracle estimator due to the dependence on the limiting sample spectral density $f$, its Hilbert transform $\mathcal{H}_f$, and the limiting concentration ratio $c$, which are all unobservable. Nevertheless, it represents progress compared to :func:`hd.fsopt`, since it no longer depends on the full population covariance matrix, $\mathbf{\Sigma}$, but only on its eigenvalues. This reduces the number of parameters to be estimated from the impossible $\mathcal{O}(p^2)$ to the manageable $\mathcal{O}(p)$. To make the estimator feasible, unobserved quantities have to be replaced by statistics. A consistent estimator for the limiting concentration $c$ is the sample concentration $c_n = p/n$. For the limiting sample spectral density $f$, the authors propose a kernel estimator. This is necessary, even though $F_{n}\stackrel{\text{a.s.}}{\rightarrow} F$, since $F_n$ is discontinuous at every $\lambda_{i}$ and thus its derivative $f_n$, which would have been the natural estimator for $f$, does not exist there. A kernel density estimator is a non-parametric estimator of the probability density function of a random variable. In kernel density estimation, data from a finite sample is smoothed with a non-negative function $K$, called the kernel, and a smoothing parameter $h$, called the bandwidth, to make inferences about the population density.
A kernel estimator is of the form \begin{equation} \widehat{f}_{h}(x)=\frac{1}{N} \sum_{i=1}^{N} K_{h}\left(x-x_{i}\right)= \frac{1}{N h} \sum_{i=1}^{N} K\left(\frac{x-x_{i}}{h}\right). \end{equation} The chosen kernel estimator for the limiting sample spectral density is based on the Epanechnikov kernel with a variable bandwidth, proportional to the eigenvalues, $h_{i}:=\lambda_{i} h,$ for $i=1, \ldots, p$, where the global bandwidth is set to $h:=n^{-1 / 3}$. The reasoning behind the variable bandwidth choice can be intuited from the figure below, which shows that the support of the limiting sample spectral distribution is approximately proportional to the eigenvalue. .. image:: ../img/lssd.png The Epanechnikov kernel is defined as \begin{equation} \forall x \in \mathbb{R} \quad \kappa^{(E)}(x):= \frac{3}{4 \sqrt{5}}\left[1-\frac{x^{2}}{5}\right]^{+}. \end{equation} The kernel estimators of $f$ and $\mathcal{H}$ are thus \begin{equation} \forall x \in \mathbb{R} \quad \widetilde{f}_{n}(x):= \frac{1}{p} \sum_{i=1}^{p} \frac{1}{h_{i}} \kappa^{(E)} \left(\frac{x-\lambda_{i}}{h_{i}}\right)=\frac{1}{p} \sum_{i=1}^{p} \frac{1}{\lambda_{i} h} \kappa^{(E)}\left(\frac{x-\lambda_{i}}{\lambda_{i} h}\right) \end{equation} and \begin{equation} \mathcal{H}_{\tilde{f}_{n}}(x):=\frac{1}{p} \sum_{i=1}^{p} \frac{1}{h_{i}} \mathcal{H}_{k}\left(\frac{x-\lambda_{i}}{h_{i}}\right)=\frac{1}{p} \sum_{i=1}^{p} \frac{1}{\lambda_{i} h} \mathcal{H}_{k}\left(\frac{x-\lambda_{i}} {\lambda_{i} h}\right)=\frac{1}{\pi} PV \int \frac{\widetilde{f}_{n}(t)}{x-t} d t, \end{equation} respectively. The feasible nonlinear shrinkage estimator is of rotation equivariant form, where the elements of the diagonal matrix are \begin{equation} \forall i=1, \ldots, p \quad \widetilde{d}_{i}:=\frac{\lambda_{i}} {\left[\pi \frac{p}{n} \lambda_{i} \widetilde{f}_{n} \left(\lambda_{i}\right)\right]^{2}+\left[1-\frac{p}{n}-\pi \frac{p}{n} \lambda_{i} \mathcal{H}_{\tilde{f}_{n}}\left(\lambda_{i} \right)\right]^{2}} \end{equation} In other words, the feasible nonlinear shrinkage estimator is \begin{equation} \widetilde{\mathbf{S}}:=\sum_{i=1}^{p} \widetilde{d}_{i} \mathbf{u}_{i} \mathbf{u}_{i}^{\prime}. \end{equation} References ---------- Ledoit, O. and Wolf, M. (2018). Analytical nonlinear shrinkage of large-dimensional covariance matrices, University of Zurich, Department of Economics, Working Paper (264). """ p, n = X.shape S = np.cov(X) sigma_tilde = _nonlinear_shrinkage_cov(S, n-1) return sigma_tilde
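The kernel density step can be illustrated in isolation. The sketch below (simulated data; ``f_tilde`` is an illustrative helper, not part of this module) evaluates the variable-bandwidth Epanechnikov estimate at the sample eigenvalues, mirroring the ``ftilde`` computation in :func:`~_nonlinear_shrinkage_cov`.

# Sketch: variable-bandwidth Epanechnikov kernel estimate of the sample
# spectral density, evaluated at the sample eigenvalues. ``f_tilde`` is a
# hypothetical helper written for illustration only.
import numpy as np

def f_tilde(x, lambd, h):
    x = np.asarray(x)[:, None]              # evaluation points, shape (m, 1)
    h_i = lambd[None, :] * h                # variable bandwidths h_i = lambda_i * h
    u = (x - lambd[None, :]) / h_i
    kernel = 3 / (4 * np.sqrt(5)) * np.maximum(1 - u ** 2 / 5, 0)
    return np.mean(kernel / h_i, axis=1)

np.random.seed(3)
p, n = 20, 60
X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n).T
lambd = np.linalg.eigvalsh(np.cov(X))
h = n ** (-1 / 3)                           # global bandwidth
density_at_eigenvalues = f_tilde(lambd, lambd, h)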
def _nonlinear_shrinkage_cov(S, n): """ Shrink a sample covariance matrix with the analytic nonlinear shrinkage formula of Ledoit and Wolf (2018). The code has been adapted from the Matlab implementation provided by the authors in Appendix D. Parameters ---------- S : numpy.ndarray, shape = (p, p) The covariance matrix estimate based on a p by ``n`` sample. n : int The effective number of observations per feature. Returns ------- sigmatilde : numpy.ndarray, shape = (p, p) The nonlinearly shrunk covariance matrix estimate. Notes ----- .. note:: Subtract the number of degrees of freedom lost due to prior estimations from the number of observations per feature to compute ``n``. E.g. if the feature has been de-meaned in the process of covariance matrix estimation, subtract 1. If the covariance is based on residuals, subtract the number of parameters of the estimator that produced the residuals. Examples -------- >>> np.random.seed(0) >>> n = 13 >>> p = 6 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> S = np.cov(X.T) >>> _nonlinear_shrinkage_cov(S,n-1) array([[ 1.50231589e+00, -2.49140874e-01, 2.68050353e-01, 2.69052962e-01, 3.42958216e-01, -1.51487901e-02], [-2.49140874e-01, 1.05011440e+00, -1.20681859e-03, -1.25414579e-01, -1.81604754e-01, 4.38535891e-02], [ 2.68050353e-01, -1.20681859e-03, 1.02797073e+00, 1.19235516e-01, 1.03335603e-01, 8.58533018e-02], [ 2.69052962e-01, -1.25414579e-01, 1.19235516e-01, 1.03290514e+00, 2.18096913e-01, 5.63011351e-02], [ 3.42958216e-01, -1.81604754e-01, 1.03335603e-01, 2.18096913e-01, 1.22086494e+00, 1.07255380e-01], [-1.51487901e-02, 4.38535891e-02, 8.58533018e-02, 5.63011351e-02, 1.07255380e-01, 1.07710975e+00]]) >>> # This function can even handle singular covariance matrices. >>> p = 14 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> S = np.cov(X.T) >>> _nonlinear_shrinkage_cov(S,n-1) array([[ 7.54515492e-01, -6.45326742e-02, -1.80320579e-01, -3.57720108e-03, -7.97520403e-02, 1.36138395e-01, -1.01345337e-02, 7.25279128e-02, -7.86780483e-02, 2.72372825e-02, 1.00706333e-02, -5.79499680e-02, -1.25481898e-01, -1.48256429e-02], [-6.45326742e-02, 8.44503245e-01, -1.24917494e-01, -3.51829389e-02, 1.33441823e-01, 5.92079433e-02, 2.11138324e-02, -1.27753593e-03, 9.52252623e-02, -2.11375517e-03, 9.64330095e-02, 5.23211671e-02, -1.65536088e-01, 2.55397217e-02], [-1.80320579e-01, -1.24917494e-01, 6.99399731e-01, -4.04994408e-02, 3.55740241e-02, 1.57533721e-01, -6.81649874e-02, 1.01763326e-01, -3.83272984e-02, -5.46609875e-03, 8.38352683e-02, -8.09862188e-02, -6.09294471e-02, 4.26805157e-02], [-3.57720108e-03, -3.51829389e-02, -4.04994408e-02, 8.86179145e-01, -8.08767394e-02, 1.30951556e-02, -1.72720919e-02, 1.50237335e-01, -1.15205858e-01, -1.48730085e-01, 3.77860032e-02, 1.24465925e-02, -5.68199278e-04, 2.94108399e-02], [-7.97520403e-02, 1.33441823e-01, 3.55740241e-02, -8.08767394e-02, 8.03855378e-01, 3.85564280e-02, -1.73179369e-02, 8.22774366e-02, -2.03421197e-01, -1.52509750e-02, -7.03293437e-02, -3.14963651e-02, 1.21756419e-01, 5.54656502e-03], [ 1.36138395e-01, 5.92079433e-02, 1.57533721e-01, 1.30951556e-02, 3.85564280e-02, 8.07230306e-01, -3.16636374e-02, 1.55275235e-03, 1.27215506e-01, -7.58816222e-02, -1.37688973e-02, 1.67230701e-02, 1.09806724e-01, -5.06072120e-02], [-1.01345337e-02, 2.11138324e-02, -6.81649874e-02, -1.72720919e-02, -1.73179369e-02, -3.16636374e-02, 7.92866895e-01, 1.05036377e-01, 1.68442856e-01, -1.01968600e-01, 1.76314684e-02, -1.06150243e-01, 3.90440121e-02, -2.52221857e-02], [ 7.25279128e-02, 
-1.27753593e-03, 1.01763326e-01, 1.50237335e-01, 8.22774366e-02, 1.55275235e-03, 1.05036377e-01, 7.83954520e-01, -5.33212443e-03, 2.40305470e-01, -3.91748916e-02, 1.78156296e-01, -4.31671138e-02, -4.71762192e-02], [-7.86780483e-02, 9.52252623e-02, -3.83272984e-02, -1.15205858e-01, -2.03421197e-01, 1.27215506e-01, 1.68442856e-01, -5.33212443e-03, 5.92364714e-01, 3.74973744e-02, -1.20092945e-02, 5.85976099e-02, 7.75487285e-02, 1.03957288e-01], [ 2.72372825e-02, -2.11375517e-03, -5.46609875e-03, -1.48730085e-01, -1.52509750e-02, -7.58816222e-02, -1.01968600e-01, 2.40305470e-01, 3.74973744e-02, 7.16243320e-01, 8.94555209e-02, -1.44653465e-01, 7.22046121e-02, 9.86481306e-02], [ 1.00706333e-02, 9.64330095e-02, 8.38352683e-02, 3.77860032e-02, -7.03293437e-02, -1.37688973e-02, 1.76314684e-02, -3.91748916e-02, -1.20092945e-02, 8.94555209e-02, 9.51436769e-01, 5.81230204e-02, 6.85744828e-02, -3.80840856e-02], [-5.79499680e-02, 5.23211671e-02, -8.09862188e-02, 1.24465925e-02, -3.14963651e-02, 1.67230701e-02, -1.06150243e-01, 1.78156296e-01, 5.85976099e-02, -1.44653465e-01, 5.81230204e-02, 7.25021377e-01, -1.19052381e-02, 2.75756708e-02], [-1.25481898e-01, -1.65536088e-01, -6.09294471e-02, -5.68199278e-04, 1.21756419e-01, 1.09806724e-01, 3.90440121e-02, -4.31671138e-02, 7.75487285e-02, 7.22046121e-02, 6.85744828e-02, -1.19052381e-02, 7.57919800e-01, -5.50198035e-02], [-1.48256429e-02, 2.55397217e-02, 4.26805157e-02, 2.94108399e-02, 5.54656502e-03, -5.06072120e-02, -2.52221857e-02, -4.71762192e-02, 1.03957288e-01, 9.86481306e-02, -3.80840856e-02, 2.75756708e-02, -5.50198035e-02, 9.21548133e-01]]) References ---------- Ledoit, O. and Wolf, M. (2018). Analytical nonlinear shrinkage of large-dimensional covariance matrices, University of Zurich, Department of Economics, Working Paper (264). """ assert n >= 12, "Number of observations per feature should be at least 12." p = S.shape[0] # extract sample eigenvalues sorted in ascending order and eigenvectors. lambd, u = np.linalg.eigh(S) # compute analytical nonlinear shrinkage kernel formula. lambd = lambd[np.maximum(0, p - n):] L = np.tile(lambd, (np.minimum(p, n), 1)).T h = n ** (-1 / 3) # Equation(4.9) H = h * L.T x = (L - L.T) / H ftilde = (3 / 4 / np.sqrt(5)) ftilde *= np.mean(np.maximum(1 - x ** 2 / 5, 0) / H, axis=1) # Equation(4.7) Hftemp = ((-3 / 10 / np.pi) * x + (3 / 4 / np.sqrt(5) / np.pi) * (1 - x ** 2 / 5) * np.log(np.abs((np.sqrt(5) - x) / (np.sqrt(5) + x)))) # Equation(4.8) Hftemp[np.abs(x) == np.sqrt(5)] = ((-3 / 10 / np.pi) * x[np.abs(x) == np.sqrt(5)]) Hftilde = np.mean(Hftemp / H, axis=1) if p <= n: dtilde = lambd / ((np.pi * (p / n) * lambd * ftilde) ** 2 + (1 - (p / n) - np.pi * (p / n) * lambd * Hftilde) ** 2) # Equation(4.3) else: Hftilde0 =\ ((1 / np.pi) * (3 / 10 / h ** 2 + 3 / 4 / np.sqrt(5) / h * (1 - 1 / 5 / h ** 2) * np.log((1 + np.sqrt(5) * h) / (1 - np.sqrt(5) * h))) * np.mean(1 / lambd)) # Equation(C.8) dtilde0 = 1 / (np.pi * (p - n) / n * Hftilde0) # Equation(C.5) dtilde1 = \ lambd / (np.pi ** 2 * lambd ** 2 * (ftilde ** 2 + Hftilde ** 2)) # Equation(C.4) dtilde = np.concatenate([dtilde0 * np.ones((p - n)), dtilde1]) sigmatilde = np.dot(np.dot(u, np.diag(dtilde)), u.T) return sigmatilde def _get_partitions(L): """ Get intraday partitions for NERIVE. Parameters ---------- L : int > 1 The number of partitions. Returns ------- stp : list The list of datetime.time objects. 
""" stp = [(datetime.datetime(2000, 1, 1, 9, 30) + datetime.timedelta(minutes=np.ceil(6.5/L*60)) * i).time() for i in range(L)] stp.append(datetime.time(16, 0)) return stp
def nerive(tick_series_list, stp=None, estimator=None, **kwargs): r""" The nonparametric eigenvalue-regularized integrated covariance matrix estimator (NERIVE) of Lam and Feng (2018). This estimator is similar to the :func:`~nercome` estimator, extended to the high-frequency setting. Parameters ---------- tick_series_list : list of pd.Series Each pd.Series contains ticks of one asset with datetime index. K : numpy.ndarray, default = ``None`` An array of scales. If ``None``, all scales :math:`i = 1, ..., M` are used, where M is chosen as :math:`M = n^{1/2}` according to Eqn (34) of Zhang (2006). stp : array-like of datetime.time() objects, default = ``None`` The split time points. If ``None``, the trading day from 9:30 to 16:00 is split into four partitions. estimator : function, default = ``None`` An integrated covariance estimator taking ``tick_series_list`` as the first argument. If ``None``, :func:`~msrc_pairwise` is used. **kwargs : miscellaneous Keyword arguments of the ``estimator``. Returns ------- out : numpy.ndarray, 2d The NERIVE estimate of the integrated covariance matrix. Notes ----- The nonparametric eigenvalue-regularized integrated covariance matrix estimator (NERIVE) proposed by Lam and Feng (2018) splits the sample into $L$ partitions. The split points are denoted by $$ 0=\widetilde{\tau}_{0}<\widetilde{\tau}_{1}<\cdots<\widetilde{\tau}_{L}=T $$ and the $l$th partition is given by $\left(\widetilde{\tau}_{l-1}, \widetilde{\tau}_{l}\right].$ The integrated covariance estimator for the $l$th partition is \begin{equation} \widehat{\mathbf{\Sigma}}_l=\mathbf{U}_{-l} \operatorname{diag}\left(\mathbf{U}_{-l}' \widetilde{\mathbf{\Sigma}}_l \mathbf{U}_{-l}\right) \mathbf{U}_{-l}', \end{equation} where $\mathbf{U}_{-l}$ is an orthogonal matrix depending on all observations over the full interval $[0, T]$ except the $l$th partition. The NERIVE estimator over the full interval $[0, T]$ is given by \begin{equation} \widehat{\mathbf{\Sigma}}=\sum_{l=1}^{L} \widehat{\mathbf{\Sigma}}_l= \sum_{l=1}^{L} \mathbf{U}_{-l} \operatorname{diag}\left(\mathbf{U}_{-l}' \widetilde{\mathbf{\Sigma}}_l \mathbf{U}_{-l}\right) \mathbf{U}_{-l}'. \end{equation} $\widetilde{\mathbf{\Sigma}}$ is an integrated covariance estimator that corrects for asynchronicity and microstructure noise, e.g., one of :mod:`hf`. Lam and Feng (2018) choose the TSRC for the sake of tractability in the proofs. Importantly, NERIVE does not assume i.i.d. observations but only weak dependence between the log-price process and the microstructure noise process within partition $l$, and weak serial dependence of microstructure noise vectors, given $\mathcal{F}_{-l}$. Similar to NERCOME, NERIVE allows for the presence of pervasive factors as long as they persist between refresh times. .. warning:: NERIVE splits the data into smaller subsamples. Estimator parameters that depend on the sample size must be adjusted. Further, the price process must be preprocessed to have zero mean return over the **full** sample. References ---------- Lam, C. and Feng, P. (2018). A nonparametric eigenvalue-regularized integrated covariance matrix estimator for asset return data, Journal of Econometrics 206(1): 226–257. """ # TODO extend to multiple days, each partition should then be # one day and the cov for eigenmatrix is average of individual day covs. last_series_date = None for series in tick_series_list: assert series.index[0].date() == series.index[-1].date(),\ """All tick times must be contained within one day.
NERIVE is an intraday estimator.""" if last_series_date is not None: assert series.index[0].date() == last_series_date,\ """The date of tick times of all assets must be equal.""" last_series_date = series.index[0].date() if estimator is None: estimator = hf.msrc_pairwise if stp is None: stp = _get_partitions(4) L = len(stp) - 1 Sigma_hat_list = [] for j in range(L): ticks_j = [x.between_time(stp[j], stp[j+1], True, True) for x in tick_series_list] ticks_notj = [x.between_time(stp[j+1], stp[j], False, False) for x in tick_series_list] Lambda, P_not_j = np.linalg.eigh(estimator(ticks_notj, **kwargs)) Sigma_tilde = estimator(ticks_j, **kwargs) D = np.diag(np.diag(P_not_j.T @ Sigma_tilde @ P_not_j)) Sigma_hat = P_not_j @ D @ P_not_j.T Sigma_hat_list.append(Sigma_hat) return np.sum(Sigma_hat_list, axis=0)
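A usage sketch with synthetic one-day tick data and a deliberately naive plug-in estimator (previous-tick synchronization to a one-minute grid followed by a realized covariance). In practice, a noise- and asynchronicity-robust estimator such as :func:`hf.msrc_pairwise` should be used; the naive estimator below only keeps the example self-contained.

# Sketch: NERIVE on simulated ticks with a hypothetical, deliberately naive
# integrated covariance estimator. All data and the estimator are illustrative.
import numpy as np
import pandas as pd

def naive_ic_estimator(tick_series_list):
    # previous-tick interpolation onto a common one-minute grid
    prices = pd.concat([s.resample('1min').last().ffill() for s in tick_series_list], axis=1)
    returns = np.log(prices).diff().dropna().to_numpy()
    return returns.T @ returns               # realized covariance over the window

np.random.seed(4)
idx = pd.date_range('2021-06-01 09:30', '2021-06-01 16:00', freq='s')
log_prices = np.cumsum(np.random.normal(0., 1e-4, size=(len(idx), 2)), axis=0)
tick_series_list = [pd.Series(np.exp(log_prices[:, i]), index=idx) for i in range(2)]
icov = nerive(tick_series_list, estimator=naive_ic_estimator)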
def nercome(X, m=None, M=50): r""" The nonparametric eigenvalue-regularized covariance matrix estimator (NERCOME) of Lam (2016). Parameters ---------- X : numpy.ndarray, shape = (p, n) A 2d array of log-returns (n observations of p assets). m : int, default = ``None`` The size of the random split with which the eigenvalues are computed. If ``None``, the estimator searches over values suggested by Lam (2016) in Equation (4.8) and selects the best according to Equation (4.7). M : int, default = 50 The number of permutations. Lam (2016) suggests 50. Returns ------- opt_Sigma_hat : numpy.ndarray, shape = (p, p) The NERCOME covariance matrix estimate. Examples -------- >>> np.random.seed(0) >>> n = 13 >>> p = 5 >>> X = np.random.multivariate_normal(np.zeros(p), np.eye(p), n) >>> cov = nercome(X.T, m=5, M=10) >>> cov array([[ 1.34226722, 0.09693429, 0.00233125, 0.17717658, -0.01643898], [ 0.09693429, 1.0508423 , 0.10112215, -0.22908987, -0.04914651], [ 0.00233125, 0.10112215, 1.0731665 , -0.02959628, 0.38652859], [ 0.17717658, -0.22908987, -0.02959628, 1.10753766, 0.1807373 ], [-0.01643898, -0.04914651, 0.38652859, 0.1807373 , 0.88832791]]) Notes ----- A very different approach to the analytic formulas of nonlinear shrinkage is pursued by Abadir et al. (2014). These authors propose a nonparametric estimator based on a sample splitting scheme. They split the data into two pieces and exploit the independence of observations across the splits to regularize the eigenvalues. Lam (2016) builds on their results and proposes a nonparametric estimator that is asymptotically optimal even if the population covariance matrix $\mathbf{\Sigma}$ has a factor structure. In the case of low-rank strong factor models, the assumption that each observation can be written as $\mathbf{x}_{t} =\mathbf{\Sigma}^{1 / 2} \mathbf{z}_{t}$ for $t=1, \ldots, n,$ where each $\mathbf{z}_{t}$ is a $p \times 1$ vector of independent and identically distributed random variables $z_{i t}$, for $i=1, \ldots, p,$ with zero-mean and unit variance, is violated since the covariance matrix is singular and its Cholesky decomposition does not exist. Both :func:`linear_shrinkage` and :func:`nonlinear_shrinkage` are built on this assumption and are no longer optimal if it is not fulfilled. The proposed nonparametric eigenvalue-regularized covariance matrix estimator (NERCOME) starts by splitting the data into two pieces of size $n_1$ and $n_2 = n - n_1$. It is assumed that the observations are i.i.d. with finite fourth moments such that the statistics computed in the different splits are likewise independent of each other. The sample covariance matrix of the first partition is defined as $\mathbf{S}_{n_1}:= \mathbf{X}_{n_1}\mathbf{X}_{n_1}'/n_1$. Its spectral decomposition is given by $\mathbf{S}_{n_1}=\mathbf{U}_{n_1} \mathbf{\Lambda}_{n_1} \mathbf{U}_{n_1}^{\prime},$ where $\mathbf{\Lambda}_{n_1}$ is a diagonal matrix whose elements are the eigenvalues $\mathbf{\lambda}_{n_1}=\left(\lambda_{n_1, 1}, \ldots, \lambda_{n_1, p}\right)$, and $\mathbf{U}_{n_1}$ is an orthogonal matrix whose columns $\left[\mathbf{u}_{n_1, 1} \ldots \mathbf{u}_{n_1, p}\right]$ are the corresponding eigenvectors. Analogously, the sample covariance matrix of the second partition is defined by $\mathbf{S}_{n_2}:= \mathbf{X}_{n_2}\mathbf{X}_{n_2}'/n_2$.
Theorem 1 of Lam (2016) shows that \begin{equation} \widetilde{\mathbf{d}}_{n_1}^{(\text{NERCOME})}= \operatorname{diag}( \mathbf{U}_{n_1}^{\prime} \mathbf{S}_{n_2} \mathbf{U}_{n_1}) \end{equation} is asymptotically the same as the finite-sample optimal rotation equivariant $\mathbf{d}_{n_1}^{*} = \operatorname{diag}\left(\mathbf{U}_{n_1}^{\prime} \mathbf{\Sigma} \mathbf{U}_{n_1}\right)$ based on the split of size $n_1$. The proposed estimator is thus of the rotation equivariant form, where the elements of the diagonal matrix are chosen according to $\widetilde{\mathbf{d}}_{n_1}^{(\text{NERCOME})}$. In other words, the estimator is given by \begin{equation} \widetilde{\mathbf{S}}_{ n_1}^{(\text{NERCOME})}:= \sum_{i=1}^{p}\widetilde{d}_{n_1, i}^{(\text{NERCOME})} \cdot \mathbf{u}_{n_1, i} \mathbf{u}_{n_1, i}^{\prime}. \end{equation} The author shows that this estimator is asymptotically optimal with respect to the Frobenius loss even under factor structure. However, it uses the sample data inefficiently since only one section is utilized for the calculation of each component. The natural extension is to permute the data and bisect it anew. With these sections an estimate is computed according to $\widetilde{\mathbf{S}}_{ n_1}^{(\text{NERCOME})}$. This is done $M$ times and the covariance matrix estimates are averaged: \begin{equation} \widetilde{\mathbf{S}}_{n_1, M}^{(\text{NERCOME})}:=\frac{1}{M} \sum_{j=1}^{M}\widetilde{\mathbf{S}}_{n_1, j}^{(\text{NERCOME})}. \end{equation} The estimator depends on two tuning parameters, $M$ and $n_1$. A higher $M$ gives more accurate results, but the computational cost grows as well. The author suggests that more than 50 iterations are generally not needed for satisfactory results. $n_1$ is subject to regularity conditions. The author proposes to search over the contenders \begin{equation} n_1=\left[2 n^{1 / 2}, 0.2 n, 0.4 n, 0.6 n, 0.8 n, n-2.5 n^{1 / 2}, n-1.5 n^{1 / 2}\right] \end{equation} and select the one that minimizes the following criterion inspired by Bickel and Levina (2008) \begin{equation} g(n_1)=\left\|\frac{1}{M} \sum_{j=1}^{M} \left(\widetilde{\mathbf{S}}_{n_1, j}^{(\text{NERCOME})} -\mathbf{S}_{n_2, j}\right)\right\|_{F}^{2}. \end{equation} References ---------- Abadir, K. M., Distaso, W. and Žikeš, F. (2014). Design-free estimation of variance matrices, Journal of Econometrics 181(2): 165–180. Bickel, P. J. and Levina, E. (2008). Regularized estimation of large covariance matrices, The Annals of Statistics 36(1): 199–227. Lam, C. (2016). Nonparametric eigenvalue-regularized precision or covariance matrix estimator, The Annals of Statistics 44(3): 928–953. """ p, n = X.shape if m is None: m_list = [int(2 * n ** 0.5), int(0.2 * n), int(0.4 * n), int(0.6 * n), int(0.8 * n), int(n - 2.5*n ** 0.5), int(n - 1.5 * n ** 0.5)] Sigmas = [_nercome(X, m, M) for m in m_list] idx = _optimal_nere(Sigmas) opt_Sigma_hat = Sigmas[idx][0] else: opt_Sigma_hat = _nercome(X, m, M)[0] return opt_Sigma_hat
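Since robustness to factor structure is the main selling point, a small sketch (one simulated pervasive factor) looks as follows:

# Sketch: NERCOME on simulated returns driven by one pervasive factor, a
# setting in which the assumptions behind linear and nonlinear shrinkage are
# questionable. All data are simulated.
import numpy as np

np.random.seed(5)
p, n = 30, 120
beta = np.random.normal(1., 0.3, size=(p, 1))     # factor loadings
factor = np.random.normal(0., 1., size=(1, n))    # common factor
idio = np.random.normal(0., 0.5, size=(p, n))     # idiosyncratic noise
X = beta @ factor + idio                          # shape (p, n)
Sigma_hat = nercome(X, M=20)                      # searches over the n_1 grid above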
def _nercome(X, m, M): """ Compute the NERCOME estimator. Parameters ---------- X : numpy.ndarray, shape = (p, n) A 2d array of log-returns (n observations of p assets). m : int The number of observations in each random sample. M : int The number of permutations. Returns ------- (Sigma_hat, Sigma_tilde) : tuple Sigma_hat is the NERCOME estimate of the covariance matrix. Sigma_tilde is the averaged (over the M permutations) sample covariance matrix of the random split samples of size m, which is needed for :func:`~_optimal_nere`. """ p, n = X.shape Sigma_hat_sum = np.zeros((p, p)) Sigma_tilde_sum = np.zeros((p, p)) for j in range(M): idx_m = np.random.choice(n, m, replace=False) mask = np.ones(n, dtype=bool) mask[idx_m] = False Lambda, P_1 = np.linalg.eigh(np.cov(X[:, mask])) Sigma_tilde = np.cov(X[:, idx_m]) D = np.diag(np.diag(P_1.T @ Sigma_tilde @ P_1)) Sigma_hat = P_1 @ D @ P_1.T Sigma_hat_sum += Sigma_hat Sigma_tilde_sum += Sigma_tilde Sigma_hat = Sigma_hat_sum/M Sigma_tilde = Sigma_tilde_sum/M return (Sigma_hat, Sigma_tilde) def _optimal_nere(Sigmas): """ Determine the optimal nonparametric eigenvalue regularized estimator per Equation (4.7) of Lam (2016). Parameters ---------- Sigmas : list of tuples Each element of the list is a tuple with Sigma_hat as the 0th element and Sigma_tilde as the 1st element. Returns ------- idx : int The index of the best estimator. """ norms = [np.linalg.norm(x[0] - x[1], 'fro')**2 for x in Sigmas] idx_best = np.argmin(norms) return idx_best
@numba.njit def to_corr(cov): """Convert a covariance matrix to a correlation matrix. Parameters ---------- cov : numpy.ndarray, 2d, float A covariance matrix. Returns ------- out : numpy.ndarray, 2d A correlation matrix. Examples -------- >>> cov = np.array([[2., 1.],[1., 2.]]) >>> to_corr(cov) array([[1. , 0.5], [0.5, 1. ]]) """ L = np.diag(1./np.sqrt(np.diag(cov))) return L @ cov @ L.T