Skip to content

Commit 6b1cc78

Browse files
committed
More work and tests
1 parent cc62df7 commit 6b1cc78

File tree

11 files changed

+1507
-1254
lines changed

11 files changed

+1507
-1254
lines changed

arch/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
pytest_plugins = [
44
"arch.tests.unitroot.cointegration_data",
5+
"arch.tests.covariance.covariance_data",
56
]
67

78

arch/covariance/kernel.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"Gallant",
2626
"NeweyWest",
2727
"normalize_kernel_name",
28+
"ZeroLag",
2829
]
2930

3031
KERNELS = [
@@ -40,6 +41,7 @@
4041
"Andrews",
4142
"Gallant",
4243
"NeweyWest",
44+
"ZeroLag",
4345
]
4446

4547

@@ -189,6 +191,7 @@ class CovarianceEstimator(ABC):
189191
def __init__(
190192
self,
191193
x: ArrayLike,
194+
*,
192195
bandwidth: Optional[float] = None,
193196
df_adjust: int = 0,
194197
center: bool = True,
@@ -716,3 +719,29 @@ class NeweyWest(Bartlett):
716719
--------
717720
Bartlett
718721
"""
722+
723+
724+
zero_lag_name = "Zero-lag (No autocorrelation)"
725+
zero_lag_formula = """\
726+
w= 1 & z=0\\\\ \
727+
\\ 0 & z>0 \
728+
\\end{cases} \
729+
"""
730+
731+
732+
@Substitution(kernel_name=zero_lag_name, formula=zero_lag_formula)
733+
class ZeroLag(CovarianceEstimator, metaclass=AbstractDocStringInheritor):
734+
@property
735+
def kernel_const(self) -> float:
736+
return 1.0
737+
738+
@property
739+
def bandwidth_scale(self) -> float:
740+
return 0.0
741+
742+
@property
743+
def rate(self) -> float:
744+
return 0.0
745+
746+
def _weights(self) -> NDArray:
747+
return np.ones(1)

arch/covariance/var.py

Lines changed: 85 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from typing import Dict, NamedTuple, Optional, Tuple
22

33
import numpy as np
4-
from numpy import zeros
54
from numpy.linalg import lstsq
65
import pandas as pd
6+
from pandas.util._decorators import Appender
77
from statsmodels.tools import add_constant
88
from statsmodels.tsa.tsatools import lagmat
99

@@ -14,6 +14,9 @@
1414
normalize_kernel_name,
1515
)
1616
from arch.typing import ArrayLike, NDArray
17+
from arch.vendor import cached_property
18+
19+
__all__ = ["PreWhitenedRecolored"]
1720

1821

1922
class VARModel(NamedTuple):
@@ -23,48 +26,98 @@ class VARModel(NamedTuple):
2326
intercept: bool
2427

2528

26-
class PreWhitenRecoloredCovariance(CovarianceEstimator):
29+
class PreWhitenedRecolored(CovarianceEstimator):
2730
"""
31+
VAR-HAC and Pre-Whitened-Recolored Long-run covariance estimation.
32+
33+
Andrews & Monahan [1]_ PWRC and DenHaan-Levin's VAR-HAC [2]_ covariance
34+
estimators.
35+
2836
Parameters
2937
----------
3038
x : array_like
39+
The data to use in covariance estimation.
3140
lags : int, default None
41+
The number of lags to include in the VAR. If None, a specification
42+
search is used to select the order.
3243
method : {"aic", "hqc", "bic"}, default "aic"
44+
The information criteria to use in the model specification search.
3345
diagonal : bool, default True
46+
Flag indicating to consider both diagonal parameter coefficient
47+
matrices on lags. A diagonal coefficient matrix restricts all
48+
off-diagonal coefficient to be zero.
3449
max_lag : int, default None
50+
The maximum lag to use in the model specification search. If None,
51+
then nobs**(1/3) is used.
3552
sample_autocov : bool, default False
36-
kernel : str, default "bartlett"
53+
Whether to the the same autocovariance or the theoretical
54+
autocovariance implied by the estimated VAR when computing
55+
the long-run covairance.
56+
kernel : {str, None}, default "bartlett".
57+
The name of the kernel to use. Can be any available kernel. Input
58+
is normalised using lower casing and any underscores or hyphens
59+
are removed, so that "QuadraticSpectral", "quadratic-spectral" and
60+
"quadratic_spectral" are all the same. Use None to prevent recoloring.
3761
bandwidth : float, default None
62+
The kernel's bandwidth. If None, optimal bandwidth is estimated.
3863
df_adjust : int, default 0
64+
Degrees of freedom to remove when adjusting the covariance. Uses the
65+
number of observations in x minus df_adjust when dividing
66+
inner-products.
3967
center : bool, default True
68+
A flag indicating whether x should be demeaned before estimating the
69+
covariance.
4070
weights : array_like, default None
71+
An array of weights used to combine when estimating optimal bandwidth.
72+
If not provided, a vector of 1s is used. Must have nvar elements.
73+
force_int : bool, default False
74+
Force bandwidth to be an integer.
4175
4276
See Also
4377
--------
78+
arch.covariance.kernel
79+
Kernel-based long-run covariance estimators
4480
4581
Notes
4682
-----
83+
TODO: Add detailed notes
4784
4885
Examples
4986
--------
87+
88+
References
89+
----------
90+
.. [1] Andrews, D. W., & Monahan, J. C. (1992). An improved
91+
heteroskedasticity and autocorrelation consistent covariance matrix
92+
estimator. Econometrica: Journal of the Econometric Society, 953-966.
93+
.. [2] Haan, W. J. D., & Levin, A. T. (2000). Robust covariance matrix
94+
estimation with data-dependent VAR prewhitening order (No. 255).
95+
National Bureau of Economic Research.
5096
"""
5197

5298
def __init__(
5399
self,
54100
x: ArrayLike,
101+
*,
55102
lags: Optional[int] = None,
56103
method: str = "aic",
57104
diagonal: bool = True,
58105
max_lag: Optional[int] = None,
59106
sample_autocov: bool = False,
60-
kernel: str = "bartlett",
107+
kernel: Optional[str] = "bartlett",
61108
bandwidth: Optional[float] = None,
62109
df_adjust: int = 0,
63110
center: bool = True,
64111
weights: Optional[ArrayLike] = None,
112+
force_int: bool = False,
65113
) -> None:
66114
super().__init__(
67-
x, bandwidth=bandwidth, df_adjust=df_adjust, center=center, weights=weights
115+
x,
116+
bandwidth=bandwidth,
117+
df_adjust=df_adjust,
118+
center=center,
119+
weights=weights,
120+
force_int=force_int,
68121
)
69122
self._kernel_name = kernel
70123
self._lags = 0
@@ -75,7 +128,13 @@ def __init__(
75128
self._auto_lag_selection = True
76129
self._format_lags(lags)
77130
self._sample_autocov = sample_autocov
78-
kernel = normalize_kernel_name(kernel)
131+
if kernel is not None:
132+
kernel = normalize_kernel_name(kernel)
133+
else:
134+
if self._bandwidth not in (0, None):
135+
raise ValueError("bandwidth must be None when kernel is None")
136+
self._bandwidth = None
137+
kernel = "zerolag"
79138
if kernel not in KERNEL_ESTIMATORS:
80139
raise ValueError(KERNEL_ERR)
81140

@@ -155,7 +214,7 @@ def _ic_from_vars(
155214
ics: Dict[Tuple[int, int], float] = {
156215
(full_order, full_order): self._ic(sigma, nparam, nobs)
157216
}
158-
if not self._diagonal:
217+
if not self._diagonal or self._x.shape[1] == 1:
159218
return ics
160219

161220
purged_indiv_lags = np.empty((nvar, nobs, max_lag - full_order))
@@ -206,15 +265,15 @@ def _select_lags(self) -> Tuple[int, int]:
206265
return models[ic.argmin()]
207266

208267
def _estimate_var(self, full_order: int, diag_order: int) -> VARModel:
209-
nobs, nvar = self._x.shape
268+
nvar = self._x.shape[1]
210269
center = int(self._center)
211270
max_lag = max(full_order, diag_order)
212271
lhs, rhs, extra_lags = self._setup_model_data(max_lag)
213272
c = int(self._center)
214273
rhs = rhs[:, : c + full_order * nvar]
215274
extra_lags = extra_lags[:, :, full_order:diag_order]
216275

217-
params = zeros((nvar, nvar * max_lag + center))
276+
params = np.zeros((nvar, nvar * max_lag + center))
218277
resids = np.empty_like(lhs)
219278
ncommon = rhs.shape[1]
220279
for i in range(nvar):
@@ -246,8 +305,8 @@ def _estimate_sample_cov(self, nvar: int, nlag: int) -> NDArray:
246305
if self._center:
247306
x = x - x.mean(0)
248307
nobs = x.shape[0]
249-
var_cov = zeros((nvar * nlag, nvar * nlag))
250-
gamma = zeros((nlag, nvar, nvar))
308+
var_cov = np.zeros((nvar * nlag, nvar * nlag))
309+
gamma = np.zeros((nlag, nvar, nvar))
251310
for i in range(nlag):
252311
gamma[i] = (x[i:].T @ x[: (nobs - i)]) / nobs
253312
for r in range(nlag):
@@ -258,10 +317,11 @@ def _estimate_sample_cov(self, nvar: int, nlag: int) -> NDArray:
258317
var_cov[r * nvar : (r + 1) * nvar, c * nvar : (c + 1) * nvar] = g
259318
return var_cov
260319

320+
@staticmethod
261321
def _estimate_model_cov(
262-
self, nvar: int, nlag: int, coeffs: NDArray, short_run: NDArray
322+
nvar: int, nlag: int, coeffs: NDArray, short_run: NDArray
263323
) -> NDArray:
264-
sigma = zeros((nvar * nlag, nvar * nlag))
324+
sigma = np.zeros((nvar * nlag, nvar * nlag))
265325
sigma[:nvar, :nvar] = short_run
266326
multiplier = np.linalg.inv(np.eye(coeffs.size) - np.kron(coeffs, coeffs))
267327
vec_sigma = sigma.ravel()[:, None]
@@ -274,7 +334,7 @@ def _companion_form(
274334
) -> Tuple[NDArray, NDArray]:
275335
nvar = var_model.resids.shape[1]
276336
nlag = var_model.var_order
277-
coeffs = zeros((nvar * nlag, nvar * nlag))
337+
coeffs = np.zeros((nvar * nlag, nvar * nlag))
278338
coeffs[:nvar] = var_model.params[:, var_model.intercept :]
279339
for i in range(nlag - 1):
280340
coeffs[(i + 1) * nvar : (i + 2) * nvar, i * nvar : (i + 1) * nvar] = np.eye(
@@ -286,15 +346,21 @@ def _companion_form(
286346
var_cov = self._estimate_model_cov(nvar, nlag, coeffs, short_run)
287347
return coeffs, var_cov
288348

289-
@property
349+
@cached_property
350+
@Appender(CovarianceEstimator.cov.__doc__)
290351
def cov(self) -> CovarianceEstimate:
291352
common, individual = self._select_lags()
292353
self._order = (common, individual)
293354
var_mod = self._estimate_var(common, individual)
294355
resids = var_mod.resids
295356
nobs, nvar = resids.shape
296357
self._kernel_instance = self._kernel(
297-
resids, self._bandwidth, 0, False, self._x_weights, self._force_int
358+
resids,
359+
bandwidth=self._bandwidth,
360+
df_adjust=0,
361+
center=False,
362+
weights=self._x_weights,
363+
force_int=self._force_int,
298364
)
299365
kern_cov = self._kernel_instance.cov
300366
short_run = kern_cov.short_run
@@ -316,7 +382,7 @@ def cov(self) -> CovarianceEstimate:
316382
have diagonal coefficient matrices. The maximum eigenvalue of the companion-form \
317383
VAR(1) coefficient matrix is {max_eig}."""
318384
)
319-
coeff_sum = zeros((nvar, nvar))
385+
coeff_sum = np.zeros((nvar, nvar))
320386
params = var_mod.params[:, var_mod.intercept :]
321387
for i in range(var_mod.var_order):
322388
coeff_sum += params[:, i * nvar : (i + 1) * nvar]
@@ -343,7 +409,8 @@ def cov(self) -> CovarianceEstimate:
343409

344410
def _ensure_kernel_instantized(self) -> None:
345411
if self._kernel_instance is None:
346-
self.cov
412+
# Workaround to avoid linting noise
413+
getattr(self, "cov")
347414

348415
@property
349416
def bandwidth_scale(self) -> float:
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from itertools import product
2+
3+
import numpy as np
4+
import pandas as pd
5+
import pytest
6+
7+
DATA_PARAMS = list(product([1, 3], [True, False], [0, 1, 3]))
8+
DATA_IDS = [f"dim: {d}, pandas: {p}, order: {o}" for d, p, o in DATA_PARAMS]
9+
10+
11+
@pytest.fixture(scope="module", params=DATA_PARAMS, ids=DATA_IDS)
12+
def covariance_data(request):
13+
dim, pandas, order = request.param
14+
rs = np.random.RandomState([839084, 3823810, 982103, 829108])
15+
burn = 100
16+
shape = (burn + 500,)
17+
if dim > 1:
18+
shape += (3,)
19+
rvs = rs.standard_normal(shape)
20+
phi = np.zeros((order, dim, dim))
21+
if order > 0:
22+
phi[0] = np.eye(dim) * 0.4 + 0.1
23+
for i in range(1, order):
24+
phi[i] = 0.3 / (i + 1) * np.eye(dim)
25+
for i in range(order, burn + 500):
26+
for j in range(order):
27+
if dim == 1:
28+
rvs[i] += np.squeeze(phi[j] * rvs[i - j - 1])
29+
else:
30+
rvs[i] += phi[j] @ rvs[i - j - 1]
31+
if order > 1:
32+
p = np.eye(dim * order, dim * order, -dim)
33+
for j in range(order):
34+
p[:dim, j * dim : (j + 1) * dim] = phi[j]
35+
v, _ = np.linalg.eig(p)
36+
assert np.max(np.abs(v)) < 1
37+
rvs = rvs[burn:]
38+
if pandas and dim == 1:
39+
return pd.Series(rvs, name="x")
40+
elif pandas:
41+
df = pd.DataFrame(rvs, columns=[f"x{i}" for i in range(dim)])
42+
df.to_csv(f"cov-data-order-{order}.csv")
43+
return df
44+
45+
return rvs

0 commit comments

Comments
 (0)