dapper.mods.QG.sakov2008
Reproduce results from `bib.sakov2008deterministic`.
"""Reproduce results from `bib.sakov2008deterministic`."""

import numpy as np

import dapper.mods as modelling
from dapper.mods.QG import LP_setup, model_config, sample_filename, shape
from dapper.tools.localization import nd_Id_localization

############################
# Time series, model, initial condition
############################

model = model_config("sakov2008", {})
Dyn = {
    "M": np.prod(shape),
    "model": model.step,
    "noise": 0,
}

# Considering that I have 8GB mem on the Mac, and the estimate:
# ≈ (8 bytes/float)*(129² float/stat)*(7 stat/k) * K,
# it should be possible to run experiments of length (K) < 8000.
tseq = modelling.Chronology(dt=model.prms["dtout"], dko=1, T=1500, BurnIn=250)
# In my opinion the burn in should be 400.
# Sakov also used 10 repetitions.

X0 = modelling.RV(M=Dyn["M"], file=sample_filename)


############################
# Observation settings
############################

# This will look like satellite tracks when plotted in 2D
Ny = 300
jj = modelling.linspace_int(Dyn["M"], Ny)

# Want: random_offset(t1)==random_offset(t2) if t1==t2.
# Solutions: (1) use caching (ensure maxsize=inf) or (2) stream seeding.
# Either way, use a local random stream to avoid interfering with global stream
# (and e.g. ensure equal outcomes for 1st and 2nd run of the python session).
# NOTE: the legacy RandomState is kept (rather than `default_rng()`) so that the
# published RMSE scores below remain reproducible bit-for-bit.
# TODO 5: upgrade to `default_rng()`
rstream = np.random.RandomState()
max_offset = jj[1] - jj[0]


def obs_inds(ko):
    """Return the observation indices for time index `ko`.

    The regular grid `jj` is shifted by a pseudo-random offset that is a
    deterministic function of `ko` (thanks to re-seeding `rstream`).
    """

    def random_offset():
        # Seeding with ko makes the offset reproducible per time index.
        rstream.seed(ko)
        u = rstream.rand()
        return int(np.floor(max_offset * u))

    return jj + random_offset()


def obs_now(ko):
    """Construct the (time-dependent) observation operator for time index `ko`."""
    jj = obs_inds(ko)

    @modelling.ens_compatible
    def hmod(E):
        return E[jj]

    # Localization.
    batch_shape = [2, 2]  # width (in grid points) of each state batch.
    # Increasing the width
    # => quicker analysis (but less rel. speed-up by parallelization, depending on NPROC)
    # => worse (increased) rmse (but width 4 is only slightly worse than 1);
    #    if inflation is applied locally, then rmse might actually improve.
    localizer = nd_Id_localization(shape[::-1], batch_shape[::-1], jj, periodic=False)

    Obs = {
        "M": Ny,
        "model": hmod,
        "noise": modelling.GaussRV(C=4 * np.eye(Ny)),
        "localizer": localizer,
    }

    # Moving localization mask for smoothers:
    Obs["loc_shift"] = lambda ii, dt: ii  # no movement (suboptimal, but easy)

    # Jacobian left unspecified coz it's (usually) employed by methods that
    # compute full cov, which in this case is too big.

    return modelling.Operator(**Obs)


# `obs_now` already has the required signature; no lambda wrapper needed.
Obs = dict(time_dependent=obs_now)


############################
# Other
############################
HMM = modelling.HiddenMarkovModel(Dyn, Obs, tseq, X0, LP=LP_setup(obs_inds))


####################
# Suggested tuning
####################
# Reproducing Fig 7 from Sakov and Oke "DEnKF" paper from 2008.

# Notes:
# - If N<=25, then typically need to increase the dissipation
#   to be almost sure to avoid divergence. See counillon2009.py for example.
# - We have not had the need to increase the dissipation parameter for the EnKF.
# - Our experiments differ from Sakov's in the following minor details:
#   - We use a batch width (unsure what Sakov uses).
#   - The "EnKF-Matlab" code has a bug: it forgets to take sqrt() of the taper coeffs.
#     This is equivalent to: R_actually_used = R_reported / sqrt(2).
#   - The boundary cells are all fixed at 0 by BCs,
#     but are included in the state vector (amounting to 3% of its length),
#     and thus in RMSE calculations (which is not quite fair/optimal).

# from dapper.mods.QG.sakov2008 import HMM                      # rmse.a:
# xps += LETKF(mp=True, N=25,infl=1.04       ,loc_rad=10)       # 0.64
# xps += LETKF(mp=True, N=25,infl='-N',xN=2.0,loc_rad=10)       # 0.66
# xps += SL_EAKF(       N=25,infl=1.04       ,loc_rad=10)       # 0.62
# xps += SL_EAKF(       N=25,infl=1.03       ,loc_rad=10)       # 0.58
#
# Iterative:
# Yet to try: '-N' inflation, larger N, different loc_rad, and
# smaller Lag (testing lag>3 was worse [with this loc_shift])
# xps += iLEnKS('Sqrt',N=25,infl=1.03,loc_rad=12,nIter=3,Lag=2) # 0.59
#
# N = 45
# xps += LETKF(mp=True, N=N,infl=1.02        ,loc_rad=10)       # 0.52
# xps += LETKF(mp=True, N=N,infl='-N',xN=1.5 ,loc_rad=10)       # 0.51
model =
<dapper.mods.QG.model_config object>
Dyn =
{'M': 16641, 'model': <bound method model_config.step of <dapper.mods.QG.model_config object>>, 'noise': 0}
tseq =
<Chronology>
- K: 300
- Ko: 299
- T: 1500.0
- BurnIn: 250
- dto: 5.0
- dt: 5.0
X0 =
RV({
'file': PosixPath('/home/runner/dpr_data/samples/QG_samples.npz'),
'M': 16641
})
Ny =
300
jj =
array([ 0, 55, 110, 166, 221, 277, 332, 388, 443, 499,
554, 610, 665, 721, 776, 832, 887, 942, 998, 1053,
1109, 1164, 1220, 1275, 1331, 1386, 1442, 1497, 1553, 1608,
1664, 1719, 1775, 1830, 1885, 1941, 1996, 2052, 2107, 2163,
2218, 2274, 2329, 2385, 2440, 2496, 2551, 2607, 2662, 2718,
2773, 2828, 2884, 2939, 2995, 3050, 3106, 3161, 3217, 3272,
3328, 3383, 3439, 3494, 3550, 3605, 3661, 3716, 3771, 3827,
3882, 3938, 3993, 4049, 4104, 4160, 4215, 4271, 4326, 4382,
4437, 4493, 4548, 4604, 4659, 4714, 4770, 4825, 4881, 4936,
4992, 5047, 5103, 5158, 5214, 5269, 5325, 5380, 5436, 5491,
5547, 5602, 5657, 5713, 5768, 5824, 5879, 5935, 5990, 6046,
6101, 6157, 6212, 6268, 6323, 6379, 6434, 6489, 6545, 6600,
6656, 6711, 6767, 6822, 6878, 6933, 6989, 7044, 7100, 7155,
7211, 7266, 7322, 7377, 7432, 7488, 7543, 7599, 7654, 7710,
7765, 7821, 7876, 7932, 7987, 8043, 8098, 8154, 8209, 8265,
8320, 8375, 8431, 8486, 8542, 8597, 8653, 8708, 8764, 8819,
8875, 8930, 8986, 9041, 9097, 9152, 9208, 9263, 9318, 9374,
9429, 9485, 9540, 9596, 9651, 9707, 9762, 9818, 9873, 9929,
9984, 10040, 10095, 10151, 10206, 10261, 10317, 10372, 10428, 10483,
10539, 10594, 10650, 10705, 10761, 10816, 10872, 10927, 10983, 11038,
11094, 11149, 11204, 11260, 11315, 11371, 11426, 11482, 11537, 11593,
11648, 11704, 11759, 11815, 11870, 11926, 11981, 12036, 12092, 12147,
12203, 12258, 12314, 12369, 12425, 12480, 12536, 12591, 12647, 12702,
12758, 12813, 12869, 12924, 12979, 13035, 13090, 13146, 13201, 13257,
13312, 13368, 13423, 13479, 13534, 13590, 13645, 13701, 13756, 13812,
13867, 13922, 13978, 14033, 14089, 14144, 14200, 14255, 14311, 14366,
14422, 14477, 14533, 14588, 14644, 14699, 14755, 14810, 14865, 14921,
14976, 15032, 15087, 15143, 15198, 15254, 15309, 15365, 15420, 15476,
15531, 15587, 15642, 15698, 15753, 15808, 15864, 15919, 15975, 16030,
16086, 16141, 16197, 16252, 16308, 16363, 16419, 16474, 16530, 16585])
rstream =
RandomState(MT19937) at 0x7FA24EFCDE40
max_offset =
55
def
obs_inds(ko):
def
obs_now(ko):
57def obs_now(ko): 58 jj = obs_inds(ko) 59 60 @modelling.ens_compatible 61 def hmod(E): 62 return E[jj] 63 64 # Localization. 65 batch_shape = [2, 2] # width (in grid points) of each state batch. 66 # Increasing the width 67 # => quicker analysis (but less rel. speed-up by parallelization, depending on NPROC) 68 # => worse (increased) rmse (but width 4 is only slightly worse than 1); 69 # if inflation is applied locally, then rmse might actually improve. 70 localizer = nd_Id_localization(shape[::-1], batch_shape[::-1], jj, periodic=False) 71 72 Obs = { 73 "M": Ny, 74 "model": hmod, 75 "noise": modelling.GaussRV(C=4 * np.eye(Ny)), 76 "localizer": localizer, 77 } 78 79 # Moving localization mask for smoothers: 80 Obs["loc_shift"] = lambda ii, dt: ii # no movement (suboptimal, but easy) 81 82 # Jacobian left unspecified coz it's (usually) employed by methods that 83 # compute full cov, which in this case is too big. 84 85 return modelling.Operator(**Obs)
Obs =
{'time_dependent': <function <lambda>>}
HMM =
HiddenMarkovModel({
'Dyn':
Operator({
'M': 16641,
'model':
<bound method model_config.step of
<dapper.mods.QG.model_config object>>,
'noise':
GaussRV({
'mu': array([0., 0., 0., ..., 0., 0., 0.]),
'M': 16641,
'C': 0
})
}),
'Obs': <TimeDependentOperator> .Ops: <function <lambda>>,
'tseq':
<Chronology>
- K: 300
- Ko: 299
- T: 1500.0
- BurnIn: 250
- dto: 5.0
- dt: 5.0,
'X0':
RV({
'file': PosixPath('/home/runner/dpr_data/samples/QG_samples.npz'),
'M': 16641
}),
'liveplotters':
[(1, <function spatial2d.<locals>.init at
0x7fa23fd372e0>), (0, <class
'dapper.tools.liveplotting.spectral_errors'>), (0,
<function sliding_marginals.<locals>.init at
0x7fa23fd371c0>)],
'sectors': {},
'name': 'QG/sakov2008.py'
})