dapper.mods.QG.sakov2008

Reproduce results from bib.sakov2008deterministic.

  1"""Reproduce results from `bib.sakov2008deterministic`."""
  2
  3import numpy as np
  4
  5import dapper.mods as modelling
  6from dapper.mods.QG import LP_setup, model_config, sample_filename, shape
  7from dapper.tools.localization import nd_Id_localization
  8
  9############################
 10# Time series, model, initial condition
 11############################
 12
 13model = model_config("sakov2008", {})
 14Dyn = {
 15    "M": np.prod(shape),
 16    "model": model.step,
 17    "noise": 0,
 18}
 19
# Considering that I have 8 GB of memory on the Mac, and the estimate
# ≈ (8 bytes/float) * (129² floats/stat) * (7 stats/step) * K,
# it should be possible to run experiments of length K < 8000.
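# For instance (illustrative arithmetic, using the estimate above with K = 8000):
#     8 * 129**2 * 7 * 8000 ≈ 7.5e9 bytes ≈ 7.5 GB,
# which just fits within 8 GB.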
tseq = modelling.Chronology(dt=model.prms["dtout"], dko=1, T=1500, BurnIn=250)
# In my opinion, the burn-in should be 400.
# Sakov also used 10 repetitions.

X0 = modelling.RV(M=Dyn["M"], file=sample_filename)

############################
# Observation settings
############################

# This will look like satellite tracks when plotted in 2D
Ny = 300
jj = modelling.linspace_int(Dyn["M"], Ny)

# Want: random_offset(t1) == random_offset(t2) if t1 == t2.
# Solutions: (1) use caching (ensure maxsize=inf) or (2) stream seeding.
# Either way, use a local random stream, so as to avoid interfering with the
# global stream (and, e.g., to ensure equal outcomes for the 1st and 2nd runs
# of the python session).
# TODO 5: upgrade to `default_rng()`
rstream = np.random.RandomState()
max_offset = jj[1] - jj[0]
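
# A possible sketch of the `default_rng()` upgrade mentioned in the TODO
# (an assumption, not the original implementation): seeding a fresh generator
# per call keeps the offset a pure function of `ko`:
#
#     def random_offset(ko):
#         rng = np.random.default_rng(ko)
#         return int(np.floor(max_offset * rng.random()))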


def obs_inds(ko):
    def random_offset():
        rstream.seed(ko)
        u = rstream.rand()
        return int(np.floor(max_offset * u))

    return jj + random_offset()
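
# Illustration (added sketch, not part of the original script): since the
# stream is re-seeded with `ko` on every call, `obs_inds` is deterministic
# in `ko`, as required above:
#
#     assert np.all(obs_inds(3) == obs_inds(3))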


def obs_now(ko):
    jj = obs_inds(ko)

    @modelling.ens_compatible
    def hmod(E):
        return E[jj]

    # Localization.
    batch_shape = [2, 2]  # width (in grid points) of each state batch.
    # Increasing the width
    #  => quicker analysis (but less relative speed-up from parallelization,
    #     depending on NPROC);
    #  => worse (increased) rmse (though width 4 is only slightly worse than 1);
    #     if inflation is applied locally, then rmse might actually improve.
    localizer = nd_Id_localization(shape[::-1], batch_shape[::-1], jj, periodic=False)

    Obs = {
        "M": Ny,
        "model": hmod,
        "noise": modelling.GaussRV(C=4 * np.eye(Ny)),
        "localizer": localizer,
    }

    # Moving localization mask for smoothers:
    Obs["loc_shift"] = lambda ii, dt: ii  # no movement (suboptimal, but easy)

    # The Jacobian is left unspecified because it is (usually) only employed by
    # methods that compute the full covariance, which in this case is too big.

    return modelling.Operator(**Obs)


Obs = dict(time_dependent=lambda ko: obs_now(ko))


############################
# Other
############################
HMM = modelling.HiddenMarkovModel(Dyn, Obs, tseq, X0, LP=LP_setup(obs_inds))
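
# Hypothetical usage (as in DAPPER's `examples/` scripts, not part of this
# module): a synthetic truth and observation sequence can be generated via
#
#     xx, yy = HMM.simulate()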


####################
# Suggested tuning
####################
# Reproducing Fig. 7 of the "DEnKF" paper by Sakov and Oke (2008).

# Notes:
# - If N <= 25, then one typically needs to increase the dissipation
#   to be almost sure to avoid divergence. See counillon2009.py for an example.
#   - We have not had the need to increase the dissipation parameter for the EnKF.
# - Our experiments differ from Sakov's in the following minor details:
#    - We use a batch width (unsure what Sakov uses).
#    - The "EnKF-Matlab" code has a bug: it forgets to take the sqrt() of the taper
#      coeffs. This is equivalent to: R_actually_used = R_reported / sqrt(2).
# - The boundary cells are all fixed at 0 by the BCs,
#   but are included in the state vector (amounting to 3% of its length),
#   and thus in the RMSE calculations (which is not quite fair/optimal).

# from dapper.mods.QG.sakov2008 import HMM                       # rmse.a:
# xps += LETKF(mp=True, N=25,infl=1.04       ,loc_rad=10)        # 0.64
# xps += LETKF(mp=True, N=25,infl='-N',xN=2.0,loc_rad=10)        # 0.66
# xps += SL_EAKF(       N=25,infl=1.04       ,loc_rad=10)        # 0.62
# xps += SL_EAKF(       N=25,infl=1.03       ,loc_rad=10)        # 0.58
#
# Iterative:
# Yet to try: '-N' inflation, larger N, different loc_rad, and
# smaller Lag (testing Lag > 3 was worse [with this loc_shift]).
# xps += iLEnKS('Sqrt',N=25,infl=1.03,loc_rad=12,nIter=3,Lag=2) # 0.59
#
# N = 45
# xps += LETKF(mp=True, N=N,infl=1.02       ,loc_rad=10)        # 0.52
# xps += LETKF(mp=True, N=N,infl='-N',xN=1.5,loc_rad=10)        # 0.51
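
# A sketch of how such an experiment is typically launched (assuming the
# standard DAPPER workflow of the `examples/` scripts; not part of this module):
#
#     import dapper as dpr
#     import dapper.da_methods as da
#     xps = dpr.xpList()
#     xps += da.LETKF(mp=True, N=25, infl=1.04, loc_rad=10)
#     save_as = xps.launch(HMM, liveplots=False)
#     print(xps.tabulate_avrgs())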