dapper.xp_process

Tools (notably xpSpace) for processing and presenting experiment data.

View Source

  1"""Tools (notably `xpSpace`) for processing and presenting experiment data."""
  2
  3import collections
  4import copy
  5import warnings
  6
  7import colorama
  8import numpy as np
  9from mpl_tools import place
 10from patlib.std import nonchalance
 11from struct_tools import AlignedDict, complement, intersect, transps
 12from tabulate import tabulate
 13
 14from dapper.dpr_config import rc
 15from dapper.stats import align_col, unpack_uqs
 16from dapper.tools.colors import color_text, stripe
 17from dapper.tools.rounding import UncertainQtty
 18from dapper.tools.viz import NoneDict, default_styles
 19from dapper.xp_launch import xpList
 20
 21
 22class SparseSpace(dict):
 23    """Subclass of `dict` that enforces key conformity to a given `namedtuple`.
 24
 25    Like a normal `dict`, it can hold any type of objects.
 26    But, since the keys must conform, they effectively follow a coordinate system,
 27    so that the `dict` becomes a vector **space**. Example:
 28    >>> dct = xpSpace(["x", "y", "z"])
 29    >>> dct[(1, 2, 3)] = "pointA"
 30
 31    The coordinate system is specified by the `dims`:
 32    a list of keys defining the `namedtuple` of `self.Coord`.
 33    The above dict only has three `dims`, so this fails:
 34    >>> dct[(1, 2, 3, 4)] = "pointB"  # doctest: +NORMALIZE_WHITESPACE
 35    Traceback (most recent call last):
 36    ...
 37    TypeError: The key (1, 2, 3, 4) did not fit the coord.  system
 38    which has dims ('x', 'y', 'z')
 39
 40    Coordinates can contain any value, including `None`:
 41    >>> dct[(1, 2, None)] = "pointB"
 42
 43    In intended usage, this space is highly sparse,
 44    meaning there are many coordinates with no entry.
 45    Indeed, as a data format for nd-arrays, it may be called
 46    "coordinate list representation", used e.g. by `scipy.sparse.coo_matrix`.
 47
 48    Thus, operations across (potentially multiple) `dims`,
 49    such as optimization or averaging, should be carried out by iterating
 50    -- not over the `dims` -- but over the the list of items.
 51
 52    The most important method is `nest`,
 53    which is used (by `xpSpace.table_tree`) to print and plot results.
 54    This is essentially a "groupby" operation, and indeed the case could
 55    be made that this class should be replaced by `pandas.DataFrame`,
 56    or better yet: <https://github.com/pydata/xarray>.
 57
 58    The `__getitem__` is quite flexible, allowing accessing by:
 59
 60    - The actual key, a `self.Coord` object, or a standard tuple.<br>
 61      Returns single item. Example:
 62
 63            >>> dct[1, 2, 3] == dct[(1, 2, 3)] == dct[dct.Coord(1, 2, 3)] == "pointA"
 64            True
 65
 66    - A `slice` or `list`.<br>
 67      Returns list.<br>
 68      *PS: indexing by slice or list assumes that the dict is ordered,
 69      which we inherit from the builtin `dict` since Python 3.7.
 70      Moreover, it is a reflection of the fact that the internals of this class
 71      work by looping over items.*
 72
 73    In addition, the `subspace` method (also aliased to `__call__`, and is implemented
 74    via `coords_matching`) can be used to select items by the values of a *subset*
 75    of their attributes. It returns a `SparseSpace`.
 76    If there is only a single item it can be accessed as in `dct[()]`.
 77
 78    Inspired by
 79
 80    - https://stackoverflow.com/a/7728830
 81    - https://stackoverflow.com/q/3387691
 82    """
 83
 84    @property
 85    def dims(self):
 86        return self.Coord._fields
 87
 88    def __init__(self, dims):
 89        """Usually initialized through `xpSpace.from_list`.
 90
 91        Parameters
 92        ----------
 93        dims: list or tuple
 94            The attributes defining the coordinate system.
 95        """
 96        # Define coordinate system
 97        self.Coord = collections.namedtuple('Coord', dims)
 98
 99        def repr2(c, keys=False, str_or_repr=repr):
100            if keys:
101                lst = [f"{k}={str_or_repr(v)}" for k, v in c._asdict().items()]
102            else:
103                lst = [str_or_repr(v) for v in c]
104            return "(" + ", ".join(lst) + ")"
105
106        self.Coord.repr2 = repr2
107
108    def update(self, items):
109        """Update dict, using the custom `__setitem__` to ensure key conformity.
110
111        NB: the `kwargs` syntax is not supported because it only works for keys that
112        consist of (a single) string, which is not very interesting for SparseSpace.
113        """
114        # See https://stackoverflow.com/a/2588648
115        # and https://stackoverflow.com/a/2390997
116        try:
117            items = items.items()
118        except AttributeError:
119            pass
120        for k, v in items:
121            self[k] = v
122
123    def __setitem__(self, key, val):
124        """Setitem ensuring coordinate conforms."""
125        try:
126            key = self.Coord(*key)
127        except TypeError:
128            raise TypeError(
129                f"The key {key!r} did not fit the coord. system "
130                f"which has dims {self.dims}")
131        super().__setitem__(key, val)
132
133    def __getitem__(self, key):
134        """Also allows list-indexing by `list` and `slice`."""
135        # List of items (from list of indices)
136        if isinstance(key, list):
137            lst = list(self.values())
138            return [lst[k] for k in key]
139
140        # List of items (from slice)
141        elif isinstance(key, slice):
142            return [*self.values()][key]
143
144        # Single item (by Coord object, or tuple)
145        else:
146            # NB: Dont't use isinstance(key, self.Coord)
147            # coz it fails when the namedtuple (Coord) has been
148            # instantiated in different places (but with equal params).
149            # Also see bugs.python.org/issue7796
150            return super().__getitem__(key)
151
152    def __call__(self, **kwargs):
153        """Shortcut (syntactic sugar) for `SparseSpace.subspace`."""
154        return self.subspace(**kwargs)
155
156    def subspace(self, **kwargs):
157        """Get an affine subspace.
158
159        NB: If you're calling this repeatedly (for all values of the same `kwargs`)
160        then you should consider using `SparseSpace.nest` instead.
161
162        Example:
163        >>> xp_dict.subspace(da_method="EnKF", infl=1, seed=3) # doctest: +SKIP
164        """
165        # Slow version
166        # outer = self.nest(outer_dims=list(kwargs))  # make subspaceS
167        # inner = outer[outer.Coord(**kwargs)]        # discard all but 1
168
169        coords = self.coords_matching(**kwargs)
170        inner = self.__class__(complement(self.dims, kwargs))
171        for coord in coords:
172            inner[inner.coord_from_attrs(coord)] = self[coord]
173
174        return inner
175
176    def coords_matching(self, **kwargs):
177        """Get all `coord`s matching kwargs.
178
179        Used by `SparseSpace.label_xSection` and `SparseSpace.subspace`. Unlike the
180        latter, this function returns a *list* of *keys* of the *original subspace*.
181
182        Note that the `missingval` shenanigans of `xpList.inds` are here unnecessary
183        since each coordinate is complete.
184        """
185        def match(coord):
186            return all(getattr(coord, k) == kwargs[k] for k in kwargs)
187
188        return [c for c in self if match(c)]
189
190    def coord_from_attrs(self, obj):
191        """Form a `coord` for this `xpSpace` by extracting attrs. from `obj`.
192
193        For instances of `self.Coord`, this is the identity opeartor, i.e.
194
195            self.coord_from_attrs(coord) == coord
196        """
197        coord = (getattr(obj, a, None) for a in self.dims)
198        return self.Coord(*coord)
199
200    def __repr__(self):
201        txt  = f"<{self.__class__.__name__}>"
202        txt += " with Coord/dims: "
203        try:
204            txt += "(and ticks): " + str(AlignedDict(self.ticks))
205        except AttributeError:
206            txt += str(self.dims) + "\n"
207
208        # Note: print(xpList(self)) produces a more human-readable table,
209        # but requires prep_table(), which we don't really want to call again
210        # (it's only called in from_list, not (necessarily) in any nested spaces)
211        L = 2
212        keys = [k.repr2() for k in self]
213        if 2*L < len(keys):
214            keys = keys[:L] + ["..."] + keys[-L:]
215        keys = "[\n  " + ",\n  ".join(keys) + "\n]"
216        return txt + f"populated by {len(self)} items with keys: {keys}"
217
218    def nest(self, inner_dims=None, outer_dims=None):
219        """Project along `inner_acces` to yield a new `xpSpace` with dims `outer_dims`
220
221        The entries of this `xpSpace` are themselves `xpSpace`s, with dims `inner_dims`,
222        each one regrouping the entries with the same (projected) coordinate.
223
224        Note: this method could also be called `groupby`.
225        Note: this method is also called by `__getitem__(key)` if `key` is dict.
226        """
227        # Default: a singleton outer space,
228        # with everything contained in the inner (projection) space.
229        if inner_dims is None and outer_dims is None:
230            outer_dims = ()
231
232        # Validate dims
233        if inner_dims is None:
234            assert outer_dims is not None
235            inner_dims = complement(self.dims, outer_dims)
236        else:
237            assert outer_dims is None
238            outer_dims = complement(self.dims, inner_dims)
239
240        # Fill spaces
241        outer_space = self.__class__(outer_dims)
242        for coord, entry in self.items():
243            # Lookup subspace coord
244            outer_coord = outer_space.coord_from_attrs(coord)
245            try:
246                # Get subspace
247                inner_space = outer_space[outer_coord]
248            except KeyError:
249                # Create subspace, embed
250                inner_space = self.__class__(inner_dims)
251                outer_space[outer_coord] = inner_space
252            # Add entry to subspace, similar to .fill()
253            inner_space[inner_space.coord_from_attrs(coord)] = entry
254
255        return outer_space
256
257    def intersect_dims(self, attrs):
258        """Rm those `a` in `attrs` that are not in `self.dims`.
259
260        This enables sloppy `dims` allotment, for ease-of-use.
261        """
262        absent = complement(attrs, self.dims)
263        if absent:
264            print(color_text("Warning:", colorama.Fore.RED),
265                  "The requested attributes",
266                  color_text(str(absent), colorama.Fore.RED),
267                  ("were not found among the xpSpace dims"
268                   " (attrs. used as coordinates for the set of experiments)."
269                   " This may be no prob. if the attrs are redundant for the coord-sys."
270                   " However, if due to confusion or mis-spelling, then it is likely"
271                   " to cause mis-interpretation of the shown results."))
272            attrs = complement(attrs, absent)
273        return attrs
274
275    def append_dim(self, dim):
276        """Expand `self.Coord` by `dim`. For each item, insert `None` in new dim."""
277        self.__init__(self.dims+(dim,))
278        for coord in list(self):
279            entry = self.pop(coord)
280            self[coord + (None,)] = entry
281
282    def label_xSection(self, label, *NoneAttrs, **sub_coord):
283        """Insert duplicate entries for the given cross-section.
284
285        Works by adding the attr. `xSection` to the dims of `SparseSpace`,
286        and setting it to `label` for entries matching `sub_coord`,
287        reflecting the "constance/constraint/fixation" this represents.
288        This distinguishes the entries in this fixed-affine subspace,
289        preventing them from being gobbled up by the operations of `nest`.
290
291        If you wish, you can specify the `NoneAttrs`,
292        which are consequently set to None for the duplicated entries,
293        preventing them from being shown in plot labels and tuning panels.
294        """
295        if "xSect" not in self.dims:
296            self.append_dim('xSect')
297
298        for coord in self.coords_matching(**self.intersect_dims(sub_coord)):
299            entry = copy.deepcopy(self[coord])
300            coord = coord._replace(xSect=label)
301            coord = coord._replace(**{a: None for a in NoneAttrs})
302            self[coord] = entry
303
304
# The roles that a dim can be allotted (see `xpSpace.table_tree`),
# each defaulting to `None`, i.e. "no dims play this role".
DIM_ROLES = dict.fromkeys(("outer", "inner", "mean", "optim"))
306
307
308class xpSpace(SparseSpace):
309    """Functionality to facilitate working with `xps` and their results."""
310
311    @classmethod
312    def from_list(cls, xps, tick_ordering=None):
313        """Init. from a list of objects, typically experiments referred to as `xp`s.
314
315        - Computes the relevant `dims` from the attributes, and
316        - Fills the dict by `xp`s.
317        - Computes and writes the attribute `ticks`.
318
319        This creates a `SparseSpace` of `xp`s. However, the nested subspaces generated
320        by `xpSpace.table_tree` (for printing and plotting) will hold objects of type
321        `UncertainQtty`, because it calls `mean` which calls `get_stat(statkey)`.
322        """
323        # Define and fill SparseSpace
324        dct = xpList(xps).prep_table(nomerge=['xSect'])[0]
325        self = cls(dct.keys())
326        self.fill(xps)
327        self.make_ticks(dct, tick_ordering)
328        return self
329
330    def make_ticks(self, dct, ordering=None):
331        """Unique & sort, for each individual "dim" in `dct`. Assign to `self.ticks`.
332
333        NB: `self.ticks` will not "propagate" through `SparseSpace.nest` or the like.
334        """
335        self.ticks = dct
336        ordering = ordering or {}
337        for name, values in dct.items():
338            ticks = set(values)  # unique (jumbles order)
339            order = ordering.get(name, 'as-found')
340
341            # Sort key
342            if callable(order):
343                key = order
344            elif 'as-found' in order:
345                key = values.index
346            else:  # "natural"
347                def key(x):
348                    return x
349
350            # Place None's at the end
351            def key_safe(x):
352                return (x is None), key(x)
353
354            # Sort
355            ticks = sorted(ticks, key=key_safe)
356            # Reverse
357            if isinstance(order, str) and "rev" in order:
358                ticks = ticks[::-1]
359            # Assign
360            dct[name] = ticks
361
362    def fill(self, xps):
363        """Mass insertion."""
364        self.update([(self.coord_from_attrs(xp), xp) for xp in xps])
365
366    def squeeze(self):
367        """Eliminate unnecessary dimensions."""
368        squeezed = xpSpace(xpList(self).prep_table()[0])
369        squeezed.fill(self)
370        return squeezed
371
372    def get_stat(self, statkey):
373        """Make `xpSpace` with same `Coord` as `self`, but values `xp.avrgs.statkey`."""
374        # Init a new xpDict to hold stat
375        avrgs = self.__class__(self.dims)
376
377        not_found = set()
378        for coord, xp in self.items():
379            try:
380                avrgs[coord] = getattr(xp.avrgs, statkey)
381            except AttributeError:
382                not_found.add(coord)
383
384        if len(not_found) == len(self):
385            raise AttributeError(
386                f"The stat. '{statkey}' was not found among **any** of the xp's.")
387        elif not_found:
388            print(color_text("Warning:", "RED"), f"no stat. '{statkey}' found for")
389            print(*not_found, sep="\n")
390
391        return avrgs
392
393    def mean(self, dims=None):
394        """Compute mean over `dims` (a list). Returns `xpSpace` without those `dims`."""
395        # Note: The case `dims=()` should work w/o special treatment.
396        if dims is None:
397            return self
398
399        nested = self.nest(dims)
400        for coord, space in nested.items():
401
402            def getval(uq):
403                return uq.val if isinstance(uq, UncertainQtty) else uq
404            vals = [getval(uq) for uq in space.values()]
405
406            # Don't use nanmean! It would give false impressions.
407            mu = np.mean(vals)
408
409            with warnings.catch_warnings():
410                warnings.simplefilter("ignore", category=RuntimeWarning)
411                # Don't print warnings caused by N=1.
412                # It already correctly yield nan's.
413                var = np.var(vals, ddof=1)
414
415            N = len(vals)
416            uq = UncertainQtty(mu, np.sqrt(var/N))
417            uq.nTotal   = N
418            uq.nFail    = N - np.isfinite(vals).sum()
419            uq.nSuccess = N - uq.nFail
420
421            nested[coord] = uq
422        return nested
423
424    def tune(self, dims=None, costfun=None):
425        """Get (compile/tabulate) a stat. optimised wrt. tuning params (`dims`)."""
426        # Define cost-function
427        costfun = (costfun or 'increasing').lower()
428        if 'increas' in costfun:
429            costfun = (lambda x: +x)
430        elif 'decreas' in costfun:
431            costfun = (lambda x: -x)
432        else:
433            assert callable(costfun)  # custom
434
435        # Note: The case `dims=()` should work w/o special treatment.
436        if dims is None:
437            return self
438
439        nested = self.nest(dims)
440        for coord, space in nested.items():
441            # Find optimal value (and coord) within space
442            MIN = np.inf
443            found_any = False
444            for inner_coord, uq in space.items():
445                cost = costfun(uq.val)
446                if cost <= MIN:
447                    found_any          = True
448                    MIN                = cost
449                    uq_opt             = uq
450                    uq_opt.tuned_coord = inner_coord
451
452            if not found_any:
453                uq_opt = uq  # one is as good as another
454                nDim = range(len(space.Coord._fields))
455                uq_opt.tuned_coord = space.Coord(*(None for _ in nDim))
456
457            nested[coord] = uq_opt
458
459        return nested
460
461    def table_tree(self, statkey, dims, *, costfun=None):
462        """Make hierarchy `outer > inner > mean > optim` using `SparseSpace.nest`.
463
464        The dimension passed to `nest` (at each level) is specified by `dims`.
465        The dimensions of `dims['mean']` and `dims['optim']` get eliminated
466        by the mean/tune operations. The `dims['outer']` and `dims['inner']
467        become the keys for the output hierarchy.
468
469        .. note::
470            cannot support multiple `statkey`s because it's not (obviously) meaningful
471            when optimizing over `dims['optim']`.
472        """
473        def validate_dims(dims):
474            """Validate dims."""
475            role_register = {}
476            new = {}
477            for role in set(dims) | set(DIM_ROLES):
478                assert role in DIM_ROLES, f"Invalid role {role!r}"
479                dd = dims.get(role, DIM_ROLES[role])
480
481                if dd is None:
482                    # Don't convert None to (), allowing None to remain special.
483                    pass
484
485                else:
486                    # Ensure iterable
487                    if isinstance(dd, str) or not hasattr(dd, "__iter__"):
488                        dd = (dd,)
489
490                    # Keep relevant only
491                    dd = self.intersect_dims(dd)
492
493                    # Ensure each dim plays a single-role
494                    for dim in dd:
495                        if dim in role_register:
496                            raise TypeError(
497                                f"A dim (here {dim!r}) cannot be assigned to 2"
498                                f" roles (here {role!r} and {role_register[dim]!r}).")
499                        else:
500                            role_register[dim] = role
501                new[role] = dd
502            return new
503
504        def mean_tune(xp_dict):
505            """Take mean, then tune.
506
507            Note: the `SparseSpace` implementation should be sufficiently
508            "uncluttered" that `mean_tune` (or a few of its code lines)
509            could be called anywhere above/between/below
510            the `nest`ing of `outer` or `inner`.
511            These possibile call locations are commented in the code.
512            """
513            uq_dict = xp_dict.get_stat(statkey)
514            uq_dict = uq_dict.mean(dims['mean'])
515            uq_dict = uq_dict.tune(dims['optim'], costfun)
516            return uq_dict
517
518        dims = validate_dims(dims)
519        self2 = mean_tune(self)
520        # Prefer calling mean_tune() [also see its docstring]
521        # before doing outer/inner nesting. This is because then the dims of
522        # a row (xpSpace) should not include mean&optim, and thus:
523        #  - Column header/coords may be had directly as row.keys(),
524        #    without extraction by coord_from_attrs() from (e.g.) row[0].
525        #  - Don't need to propagate mean&optim dims down to the row level.
526        #    which would require defining rows by the nesting:
527        #    rows = table.nest(outer_dims=complement(table.dims,
528        #        *(dims['inner'] or ()),
529        #        *(dims['mean']  or ()),
530        #        *(dims['optim'] or ()) ))
531        #  - Each level of the output from table_tree
532        #    is a smaller (and more manageable) dict.
533
534        tables = self2.nest(outer_dims=dims['outer'])
535        for table_coord, table in tables.items():
536            # table = mean_tune(table)
537
538            # Should not be used (nesting as rows is more natural,
539            # and is required for getting distinct/row_keys).
540            # cols = table.nest(outer_dims=dims['inner'])
541
542            rows = table.nest(inner_dims=dims['inner'] or ())
543
544            # Overwrite table by its nesting as rows
545            tables[table_coord] = rows
546
547            # for row_coord, row in rows.items():
548            # rows[row_coord] = mean_tune(row)
549
550        args = dict(statkey=statkey, xp_dict=self, dims=dims)
551        tables.created_with = args
552        return dims, tables
553
554    def tickz(self, dim_name):
555        """Dimension (axis) ticks without None"""
556        return [x for x in self.ticks[dim_name] if x is not None]
557
    def print(self, statkey, dims,  # noqa (shadowing builtin)
              subcols=True, decimals=None, costfun=None,
              squeeze_labels=True, colorize=True, title=None):
        """Print tables of results.

        Parameters
        ----------
        statkey: str
            The statistic to extract from the `xp.avrgs` for each `xp`.
            Examples: `"rmse.a"` (i.e. `"err.rms.a"`), `"rmse.ocean.a"`, `"duration"`.
        dims: dict
            Allots (maps) the dims of `xpSpace` to different roles in the tables.

            - The "role" `outer` should list the dims/attributes
              used to define the splitting of the results into *separate tables*:
              one table for each distinct combination of attributes.
            - Similarly, the role `inner` determines which attributes
              split a table into its columns.
            - `mean` lists the attributes over which the mean is taken
              (for that row & column)
            - `optim` lists the attributes over which the optimum
               is searched for (after taking the mean).

            Example:

                dict(outer='da_method', inner='N', mean='seed',
                     optim=('infl','loc_rad'))

            Equivalently, use `mean=("seed",)`.
            It is acceptable to leave this empty: `mean=()` or `mean=None`.
        subcols: bool
            If `True`, then subcolumns are added to indicate

            - `1σ`: the confidence interval. If `mean=None` is used, this simply reports
              the value `.prec` of the `statkey`, providing this is an `UncertainQtty`.
              Otherwise, it is computed as `sqrt(var(xps)/N)`,
              where `xps` is the set of statistic gathered over the `mean` dimensions.
            - `*(optim)`: the optimal point (among all `optim` attributes),
              as defined by `costfun`.
            - `☠`: the number of failures (non-finite values) at that point.
            - `✓`: the number of successes that go into the value
        decimals: int
            Number of decimals to print.
            If `None`, this is determined for each statistic by its uncertainty.
        costfun: str or function
            Use `'increasing'` (default) or `'decreasing'` to indicate that the optimum
            is defined as the lowest or highest value of the `statkey` found.
        squeeze_labels: bool
            Don't include redundant attributes in the line labels.
            Caution: `get_style` will not be able to access the eliminated attrs.
        colorize: bool
            Add color to tables for readability.
        title: anything
            If not `None`, it is printed (as `str(title)` when colorized)
            before the tables.

        Returns
        -------
        The `tables` produced by `xpSpace.table_tree`.
        """
        # Title
        if title is not None:
            if colorize:
                clrs = colorama.Back.LIGHTBLUE_EX, colorama.Fore.BLACK
                title = color_text(str(title), *clrs)
            print(title)

        # Inform dims["mean"]
        # NB: here `dims` is still the raw input (not yet validated by table_tree).
        if dims.get('mean', None):
            print(f"Averages (in time and) over {dims['mean']}.")
        else:
            print("Averages in time only"
                  " (=> the 1σ estimates may be unreliable).")

        def make_cols(rows, cc, subcols, h2):
            """Subcolumns: align, justify, join.

            Takes the `rows` (lists of equal length) and the column coords `cc`.
            Returns new rows of strings, the first of which holds the headers.
            A super-header (made from the column coord) is added if `h2` is truthy.

            NB: `dims` refers to the variable of the enclosing function, which
            has been re-assigned (by the output of `table_tree`) before this is called.
            """
            # Define subcol formats
            # (`templ` and `aligns` are only defined, and only used, if `subcols`)
            if subcols:
                templ = "{val} ±{prec}"
                templ += "" if dims['optim'] is None else " *{tuned_coord}"
                templ += "" if dims['mean' ] is None else " {nFail} {nSuccess}"
                aligns = dict(prec="<", tuned_coord="<")

            def align(column, idx):
                # Only the first column gets the full (sub)headers
                if idx == 0:
                    headers = dict(val=statkey, prec="1σ", tuned_coord=dims["optim"])
                else:
                    headers = dict(val="", prec="1σ", tuned_coord="")
                headers.update(nFail="☠", nSuccess="✓")

                col = unpack_uqs(column, decimals)

                if subcols:
                    # Iterate over a copy of the keys, since some get deleted
                    for key in list(col):
                        # Keep only the subcolumns that figure in the template
                        if key in templ:
                            subcolmn = [headers.get(key, key)] + col[key]
                            col[key] = align_col(subcolmn, just=aligns.get(key, ">"))
                        else:
                            del col[key]
                    # Join the subcolumns, row by row
                    col = [templ.format(**row) for row in transps(col)]
                else:
                    col = align_col([headers["val"]] + col["val"])
                return col

            def super_header(col_coord, idx, col):
                header, matter = col[0], col[1:]
                # Only the first column (idx 0) includes the keys of the coord
                cc = col_coord.repr2(not idx, str).strip("()").replace(", ", ",")
                cc = cc.center(len(header), "_")  # +1 width for wide chars like ✔️
                return [cc + "\n" + header] + matter

            # Transpose
            columns = [list(x) for x in zip(*rows)]

            # Format column
            for j, (col_coord, column) in enumerate(zip(cc, columns)):
                col = align(column, j)
                if h2:
                    col = super_header(col_coord, j, col)
                columns[j] = col

            # Un-transpose
            rows = [list(x) for x in zip(*columns)]

            return rows

        # NB: re-assigns `dims` to its validated version
        dims, tables = self.table_tree(statkey, dims, costfun=costfun)

        for table_coord, table in tables.items():

            # Get table's column coords/ticks (cc).
            # cc is really a set, but we use dict for ordering.
            # cc = self.ticks[dims["inner"]]  # may be > needed
            # cc = table[0].keys()            # may be < needed
            cc = {c: None for row in table.values() for c in row}
            # Could additionally do cc = table.squeeze() but is it worth it?

            # Convert table (rows) into rows (lists) of equal length
            rows = [[row.get(c, None) for c in cc] for row in table.values()]

            # Align cols
            h2 = "\n" if len(cc) > 1 else ""  # super-header?
            headers, *rows = make_cols(rows, cc, subcols, h2)

            # Prepend left-side (attr) table
            if squeeze_labels:
                table = table.squeeze()
            # The `h2` prefix puts these headers on the 2nd line (if any)
            headers = [h2+k for k in table.dims] + [h2+'⑊'] + headers
            for i, (key, row) in enumerate(zip(table, rows)):
                rows[i] = [*key] + ['|'] + row

            print()
            if dims['outer']:
                # Title
                table_title = "Table for " + table_coord.repr2(True).strip("()")
                if colorize:
                    clrs = colorama.Back.YELLOW, colorama.Fore.BLACK
                    table_title = color_text(table_title, *clrs)
                print(table_title)
            table = tabulate(rows, headers).replace('␣', ' ')
            if colorize:
                table = stripe(table, slice(2, None))
            print(table)

        return tables
715
716    def plot(self, statkey, dims, get_style=default_styles,
717             fignum=None, figsize=None, panels=None, costfun=None,
718             title1=None, title2=None, unique_labels=True, squeeze_labels=True):
719        """Plot (tables of) results.
720
721        Analagously to `xpSpace.print`,
722        the averages are grouped by `dims["inner"]`,
723        which here plays the role of the x-axis.
724
725        The averages can also be grouped by `dims["outer"]`,
726        producing a figure with multiple (columns of) panels.
727
728        The optimal points/parameters/attributes are plotted in smaller panels
729        below the main plot. This can be turned off by providing the figure
730        dims through the `panels` argument.
731
732        The parameters `statkey`, `dims`, `costfun`, `sqeeze_labels`
733        are documented in `xpSpace.print`.
734
735        Parameters
736        ----------
737        get_style: function
738            A function that takes an object, and returns a dict of line styles,
739            usually as a function of the object's attributes.
740        title1: anything
741            Figure title (in addition to the the defaults).
742        title2: anything
743            Figure title (in addition to the defaults). Goes on a new line.
744        unique_labels: bool
745            Only show a given line label once, even if it appears in several panels.
746        squeeze_labels:
747            Don't include redundant attributes in the labels.
748        """
749        def plot1(panelcol, row, style):
750            """Plot a given line (row) in the main panel and the optim panels.
751
752            Involves: Sort, insert None's, handle constant lines.
753            """
754            # Make a full row (yy) of vals, whether is_constant or not.
755            # is_constant = (len(row)==1 and next(iter(row))==row.Coord(None))
756            is_constant = all(x == row.Coord(None) for x in row)
757            if is_constant:
758                yy = [row[None, ] for _ in xticks]
759                style.marker = None
760            else:
761                yy = [row.get(row.Coord(x), None) for x in xticks]
762
763            # Plot main
764            row.vals = [getattr(y, 'val', None) for y in yy]
765            row.handles = {}
766            row.handles["main_panel"] = panelcol[0].plot(xticks, row.vals, **style)[0]
767
768            # Plot tuning params
769            row.tuned_coords = {}  # Store ordered, "transposed" argmins
770            argmins = [getattr(y, 'tuned_coord', None) for y in yy]
771            for a, panel in zip(dims["optim"] or (), panelcol[1:]):
772                yy = [getattr(coord, a, None) for coord in argmins]
773                row.tuned_coords[a] = yy
774
775                # Plotting all None's sets axes units (like any plotting call)
776                # which can cause trouble if the axes units were actually supposed
777                # to be categorical (eg upd_a), but this is only revealed later.
778                if not all(y == None for y in yy):
779                    style["alpha"] = 0.2
780                    row.handles[a] = panel.plot(xticks, yy, **style)
781
782        def label_management(table):
783            def pruner(style):
784                label = style.get("label", None)
785                if unique_labels:
786                    if label in register:
787                        del style["label"]
788                    elif label:
789                        register.add(style["label"])
790                        pruner.has_labels = True
791                elif label:
792                    pruner.has_labels = True
793            pruner.has_labels = False
794
795            def squeezer(coord):
796                return intersect(coord._asdict(), label_attrs)
797            if squeeze_labels:
798                label_attrs = xpList(table.keys()).prep_table()[0]
799            else:
800                label_attrs = table.dims
801
802            return pruner, squeezer
803        register = set()
804
805        def beautify(panels, title, has_labels):
806            panel0 = panels[0]
807            # panel0.set_title(title)
808            panel0.text(.5, 1, title, fontsize=12, ha="center", va="bottom",
809                        transform=panel0.transAxes, bbox=dict(
810                            facecolor='lightyellow', edgecolor='k',
811                            alpha=0.99, boxstyle="round,pad=0.25",
812                            # NB: padding makes label spill into axes
813                        ))
814            if has_labels:
815                panel0.legend()
816            if panel0.is_first_col():
817                panel0.set_ylabel(statkey)
818            panels[-1].set_xlabel(dims["inner"][0])
819            # Tuning panels:
820            for a, panel in zip(dims["optim"] or (), panels[1:]):
821                if panel.is_first_col():
822                    panel.set_ylabel(f"Optim.\n{a}")
823
824        # Nest dims through table_tree()
825        dims, tables = self.table_tree(statkey, dims, costfun=costfun)
826        assert len(dims["inner"]) == 1, "You must chose a valid attr. for the abscissa."
827
828        if not hasattr(self, "ticks"):
829            # TODO 6: this is probationary.
830            # In case self is actually a subspace, it may be that it does not contain
831            # all of the ticks of the original xpSpace. This may be fine,
832            # and we generate the ticks here again. However, this is costly-ish, so you
833            # should maybe simply (manually) assign them from the original xpSpace.
834            # And maybe you actually want the plotted lines to have holes where self
835            # has no values. Changes in the ticks are not obvious to the naked eye,
836            # unlike the case for printed tables (where column changes are quite clear).
837            print(color_text("Warning:", colorama.Fore.RED), "Making new x-ticks."
838                  "\nConsider assigning them yourself from the original"
839                  " xpSpace to this subspace.")
840            self.make_ticks(xpList(self).prep_table()[0])
841        xticks = self.tickz(dims["inner"][0])
842
843        # Create figure axes
844        if panels is None:
845            nrows   = len(dims['optim'] or ()) + 1
846            ncols   = len(tables)
847            maxW    = 12.7  # my mac screen
848            figsize = figsize or (min(5*ncols, maxW), 7)
849            gs      = dict(
850                height_ratios=[6]+[1]*(nrows-1),
851                hspace=0.05, wspace=0.05,
852                # eyeballed:
853                left=0.15/(1+np.log(ncols)),
854                right=0.97, bottom=0.06, top=0.9)
855            # Create
856            _, panels = place.freshfig(num=fignum, figsize=figsize,
857                                       nrows=nrows, sharex=True,
858                                       ncols=ncols, sharey='row',
859                                       gridspec_kw=gs, squeeze=False)
860        else:
861            panels = np.atleast_2d(panels)
862
863        # Fig. Title
864        fig = panels[0, 0].figure
865        fig_title = "Averages wrt. time"
866        if dims["mean"] is not None:
867            fig_title += " and " + ", ".join([repr(c) for c in dims['mean']])
868        if title1 is not None:
869            fig_title += ". " + title1
870        if title2 is not None:
871            with nonchalance():
872                title2 = title2.relative_to(rc.dirs["data"])
873            fig_title += "\n" + str(title2)
874        fig.suptitle(fig_title)
875
876        # Loop outer
877        for ax_column, (table_coord, table) in zip(panels.T, tables.items()):
878            table.panels = ax_column
879            label_prune, label_squeeze = label_management(table)
880            for coord, row in table.items():
881                style = get_style(NoneDict(label_squeeze(coord)))
882                label_prune(style)
883                plot1(table.panels, row, style)
884
885            beautify(table.panels,
886                     title=("" if dims["outer"] is None else
887                            table_coord.repr2(True).strip("()")),
888                     has_labels=label_prune.has_labels)
889
890        tables.fig = fig  # add reference to fig
891        return tables

class SparseSpace(builtins.dict): View Source

 23class SparseSpace(dict):
 24    """Subclass of `dict` that enforces key conformity to a given `namedtuple`.
 25
 26    Like a normal `dict`, it can hold any type of objects.
 27    But, since the keys must conform, they effectively follow a coordinate system,
 28    so that the `dict` becomes a vector **space**. Example:
 29    >>> dct = xpSpace(["x", "y", "z"])
 30    >>> dct[(1, 2, 3)] = "pointA"
 31
 32    The coordinate system is specified by the `dims`:
 33    a list of keys defining the `namedtuple` of `self.Coord`.
 34    The above dict only has three `dims`, so this fails:
 35    >>> dct[(1, 2, 3, 4)] = "pointB"  # doctest: +NORMALIZE_WHITESPACE
 36    Traceback (most recent call last):
 37    ...
 38    TypeError: The key (1, 2, 3, 4) did not fit the coord.  system
 39    which has dims ('x', 'y', 'z')
 40
 41    Coordinates can contain any value, including `None`:
 42    >>> dct[(1, 2, None)] = "pointB"
 43
 44    In intended usage, this space is highly sparse,
 45    meaning there are many coordinates with no entry.
 46    Indeed, as a data format for nd-arrays, it may be called
 47    "coordinate list representation", used e.g. by `scipy.sparse.coo_matrix`.
 48
 49    Thus, operations across (potentially multiple) `dims`,
 50    such as optimization or averaging, should be carried out by iterating
 51    -- not over the `dims` -- but over the the list of items.
 52
 53    The most important method is `nest`,
 54    which is used (by `xpSpace.table_tree`) to print and plot results.
 55    This is essentially a "groupby" operation, and indeed the case could
 56    be made that this class should be replaced by `pandas.DataFrame`,
 57    or better yet: <https://github.com/pydata/xarray>.
 58
 59    The `__getitem__` is quite flexible, allowing accessing by:
 60
 61    - The actual key, a `self.Coord` object, or a standard tuple.<br>
 62      Returns single item. Example:
 63
 64            >>> dct[1, 2, 3] == dct[(1, 2, 3)] == dct[dct.Coord(1, 2, 3)] == "pointA"
 65            True
 66
 67    - A `slice` or `list`.<br>
 68      Returns list.<br>
 69      *PS: indexing by slice or list assumes that the dict is ordered,
 70      which we inherit from the builtin `dict` since Python 3.7.
 71      Moreover, it is a reflection of the fact that the internals of this class
 72      work by looping over items.*
 73
 74    In addition, the `subspace` method (also aliased to `__call__`, and is implemented
 75    via `coords_matching`) can be used to select items by the values of a *subset*
 76    of their attributes. It returns a `SparseSpace`.
 77    If there is only a single item it can be accessed as in `dct[()]`.
 78
 79    Inspired by
 80
 81    - https://stackoverflow.com/a/7728830
 82    - https://stackoverflow.com/q/3387691
 83    """
 84
 85    @property
 86    def dims(self):
 87        return self.Coord._fields
 88
 89    def __init__(self, dims):
 90        """Usually initialized through `xpSpace.from_list`.
 91
 92        Parameters
 93        ----------
 94        dims: list or tuple
 95            The attributes defining the coordinate system.
 96        """
 97        # Define coordinate system
 98        self.Coord = collections.namedtuple('Coord', dims)
 99
100        def repr2(c, keys=False, str_or_repr=repr):
101            if keys:
102                lst = [f"{k}={str_or_repr(v)}" for k, v in c._asdict().items()]
103            else:
104                lst = [str_or_repr(v) for v in c]
105            return "(" + ", ".join(lst) + ")"
106
107        self.Coord.repr2 = repr2
108
109    def update(self, items):
110        """Update dict, using the custom `__setitem__` to ensure key conformity.
111
112        NB: the `kwargs` syntax is not supported because it only works for keys that
113        consist of (a single) string, which is not very interesting for SparseSpace.
114        """
115        # See https://stackoverflow.com/a/2588648
116        # and https://stackoverflow.com/a/2390997
117        try:
118            items = items.items()
119        except AttributeError:
120            pass
121        for k, v in items:
122            self[k] = v
123
124    def __setitem__(self, key, val):
125        """Setitem ensuring coordinate conforms."""
126        try:
127            key = self.Coord(*key)
128        except TypeError:
129            raise TypeError(
130                f"The key {key!r} did not fit the coord. system "
131                f"which has dims {self.dims}")
132        super().__setitem__(key, val)
133
134    def __getitem__(self, key):
135        """Also allows list-indexing by `list` and `slice`."""
136        # List of items (from list of indices)
137        if isinstance(key, list):
138            lst = list(self.values())
139            return [lst[k] for k in key]
140
141        # List of items (from slice)
142        elif isinstance(key, slice):
143            return [*self.values()][key]
144
145        # Single item (by Coord object, or tuple)
146        else:
147            # NB: Dont't use isinstance(key, self.Coord)
148            # coz it fails when the namedtuple (Coord) has been
149            # instantiated in different places (but with equal params).
150            # Also see bugs.python.org/issue7796
151            return super().__getitem__(key)
152
153    def __call__(self, **kwargs):
154        """Shortcut (syntactic sugar) for `SparseSpace.subspace`."""
155        return self.subspace(**kwargs)
156
157    def subspace(self, **kwargs):
158        """Get an affine subspace.
159
160        NB: If you're calling this repeatedly (for all values of the same `kwargs`)
161        then you should consider using `SparseSpace.nest` instead.
162
163        Example:
164        >>> xp_dict.subspace(da_method="EnKF", infl=1, seed=3) # doctest: +SKIP
165        """
166        # Slow version
167        # outer = self.nest(outer_dims=list(kwargs))  # make subspaceS
168        # inner = outer[outer.Coord(**kwargs)]        # discard all but 1
169
170        coords = self.coords_matching(**kwargs)
171        inner = self.__class__(complement(self.dims, kwargs))
172        for coord in coords:
173            inner[inner.coord_from_attrs(coord)] = self[coord]
174
175        return inner
176
177    def coords_matching(self, **kwargs):
178        """Get all `coord`s matching kwargs.
179
180        Used by `SparseSpace.label_xSection` and `SparseSpace.subspace`. Unlike the
181        latter, this function returns a *list* of *keys* of the *original subspace*.
182
183        Note that the `missingval` shenanigans of `xpList.inds` are here unnecessary
184        since each coordinate is complete.
185        """
186        def match(coord):
187            return all(getattr(coord, k) == kwargs[k] for k in kwargs)
188
189        return [c for c in self if match(c)]
190
191    def coord_from_attrs(self, obj):
192        """Form a `coord` for this `xpSpace` by extracting attrs. from `obj`.
193
194        For instances of `self.Coord`, this is the identity opeartor, i.e.
195
196            self.coord_from_attrs(coord) == coord
197        """
198        coord = (getattr(obj, a, None) for a in self.dims)
199        return self.Coord(*coord)
200
201    def __repr__(self):
202        txt  = f"<{self.__class__.__name__}>"
203        txt += " with Coord/dims: "
204        try:
205            txt += "(and ticks): " + str(AlignedDict(self.ticks))
206        except AttributeError:
207            txt += str(self.dims) + "\n"
208
209        # Note: print(xpList(self)) produces a more human-readable table,
210        # but requires prep_table(), which we don't really want to call again
211        # (it's only called in from_list, not (necessarily) in any nested spaces)
212        L = 2
213        keys = [k.repr2() for k in self]
214        if 2*L < len(keys):
215            keys = keys[:L] + ["..."] + keys[-L:]
216        keys = "[\n  " + ",\n  ".join(keys) + "\n]"
217        return txt + f"populated by {len(self)} items with keys: {keys}"
218
219    def nest(self, inner_dims=None, outer_dims=None):
220        """Project along `inner_acces` to yield a new `xpSpace` with dims `outer_dims`
221
222        The entries of this `xpSpace` are themselves `xpSpace`s, with dims `inner_dims`,
223        each one regrouping the entries with the same (projected) coordinate.
224
225        Note: this method could also be called `groupby`.
226        Note: this method is also called by `__getitem__(key)` if `key` is dict.
227        """
228        # Default: a singleton outer space,
229        # with everything contained in the inner (projection) space.
230        if inner_dims is None and outer_dims is None:
231            outer_dims = ()
232
233        # Validate dims
234        if inner_dims is None:
235            assert outer_dims is not None
236            inner_dims = complement(self.dims, outer_dims)
237        else:
238            assert outer_dims is None
239            outer_dims = complement(self.dims, inner_dims)
240
241        # Fill spaces
242        outer_space = self.__class__(outer_dims)
243        for coord, entry in self.items():
244            # Lookup subspace coord
245            outer_coord = outer_space.coord_from_attrs(coord)
246            try:
247                # Get subspace
248                inner_space = outer_space[outer_coord]
249            except KeyError:
250                # Create subspace, embed
251                inner_space = self.__class__(inner_dims)
252                outer_space[outer_coord] = inner_space
253            # Add entry to subspace, similar to .fill()
254            inner_space[inner_space.coord_from_attrs(coord)] = entry
255
256        return outer_space
257
258    def intersect_dims(self, attrs):
259        """Rm those `a` in `attrs` that are not in `self.dims`.
260
261        This enables sloppy `dims` allotment, for ease-of-use.
262        """
263        absent = complement(attrs, self.dims)
264        if absent:
265            print(color_text("Warning:", colorama.Fore.RED),
266                  "The requested attributes",
267                  color_text(str(absent), colorama.Fore.RED),
268                  ("were not found among the xpSpace dims"
269                   " (attrs. used as coordinates for the set of experiments)."
270                   " This may be no prob. if the attrs are redundant for the coord-sys."
271                   " However, if due to confusion or mis-spelling, then it is likely"
272                   " to cause mis-interpretation of the shown results."))
273            attrs = complement(attrs, absent)
274        return attrs
275
276    def append_dim(self, dim):
277        """Expand `self.Coord` by `dim`. For each item, insert `None` in new dim."""
278        self.__init__(self.dims+(dim,))
279        for coord in list(self):
280            entry = self.pop(coord)
281            self[coord + (None,)] = entry
282
283    def label_xSection(self, label, *NoneAttrs, **sub_coord):
284        """Insert duplicate entries for the given cross-section.
285
286        Works by adding the attr. `xSection` to the dims of `SparseSpace`,
287        and setting it to `label` for entries matching `sub_coord`,
288        reflecting the "constance/constraint/fixation" this represents.
289        This distinguishes the entries in this fixed-affine subspace,
290        preventing them from being gobbled up by the operations of `nest`.
291
292        If you wish, you can specify the `NoneAttrs`,
293        which are consequently set to None for the duplicated entries,
294        preventing them from being shown in plot labels and tuning panels.
295        """
296        if "xSect" not in self.dims:
297            self.append_dim('xSect')
298
299        for coord in self.coords_matching(**self.intersect_dims(sub_coord)):
300            entry = copy.deepcopy(self[coord])
301            coord = coord._replace(xSect=label)
302            coord = coord._replace(**{a: None for a in NoneAttrs})
303            self[coord] = entry

Subclass of dict that enforces key conformity to a given namedtuple.

Like a normal dict, it can hold any type of objects. But, since the keys must conform, they effectively follow a coordinate system, so that the dict becomes a vector space. Example:

>>> dct = xpSpace(["x", "y", "z"])
>>> dct[(1, 2, 3)] = "pointA"

The coordinate system is specified by the dims: a list of keys defining the namedtuple of self.Coord. The above dict only has three dims, so this fails:

>>> dct[(1, 2, 3, 4)] = "pointB"  # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
TypeError: The key (1, 2, 3, 4) did not fit the coord.  system
which has dims ('x', 'y', 'z')

Coordinates can contain any value, including None:

>>> dct[(1, 2, None)] = "pointB"

In intended usage, this space is highly sparse, meaning there are many coordinates with no entry. Indeed, as a data format for nd-arrays, it may be called "coordinate list representation", used e.g. by scipy.sparse.coo_matrix.

Thus, operations across (potentially multiple) dims, such as optimization or averaging, should be carried out by iterating -- not over the dims -- but over the list of items.

The most important method is nest, which is used (by xpSpace.table_tree) to print and plot results. This is essentially a "groupby" operation, and indeed the case could be made that this class should be replaced by pandas.DataFrame, or better yet: https://github.com/pydata/xarray.

The __getitem__ is quite flexible, allowing accessing by:

The actual key, a self.Coord object, or a standard tuple.
Returns single item. Example:
```
>>> dct[1, 2, 3] == dct[(1, 2, 3)] == dct[dct.Coord(1, 2, 3)] == "pointA"
True
```
A slice or list.
Returns list.
PS: indexing by slice or list assumes that the dict is ordered, which we inherit from the builtin dict since Python 3.7. Moreover, it is a reflection of the fact that the internals of this class work by looping over items.

In addition, the subspace method (also aliased to __call__, and is implemented via coords_matching) can be used to select items by the values of a subset of their attributes. It returns a SparseSpace. If there is only a single item it can be accessed as in dct[()].

Inspired by:

- https://stackoverflow.com/a/7728830
- https://stackoverflow.com/q/3387691

SparseSpace(dims) View Source

 89    def __init__(self, dims):
 90        """Usually initialized through `xpSpace.from_list`.
 91
 92        Parameters
 93        ----------
 94        dims: list or tuple
 95            The attributes defining the coordinate system.
 96        """
 97        # Define coordinate system
 98        self.Coord = collections.namedtuple('Coord', dims)
 99
100        def repr2(c, keys=False, str_or_repr=repr):
101            if keys:
102                lst = [f"{k}={str_or_repr(v)}" for k, v in c._asdict().items()]
103            else:
104                lst = [str_or_repr(v) for v in c]
105            return "(" + ", ".join(lst) + ")"
106
107        self.Coord.repr2 = repr2

Usually initialized through xpSpace.from_list.

Parameters

dims (list or tuple): The attributes defining the coordinate system.

dims View Source

85    @property
86    def dims(self):
87        return self.Coord._fields

Coord

def update(self, items): View Source

109    def update(self, items):
110        """Update dict, using the custom `__setitem__` to ensure key conformity.
111
112        NB: the `kwargs` syntax is not supported because it only works for keys that
113        consist of (a single) string, which is not very interesting for SparseSpace.
114        """
115        # See https://stackoverflow.com/a/2588648
116        # and https://stackoverflow.com/a/2390997
117        try:
118            items = items.items()
119        except AttributeError:
120            pass
121        for k, v in items:
122            self[k] = v

Update dict, using the custom __setitem__ to ensure key conformity.

NB: the kwargs syntax is not supported because it only works for keys that consist of (a single) string, which is not very interesting for SparseSpace.

def subspace(self, **kwargs): View Source

157    def subspace(self, **kwargs):
158        """Get an affine subspace.
159
160        NB: If you're calling this repeatedly (for all values of the same `kwargs`)
161        then you should consider using `SparseSpace.nest` instead.
162
163        Example:
164        >>> xp_dict.subspace(da_method="EnKF", infl=1, seed=3) # doctest: +SKIP
165        """
166        # Slow version
167        # outer = self.nest(outer_dims=list(kwargs))  # make subspaceS
168        # inner = outer[outer.Coord(**kwargs)]        # discard all but 1
169
170        coords = self.coords_matching(**kwargs)
171        inner = self.__class__(complement(self.dims, kwargs))
172        for coord in coords:
173            inner[inner.coord_from_attrs(coord)] = self[coord]
174
175        return inner

Get an affine subspace.

NB: If you're calling this repeatedly (for all values of the same kwargs) then you should consider using SparseSpace.nest instead.

Example:

>>> xp_dict.subspace(da_method="EnKF", infl=1, seed=3) # doctest: +SKIP

def coords_matching(self, **kwargs): View Source

177    def coords_matching(self, **kwargs):
178        """Get all `coord`s matching kwargs.
179
180        Used by `SparseSpace.label_xSection` and `SparseSpace.subspace`. Unlike the
181        latter, this function returns a *list* of *keys* of the *original subspace*.
182
183        Note that the `missingval` shenanigans of `xpList.inds` are here unnecessary
184        since each coordinate is complete.
185        """
186        def match(coord):
187            return all(getattr(coord, k) == kwargs[k] for k in kwargs)
188
189        return [c for c in self if match(c)]

Get all coords matching kwargs.

Used by SparseSpace.label_xSection and SparseSpace.subspace. Unlike the latter, this function returns a list of keys of the original subspace.

Note that the missingval shenanigans of xpList.inds are here unnecessary since each coordinate is complete.

def coord_from_attrs(self, obj): View Source

191    def coord_from_attrs(self, obj):
192        """Form a `coord` for this `xpSpace` by extracting attrs. from `obj`.
193
194        For instances of `self.Coord`, this is the identity opeartor, i.e.
195
196            self.coord_from_attrs(coord) == coord
197        """
198        coord = (getattr(obj, a, None) for a in self.dims)
199        return self.Coord(*coord)

Form a coord for this xpSpace by extracting attrs. from obj.

For instances of self.Coord, this is the identity operator, i.e.

self.coord_from_attrs(coord) == coord

def nest(self, inner_dims=None, outer_dims=None): View Source

219    def nest(self, inner_dims=None, outer_dims=None):
220        """Project along `inner_acces` to yield a new `xpSpace` with dims `outer_dims`
221
222        The entries of this `xpSpace` are themselves `xpSpace`s, with dims `inner_dims`,
223        each one regrouping the entries with the same (projected) coordinate.
224
225        Note: this method could also be called `groupby`.
226        Note: this method is also called by `__getitem__(key)` if `key` is dict.
227        """
228        # Default: a singleton outer space,
229        # with everything contained in the inner (projection) space.
230        if inner_dims is None and outer_dims is None:
231            outer_dims = ()
232
233        # Validate dims
234        if inner_dims is None:
235            assert outer_dims is not None
236            inner_dims = complement(self.dims, outer_dims)
237        else:
238            assert outer_dims is None
239            outer_dims = complement(self.dims, inner_dims)
240
241        # Fill spaces
242        outer_space = self.__class__(outer_dims)
243        for coord, entry in self.items():
244            # Lookup subspace coord
245            outer_coord = outer_space.coord_from_attrs(coord)
246            try:
247                # Get subspace
248                inner_space = outer_space[outer_coord]
249            except KeyError:
250                # Create subspace, embed
251                inner_space = self.__class__(inner_dims)
252                outer_space[outer_coord] = inner_space
253            # Add entry to subspace, similar to .fill()
254            inner_space[inner_space.coord_from_attrs(coord)] = entry
255
256        return outer_space

Project along inner_dims to yield a new xpSpace with dims outer_dims.

The entries of this xpSpace are themselves xpSpaces, with dims inner_dims, each one regrouping the entries with the same (projected) coordinate.

Note: this method could also be called groupby. Note: this method is also called by __getitem__(key) if key is dict.

def intersect_dims(self, attrs): View Source

258    def intersect_dims(self, attrs):
259        """Rm those `a` in `attrs` that are not in `self.dims`.
260
261        This enables sloppy `dims` allotment, for ease-of-use.
262        """
263        absent = complement(attrs, self.dims)
264        if absent:
265            print(color_text("Warning:", colorama.Fore.RED),
266                  "The requested attributes",
267                  color_text(str(absent), colorama.Fore.RED),
268                  ("were not found among the xpSpace dims"
269                   " (attrs. used as coordinates for the set of experiments)."
270                   " This may be no prob. if the attrs are redundant for the coord-sys."
271                   " However, if due to confusion or mis-spelling, then it is likely"
272                   " to cause mis-interpretation of the shown results."))
273            attrs = complement(attrs, absent)
274        return attrs

Rm those a in attrs that are not in self.dims.

This enables sloppy dims allotment, for ease-of-use.

def append_dim(self, dim): View Source

276    def append_dim(self, dim):
277        """Expand `self.Coord` by `dim`. For each item, insert `None` in new dim."""
278        self.__init__(self.dims+(dim,))
279        for coord in list(self):
280            entry = self.pop(coord)
281            self[coord + (None,)] = entry

Expand self.Coord by dim. For each item, insert None in new dim.

def label_xSection(self, label, *NoneAttrs, **sub_coord): View Source

283    def label_xSection(self, label, *NoneAttrs, **sub_coord):
284        """Insert duplicate entries for the given cross-section.
285
286        Works by adding the attr. `xSection` to the dims of `SparseSpace`,
287        and setting it to `label` for entries matching `sub_coord`,
288        reflecting the "constance/constraint/fixation" this represents.
289        This distinguishes the entries in this fixed-affine subspace,
290        preventing them from being gobbled up by the operations of `nest`.
291
292        If you wish, you can specify the `NoneAttrs`,
293        which are consequently set to None for the duplicated entries,
294        preventing them from being shown in plot labels and tuning panels.
295        """
296        if "xSect" not in self.dims:
297            self.append_dim('xSect')
298
299        for coord in self.coords_matching(**self.intersect_dims(sub_coord)):
300            entry = copy.deepcopy(self[coord])
301            coord = coord._replace(xSect=label)
302            coord = coord._replace(**{a: None for a in NoneAttrs})
303            self[coord] = entry

Insert duplicate entries for the given cross-section.

Works by adding the attr. xSect to the dims of SparseSpace, and setting it to label for entries matching sub_coord, reflecting the "constance/constraint/fixation" this represents. This distinguishes the entries in this fixed-affine subspace, preventing them from being gobbled up by the operations of nest.

If you wish, you can specify the NoneAttrs, which are consequently set to None for the duplicated entries, preventing them from being shown in plot labels and tuning panels.

Inherited Members

builtins.dict: get; setdefault; pop; popitem; keys; items; values; fromkeys; clear; copy

DIM_ROLES = {'outer': None, 'inner': None, 'mean': None, 'optim': None}