Source code for gcmprocpy.imfgen.dataset

"""Assemble processed channels into an xarray Dataset and write NetCDF.

The variable set, dimension name (``ndata``) and attributes reproduce the
original ``imf_create.py`` / ``bcwind_imf.py`` output exactly (one extra pair of
convenience attributes -- ``yearday_beg`` / ``yearday_end`` -- is added so the
filename can be derived without re-deriving it from the data).
"""

import os
from datetime import datetime

import numpy as np
import xarray as xr

from .processing import CHANNELS

# Per-channel lookup tables, all keyed by channel name.
# VAR_NAMES holds the data-variable name, MASK_NAMES the mask-variable name.
VAR_NAMES = {channel: channel for channel in ("bx", "by", "bz", "swden", "swvel")}
MASK_NAMES = dict(
    bx="bxMask",
    by="byMask",
    bz="bzMask",
    swden="denMask",
    swvel="velMask",
)
# ``units`` attribute of each data variable (the three field components share nT).
UNITS = {
    **dict.fromkeys(("bx", "by", "bz"), "nT"),
    "swden": "cm^{-3}",
    "swvel": "km/s",
}
# ``long_name`` attribute of each data variable.
LONG_NAMES = dict(
    bx="IMF Bx",
    by="IMF By",
    bz="IMF Bz",
    swden="solar wind density",
    swvel="solar wind velocity",
)
# ``long_name`` attribute shared by every mask variable.
MASK_LONG_NAME = "Quality flag: 0=data derived from linear interpolation."

# Output-filename prefix used for each data source when none is given.
DEFAULT_PREFIX = dict(omni="imf_OMNI", bcwind="imf_bcwind")

# Base global attributes for each data source.
_SOURCE_ATTRS = dict(
    omni=dict(
        Description="10-minute average of OMNI data trailed by 1 minutes. Sampled to minute output",
        Source="Hourly OMNI combined 1AU IP Data",
        url_reference="https://omniweb.gsfc.nasa.gov/ow_min.html",
    ),
    bcwind=dict(
        Description="BCWIND.h5 to minute output IMF data",
        Source="bcwind.h5",
        url_reference="https://github.com/AnonNick/IMF",
    ),
)


def build_dataset(processed, dates, timestamps, source="omni", source_path=None):
    """Build the IMF ``xarray.Dataset``.

    Every variable lives on the single ``ndata`` dimension: one data variable
    and one mask variable per channel, plus ``date`` and ``timestamp``.

    Parameters
    ----------
    processed : mapping
        Maps each channel in :data:`CHANNELS` to a ``(values, mask)`` pair.
        Values are stored as ``float`` and masks as ``int8``.
    dates : array-like
        The ``YYYYDDD.frac`` float array; its length defines ``ndata``.
    timestamps : array-like
        The ISO timestamp strings.
    source : str
        Key selecting the base global attributes (``"omni"`` or ``"bcwind"``).
    source_path : optional
        Only used when ``source == "bcwind"``: if truthy, ``str(source_path)``
        replaces the default ``Source`` attribute.

    Returns
    -------
    xarray.Dataset
        The assembled dataset with its global attributes set.

    Raises
    ------
    KeyError
        If ``source`` is not a known source, or ``processed`` lacks a channel.
    IndexError
        If ``dates`` is empty, since the year-day bounds are read from its
        first and last elements.
    """
    dates = np.asarray(dates)
    ndata = len(dates)

    data_vars = {}
    for name in CHANNELS:
        values, mask = processed[name]
        data_vars[VAR_NAMES[name]] = (
            "ndata",
            np.asarray(values, dtype=float),
            {"units": UNITS[name], "long_name": LONG_NAMES[name]},
        )
        data_vars[MASK_NAMES[name]] = (
            "ndata",
            np.asarray(mask, dtype="int8"),
            {"units": "boolean", "long_name": MASK_LONG_NAME},
        )
    data_vars["date"] = (
        "ndata",
        dates,
        {"long_name": "year-day plus fractional day: yyyyddd.frac"},
    )
    data_vars["timestamp"] = (
        "ndata",
        np.asarray(timestamps),
        {"long_name": "Timestamp of the data: YYYY-MM-DDTHH:MM:SS"},
    )

    ds = xr.Dataset(data_vars, coords={"ndata": np.arange(ndata)})

    # Copy so the module-level attribute table is never mutated.
    attrs = dict(_SOURCE_ATTRS[source])
    # Assigning to a key that already exists keeps its original position, so
    # url_reference has to be removed here and re-inserted after everything
    # else in order to really end up last.
    url_reference = attrs.pop("url_reference")
    if source == "bcwind" and source_path:
        attrs["Source"] = str(source_path)
    attrs["CreationTime"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    attrs["Version"] = "1.0.0"
    attrs["CreatedBy"] = "nikhilr"
    attrs["data_source"] = source
    # int() drops the fractional day, leaving the YYYYDDD bounds.
    attrs["yearday_beg"] = int(dates[0])
    attrs["yearday_end"] = int(dates[-1])
    # url_reference last, matching the originals' ordering
    attrs["url_reference"] = url_reference
    ds.attrs.update(attrs)
    return ds
def imf_filename(ds, prefix=None):
    """Return ``<prefix>_<begYYYYDDD>-<endYYYYDDD>.nc`` for ``ds``.

    The bounds come from the ``yearday_beg`` / ``yearday_end`` attributes of
    ``ds``.  When ``prefix`` is ``None`` it is looked up in
    :data:`DEFAULT_PREFIX` using the ``data_source`` attribute (treated as
    ``"omni"`` if absent), falling back to ``"imf"`` for an unlisted source.
    """
    attrs = ds.attrs
    if prefix is None:
        source = attrs.get("data_source", "omni")
        prefix = DEFAULT_PREFIX.get(source, "imf")
    first_day, last_day = (int(attrs[key]) for key in ("yearday_beg", "yearday_end"))
    return "{}_{}-{}.nc".format(prefix, first_day, last_day)
def save_imf(ds, output_dir=".", prefix=None, path=None):
    """Write ``ds`` to NetCDF and return the path written.

    ``path`` overrides the auto-generated ``<prefix>_<beg>-<end>.nc`` name;
    when it is given, ``output_dir`` and ``prefix`` are ignored.  Otherwise the
    file is placed in ``output_dir`` under the name built by
    :func:`imf_filename`.  Missing directories are created in both cases.

    (For per-year output, generate each year with
    :func:`imfgen.generate_imf_years` and call this once per dataset -- see
    ``imfgen --split-years``.)
    """
    if path is None:
        # An empty output_dir means "current directory"; os.makedirs("") would
        # raise, so only create a directory when one is actually named (the
        # same guard the explicit-path branch applies to its parent).
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, imf_filename(ds, prefix))
    else:
        parent = os.path.dirname(path)
        # A bare filename has no parent component to create.
        if parent:
            os.makedirs(parent, exist_ok=True)
    ds.to_netcdf(path=path)
    return path