Source code for gcmprocpy.gpigen.dataset

"""Assemble the processed arrays into an xarray Dataset and write NetCDF."""

import os

import numpy as np
import xarray as xr


[docs] def build_dataset(year_day, f107d, f107a, kp, window, centered, missing_dates, model="tiegcm"): """Build the GPI ``xarray.Dataset`` for the target model. ``model="tiegcm"`` (default): the TIE-GCM format -- ``year_day``/``f107d``/ ``f107a`` on ``ndays`` and ``kp`` on ``(ndays, nkp=8)`` -- with the TIEGCM global attributes (unchanged). ``model="waccmx"``: the WACCM-X (CESM) 3-hourly solar-parameters format on a single (unlimited-on-write) ``time`` dimension of length ``ndays*8``: ``date`` (YYYYMMDD), ``datesec`` (0..75600), ``f107`` (= ``f107d`` repeated across the 8 daily slots), ``f107a`` (repeated), ``kp`` (flattened) and ``ap`` (derived from ``Kp`` via the official lookup table). """ year_day = np.array(year_day) kind = "centered" if centered else "trailing" if model == "waccmx": return _build_waccmx(year_day, f107d, f107a, kp, window, kind, missing_dates) avg_label = f"{window}-day {kind} average 10.7 cm solar flux" ds = xr.Dataset( { "year_day": ( ["ndays"], year_day, {"long_name": "4-digit year followed by 3-digit day"}, ), "f107d": ( ["ndays"], np.array(f107d), {"long_name": "daily 10.7 cm solar flux"}, ), "f107a": (["ndays"], np.array(f107a), {"long_name": avg_label}), "kp": (["ndays", "nkp"], np.array(kp), {"long_name": "3-hourly kp index"}), }, coords={"ndays": year_day, "nkp": np.arange(np.array(kp).shape[1])}, ) ds.attrs["title"] = "Geophysical Indices, obtained from gfz-potsdam" ds.attrs["yearday_beg"] = year_day[0] ds.attrs["yearday_end"] = year_day[-1] ds.attrs["ncar_mss_path"] = "/TGCM/data/gpi_1960001-2015365.nc" ds.attrs["data_source_url"] = "https://kp.gfz.de/" ds.attrs["hao_file_write_source"] = "https://github.com/AnonNick/GPI" ds.attrs["info"] = ( "Yearly ascii data files obtained from data_source_url; " "nc file written by hao_file_write_source." ) ds.attrs["averaging_window_days"] = window ds.attrs["averaging_kind"] = kind ds.attrs["F107_missing"] = list(missing_dates) return ds
def _build_waccmx(year_day, f107d, f107a, kp, window, kind, missing_dates): """WACCM-X 3-hourly GPI Dataset (see :func:`build_dataset`).""" from .dates import yearday_to_yyyymmdd from .indices import kp_to_ap kp = np.asarray(kp, dtype=float) # (ndays, nkp) ndays, nkp = kp.shape date = np.repeat([yearday_to_yyyymmdd(d) for d in year_day], nkp).astype("float64") # YYYYMMDD, 8x/day datesec = np.tile(np.arange(nkp) * 10800.0, ndays) # 0, 10800, ... 75600 f107 = np.repeat(np.asarray(f107d, dtype=float), nkp) f107a_flat = np.repeat(np.asarray(f107a, dtype=float), nkp) kp_flat = kp.reshape(-1) ap_flat = kp_to_ap(kp_flat).astype("float64") sfu = "10^-22 W m^-2 Hz^-1" ds = xr.Dataset( { "date": ("time", date, {"long_name": "current date (YYYYMMDD)"}), "datesec": ("time", datesec, {"long_name": "seconds after midnight UT"}), "f107": ("time", f107, {"units": sfu, "long_name": "10.7 cm solar radio flux (F10.7)"}), "f107a": ("time", f107a_flat, {"units": sfu, "long_name": f"{window}-day {kind} mean of 10.7 cm " "solar radio flux (F10.7)"}), "kp": ("time", kp_flat, {"long_name": "3 hr planetary K index"}), "ap": ("time", ap_flat, {"long_name": "3 hr planetary a index from K index lookup table"}), } ) ds.attrs["title"] = ("Geophysical Indices for WACCM-X " "(from GFZ Potsdam via TIE-GCM GPI)") ds.attrs["yearday_beg"] = year_day[0] ds.attrs["yearday_end"] = year_day[-1] ds.attrs["data_source_url"] = "https://kp.gfz.de/" ds.attrs["hao_file_write_source"] = "https://github.com/AnonNick/GPI" ds.attrs["data_summary"] = ( "3-hourly Kp and daily F10.7 from GFZ Potsdam, processed for TIE-GCM and " "converted to the CAM/WACCM/WACCM-X 3-hourly solar-parameters format; ap " "derived from Kp via the official Kp->ap lookup table." ) ds.attrs["averaging_window_days"] = window ds.attrs["averaging_kind"] = kind ds.attrs["F107_missing"] = list(missing_dates) ds.attrs["model"] = "waccmx" return ds
[docs] def gpi_filename(ds, prefix="gpi"): """``<prefix>[_WACCMX]_<begYYYYDDD>-<endYYYYDDD>.nc`` from the dataset's bounds. A ``model='waccmx'`` dataset gains a ``WACCMX`` tag (e.g. ``gpi_WACCMX_...``). """ beg = int(ds.attrs["yearday_beg"]) end = int(ds.attrs["yearday_end"]) tag = "_WACCMX" if ds.attrs.get("model") == "waccmx" else "" return f"{prefix}{tag}_{beg}-{end}.nc"
[docs] def save_gpi(ds, output_dir=".", prefix="gpi", path=None): """Write ``ds`` to NetCDF and return the path written. ``path`` overrides the auto-generated name; otherwise the file is ``<output_dir>/<prefix>_<beg>-<end>.nc``. """ if path is None: os.makedirs(output_dir, exist_ok=True) path = os.path.join(output_dir, gpi_filename(ds, prefix)) else: parent = os.path.dirname(path) if parent: os.makedirs(parent, exist_ok=True) unlimited = ["time"] if ds.attrs.get("model") == "waccmx" else None ds.to_netcdf(path=path, unlimited_dims=unlimited) return path