Source code for gcmprocpy.gpigen.core

"""Top-level orchestration: fetch -> process -> Dataset."""

from .dates import coerce_datetime, yesterday
from .processing import (
    build_kp,
    compute_end_trims,
    fill_fobs,
    running_average,
    trim,
)
from .dataset import build_dataset
from .sources import fetch
from datetime import timedelta


[docs] def generate_gpi( start="1960-01-01", end=None, source="json", window=81, centered=True, status="def", cache_dir=None, verbose=False, ): """Generate a GPI ``xarray.Dataset`` for ``[start, end]``. Parameters ---------- start, end : date-like or None Inclusive bounds (``YYYY-MM-DD``, ``YYYYDDD``, ISO, or ``datetime``). ``end=None`` defaults to yesterday. The output begins at ``start`` but, for a centered window, ends ``window // 2`` days before ``end`` (a centered average needs future data that does not yet exist). source : {"json", "textfile"} GFZ JSON API (default) or the locally-parsed 1932-onward text file. window : int Averaging window in days (default 81). centered : bool Centered (default) vs trailing average for ``f107a``. status : str GFZ ``status`` query param for the JSON API (default ``"def"``). cache_dir : str or None Where to drop the downloaded text file (``textfile`` source only). verbose : bool Print progress. Returns ------- xarray.Dataset Use :func:`gpigen.save_gpi` to write it to NetCDF. """ start_dt = coerce_datetime(start) end_dt = coerce_datetime(end, end_of_day=True) if end is not None else yesterday() half = window // 2 start_remove = half if centered else window avg_end_invalid = half if centered else 0 # Fetch extra lead-in days so the average is complete at `start`. fetch_start = start_dt - timedelta(days=start_remove) if verbose: print( f"Fetching {source} data {fetch_start.date()} -> {end_dt.date()} " f"(window={window}, {'centered' if centered else 'trailing'})" ) fobs_data, kp_data = fetch( source, fetch_start, end_dt, status=status, cache_dir=cache_dir, verbose=verbose ) if not fobs_data["Fobs"] or not kp_data["Kp"]: raise ValueError( f"No GPI data available for {start_dt.date()} -> {end_dt.date()} " f"(source={source!r}): the GFZ {source} source returned no records for " f"this range. Check the dates fall within the published archive and are " f"not in the future." ) year_day, f107d, missing_dates = fill_fobs(fobs_data) if verbose and missing_dates: print(f"Filled {len(missing_dates)} missing F10.7 day(s): {missing_dates}") f107a = running_average(f107d, window, centered) kp, unique_dates = build_kp(kp_data) fobs_end, kp_end = compute_end_trims(year_day, unique_dates, avg_end_invalid) year_day = trim(year_day, start_remove, fobs_end) f107d = trim(f107d, start_remove, fobs_end) f107a = trim(f107a, start_remove, fobs_end) kp = trim(kp, start_remove, kp_end) if not (len(year_day) == len(f107d) == len(f107a) == len(kp)): raise RuntimeError( "Array length mismatch after trimming " f"(year_day={len(year_day)}, f107d={len(f107d)}, " f"f107a={len(f107a)}, kp={len(kp)})." ) if len(year_day) == 0: kind = "centered" if centered else "trailing" raise ValueError( f"No GPI output days for {start_dt.date()} -> {end_dt.date()}: the " f"available data is shorter than the {window}-day {kind} averaging " f"window. Request a wider date range or a smaller window." ) ds = build_dataset(year_day, f107d, f107a, kp, window, centered, missing_dates) if verbose: print( f"Built GPI dataset: {ds.attrs['yearday_beg']} -> " f"{ds.attrs['yearday_end']} ({len(year_day)} days)" ) return ds