# Source code for gcmprocpy.io

import os
import sys
import inspect
import logging
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import netCDF4
from .containers import ModelDataset

logger = logging.getLogger(__name__)




# [docs]
def load_datasets(directory,dataset_filter = None):
    """
    Open NetCDF file(s) and wrap each one in a ModelDataset.

    When ``directory`` is a directory, its entries are visited in sorted
    order and every name ending in ``.nc`` (and containing
    ``dataset_filter``, if one is given) is opened. Otherwise ``directory``
    is treated as the path of a single file and opened directly; the filter
    is not applied in that case.

    The model label is chosen from the units of the ``lev`` variable:
    ``'hPa'`` gives ``'WACCM-X'``, anything else gives ``'TIE-GCM'``.

    Args:
        directory (str): Path to a directory of NetCDF files, or to a single file.
        dataset_filter (str, optional): Substring a file name must contain to
            be loaded (e.g., 'prim', 'sech'). Defaults to None (no filtering).

    Returns:
        list[ModelDataset]: One ModelDataset per opened file, each built from
        the opened xarray.Dataset, the file's base name, and the model label.
    """

    def _wrap(path, label):
        # Open lazily (chunked) and leave timedelta-like variables undecoded.
        handle = xr.open_dataset(path, chunks='auto', decode_timedelta=False)
        if handle.lev.units == 'hPa':
            kind = 'WACCM-X'
        else:
            kind = 'TIE-GCM'
        return ModelDataset(ds=handle, filename=label, model=kind)

    # Single-file case: the argument is the file itself.
    if not os.path.isdir(directory):
        return [_wrap(directory, os.path.basename(directory))]

    entries = sorted(os.listdir(directory))
    logger.info("Loading datasets globally.")

    loaded = []
    for entry in entries:
        if not entry.endswith('.nc'):
            continue
        if dataset_filter is not None and dataset_filter not in entry:
            continue
        loaded.append(_wrap(os.path.join(directory, entry), entry))
    return loaded



# [docs]
def close_datasets(datasets):
    """
    Close the underlying dataset of every entry in ``datasets``.

    Args:
        datasets (list[ModelDataset]): Entries whose ``ds`` attribute is closed,
            in iteration order.

    Returns:
        None
    """
    # Lazy generator: each ``ds`` is looked up right before it is closed.
    handles = (entry.ds for entry in datasets)
    for handle in handles:
        handle.close()
    return None




# [docs]
def save_derived(datasets, variable_names, overwrite=False, verbose=True):
    """Compute derivable field(s) on the full native grid and append them
    in place to each dataset's source NetCDF file, so subsequent loads read
    them directly instead of recomputing.

    Only *derivable* intermediate fields are persisted this way — quantities
    computed on the full grid from other fields (e.g. ``'N2'`` and the
    composition ratios). Slice-based derived *outputs* (emissions, OH bands,
    EP flux) are not handled here.

    Each dataset is closed, the variables are appended via netCDF4, and the
    file is reopened, so the in-memory ``datasets`` keep working. The reopen
    also happens when the append fails part-way, and the data cache is
    cleared on every exit path (including errors), so callers never end up
    holding a closed dataset or stale cached data.

    Args:
        datasets (list[ModelDataset]): Loaded datasets (modified in place).
        variable_names (str | list[str]): Field name(s) to compute and write.
        overwrite (bool): Currently unused. NetCDF cannot delete a variable
            in place, so a field already present on disk is skipped with a
            warning regardless; regenerate into a fresh copy to replace it.
        verbose (bool): Log per-variable progress and the "already present"
            warning. The missing-dimension warning is always logged.

    Returns:
        list[str]: ``"<path>:<var>"`` entries actually written.

    Raises:
        ValueError: If a name is not present and not derivable, or a dataset
            has no on-disk source path.
        PermissionError: If a source file cannot be opened for appending
            (read-only or locked); any ``OSError`` from netCDF4 at open time
            is reported this way, chained to the original error.
    """
    from .data_derived import ensure_field
    from .containers import clear_data_cache

    if isinstance(variable_names, str):
        variable_names = [variable_names]

    written = []
    try:
        for mds in datasets:
            path = mds.ds.encoding.get('source')
            if not path:
                raise ValueError(
                    f"Dataset '{mds.filename}' has no on-disk source path; cannot persist."
                )

            # 1. Compute + realize the full-grid arrays (dataset must be open).
            pending = {}
            for name in variable_names:
                if not ensure_field(mds, name):
                    raise ValueError(
                        f"'{name}' is not present in '{mds.filename}' and is not a "
                        f"derivable field for model {mds.model}. Derivable: requires "
                        f"its inputs to be in the file."
                    )
                da = mds.ds[name]
                pending[name] = (tuple(da.dims), np.asarray(da.values, dtype='f8'),
                                 dict(da.attrs))

            # 2. Close (HDF5 disallows concurrent read+write opens of the same file).
            mds.ds.close()

            try:
                # 3. Append the new variables in place.
                try:
                    nc = netCDF4.Dataset(path, 'a')
                except OSError as exc:  # PermissionError is an OSError subclass
                    raise PermissionError(
                        f"Cannot append to '{path}' (read-only or locked): {exc}. "
                        f"Persist into a writable copy of the file instead."
                    ) from exc
                try:
                    for name, (dims, arr, attrs) in pending.items():
                        if name in nc.variables:
                            if verbose:
                                logger.warning(
                                    "'%s' already present in %s; skipping (in-place "
                                    "overwrite is unsupported).", name, os.path.basename(path))
                            continue
                        missing_dims = [d for d in dims if d not in nc.dimensions]
                        if missing_dims:
                            logger.warning("Skipping '%s': dimensions %s not in %s.",
                                           name, missing_dims, os.path.basename(path))
                            continue
                        v = nc.createVariable(name, 'f8', dims)
                        v[:] = arr
                        for key, val in attrs.items():
                            # Best effort: an attribute netCDF4 rejects must not
                            # abort the write, but leave a trace of what was dropped.
                            try:
                                v.setncattr(key, val)
                            except Exception as exc:  # pragma: no cover - attribute type quirks
                                logger.debug(
                                    "Could not set attribute '%s' on '%s': %s",
                                    key, name, exc)
                        written.append(f"{path}:{name}")
                        if verbose:
                            logger.info("Wrote derived '%s' into %s.", name,
                                        os.path.basename(path))
                finally:
                    nc.close()
            finally:
                # 4. Reopen and rebind the in-memory dataset. Done in ``finally``
                #    so a failed append never leaves ``mds.ds`` closed.
                mds.ds = xr.open_dataset(path, chunks='auto', decode_timedelta=False)

            # Refresh the time lookups from the reopened (augmented) file.
            mds._time_values = mds.ds['time'].values
            mds._time_set = set(mds._time_values)
    finally:
        # Earlier datasets may already have been rewritten and rebound when a
        # later one fails, so the cache is cleared on every exit path.
        clear_data_cache()
    return written




# [docs]
def save_output(output_directory,filename,output_format,plot_object):
    """
    Save a plot into the ``proc`` subdirectory of ``output_directory``.

    The subdirectory is created if it does not exist, and the plot is written
    to ``<output_directory>/proc/<filename>.<output_format>``.

    Args:
        output_directory (str): Base directory; ``proc`` is appended to it.
        filename (str): Output file name without extension.
        output_format (str): Used both as the file extension and as the
            ``format`` argument of ``savefig``.
        plot_object: Object providing ``savefig`` (presumably a matplotlib
            figure — confirm against callers).

    Returns:
        None
    """
    proc_dir = os.path.join(output_directory, 'proc')
    os.makedirs(proc_dir, exist_ok=True)

    target_name = '{}.{}'.format(filename, output_format)
    target_path = os.path.join(proc_dir, target_name)

    # Tight bounding box with half an inch of padding around the figure.
    plot_object.savefig(
        target_path,
        format=output_format,
        bbox_inches='tight',
        pad_inches=0.5,
    )
    logger.info(f"Plot saved as {filename}")



def print_handler(string, verbose):
    """
    Send a message to the module logger at DEBUG level when verbose is truthy.

    Args:
        string (str): The message to log.
        verbose (bool): When falsy, nothing is logged.

    Returns:
        None
    """
    # Guard clause: stay silent unless verbosity was requested.
    if not verbose:
        return None
    logger.debug(string)
    return None