{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Exploration\n", "\n", "This notebook demonstrates how to load datasets and explore their contents using gcmprocpy's data inspection functions.\n", "\n", "**Note:** This notebook requires TIE-GCM or WACCM-X model output files." ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import os\n", "import warnings\n", "\n", "# Suppress all warnings, including any raised while importing gcmprocpy below.\n", "warnings.filterwarnings('ignore')\n", "\n", "import gcmprocpy as gy" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Loading Datasets\n", "\n", "Use `load_datasets()` to lazily load NetCDF files from a directory. The optional `dataset_filter` parameter filters filenames.\n", "\n", "Set the `GCMPROCPY_DATA_DIR` environment variable to point the notebook at your own model output; otherwise the default directory in the cell below is used." ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded 5 files\n", " decsol_smin_2.5x0.25_sech_001.nc — model: TIE-GCM, times: 24\n", " decsol_smin_2.5x0.25_sech_002.nc — model: TIE-GCM, times: 24\n", " decsol_smin_2.5x0.25_sech_003.nc — model: TIE-GCM, times: 24\n", " decsol_smin_2.5x0.25_sech_004.nc — model: TIE-GCM, times: 24\n", " decsol_smin_2.5x0.25_sech_005.nc — model: TIE-GCM, times: 24\n" ] } ], "source": [ "# Directory holding the model history files. Override it with the\n", "# GCMPROCPY_DATA_DIR environment variable; the default is the path the\n", "# stored output below was generated from.\n", "directory = os.environ.get(\n", "    'GCMPROCPY_DATA_DIR',\n", "    '/glade/work/nikhilr/tiegcm3.0/benchmarks/2.5/seasons/decsol_smin/hist',\n", ")\n", "dataset_filter = 'sech'\n", "\n", "datasets = gy.load_datasets(directory, dataset_filter=dataset_filter)\n", "\n", "print(f'Loaded {len(datasets)} files')\n", "for mds in datasets:\n", "    # NOTE(review): _time_values is a private attribute; switch to a public\n", "    # accessor if gcmprocpy provides one.\n", "    print(f' {mds.filename} — model: {mds.model}, times: {len(mds._time_values)}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Listing Timestamps\n", "\n", "`time_list()` returns all unique timestamps across the loaded datasets." 
] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total timestamps: 120\n", "First: 2002-12-21T01:00:00.000000000\n", "Last: 2002-12-26T00:00:00.000000000\n" ] } ], "source": [ "times = gy.time_list(datasets)\n", "print(f'Total timestamps: {len(times)}')\n", "print(f'First: {times[0]}')\n", "print(f'Last: {times[-1]}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Listing Variables\n", "\n", "`var_list()` returns all variable names (excluding coordinate variables) in sorted order." ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Variables (84): ['HE', 'HMF2', 'Kp', 'NE', 'NMF2', 'NO', 'O1', 'O2', 'OP', 'POTEN', 'TE', 'TEC', 'TI', 'TLBC', 'TLBC_NM', 'TN', 'UI_ExB', 'ULBC', 'ULBC_NM', 'UN', 'VI_ExB', 'VLBC', 'VLBC_NM', 'VN', 'WI_ExB', 'WN', 'Z', 'ZG', 'ZMAG', 'al', 'alfac', 'alfad', 'amienh_ncfile', 'amiesh_ncfile', 'bgrddata_ncfile', 'bximf', 'byimf', 'bzimf', 'calendar_advance', 'colfac', 'coupled_mage', 'crit1', 'crit2', 'ctmt_ncfile', 'ctpoten', 'day', 'dtide', 'e1', 'e2', 'ec', 'ed', 'f107a', 'f107d', 'gnsrhs', 'gpi_ncfile', 'grav', 'gswm_mi_di_ncfile', 'gswm_mi_sdi_ncfile', 'gswm_nm_di_ncfile', 'gswm_nm_sdi_ncfile', 'gzigm1', 'gzigm2', 'h1', 'h2', 'hpower', 'imf_ncfile', 'iter', 'joulefac', 'mag', 'mtime', 'ncep_ncfile', 'ntask_mpi', 'p0', 'p0_model', 'saber_ncfile', 'sdtide', 'see_ncfile', 'swden', 'swvel', 'tidi_ncfile', 'timestep', 'ut', 'write_date', 'year']\n" ] } ], "source": [ "variables = gy.var_list(datasets)\n", "print(f'Variables ({len(variables)}): {variables}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Listing Levels, Longitudes, and Latitudes\n", "\n", "`level_list()`, `lon_list()`, and `lat_list()` return the level, longitude, and latitude values of the loaded datasets. Only the count and range of each are printed." ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Levels (114): -7.0 to 7.125\n", "\n", "Longitudes (144): -180.0 to 177.5\n", "Latitudes (72): -88.75 to 88.75\n" ] } ], "source": [ "levels = gy.level_list(datasets)\n", "# Print the count and range only (as for longitude/latitude below) rather\n", "# than dumping every level value.\n", "print(f'Levels ({len(levels)}): {levels[0]} to {levels[-1]}')\n", "\n", "lons = gy.lon_list(datasets)\n", "print(f'\\nLongitudes ({len(lons)}): {lons[0]} to {lons[-1]}')\n", "\n", "lats = gy.lat_list(datasets)\n", "print(f'Latitudes ({len(lats)}): {lats[0]} to {lats[-1]}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Listing Dimensions\n", "\n", "`dim_list()` returns the names of the dimensions in the loaded datasets." ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dimensions: ['dtidedim', 'ilev', 'imlev', 'lat', 'latlon', 'lev', 'lon', 'mlat', 'mlev', 'mlon', 'mtimedim', 'sdtidedim', 'time']\n" ] } ], "source": [ "dims = gy.dim_list(datasets)\n", "print(f'Dimensions: {dims}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Variable Information\n", "\n", "`var_info()` returns attributes and dimensions for a specific variable across all loaded files." ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "decsol_smin_2.5x0.25_sech_001.nc:\n", " units: K\n", " long_name: NEUTRAL TEMPERATURE\n", " dimensions: ('time', 'lev', 'lat', 'lon')\n" ] } ], "source": [ "tn_info = gy.var_info(datasets, 'TN')\n", "# Show the details from the first file that reports the variable, then stop.\n", "for fname, details in tn_info.items():\n", "    if details:\n", "        print(f'{fname}:')\n", "        print(f' units: {details[\"attributes\"][\"units\"]}')\n", "        print(f' long_name: {details[\"attributes\"][\"long_name\"]}')\n", "        print(f' dimensions: {details[\"dimensions\"]}')\n", "        break" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Dimension Information\n", "\n", "`dim_info()` returns details, such as the size, of a specific dimension for each loaded file." 
] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lat: size=72\n", "lon: size=144\n", "lev: size=57\n", "ilev: size=57\n" ] } ], "source": [ "for dim_name in ['lat', 'lon', 'lev', 'ilev']:\n", "    per_file = gy.dim_info(datasets, dim_name)\n", "    # Report the first file that has details for this dimension.\n", "    details = next((entry for entry in per_file.values() if entry), None)\n", "    if details:\n", "        print(f'{dim_name}: size={details[\"size\"]}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Time Conversion Utilities\n", "\n", "`get_mtime()` converts a datetime to model time (mtime), and `get_time()` converts mtime back to datetime." ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time: 2002-12-21T01:00:00.000000000\n", "mtime: [355, 1, 0, 0]\n", "Back: 2002-12-21T01:00:00.000000000\n" ] } ], "source": [ "# Convert the first timestamp to model time using the first loaded file.\n", "first_time = times[0]\n", "mtime = gy.get_mtime(datasets[0].ds, first_time)\n", "print(f'Time: {first_time}')\n", "print(f'mtime: {mtime}')\n", "\n", "# Convert the model time back to a timestamp to confirm the round trip.\n", "round_trip = gy.get_time(datasets, mtime)\n", "print(f'Back: {round_trip}')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Cleanup\n", "\n", "Call `close_datasets()` on the loaded datasets once you are finished with them." ] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Datasets closed.\n" ] } ], "source": [ "gy.close_datasets(datasets)\n", "print('Datasets closed.')" ] }
], "metadata": { "kernelspec": { "display_name": "gcmprocpy_test", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.13" } }, "nbformat": 4, "nbformat_minor": 4 }