From a00274e89332c5bb4a2ea14b928af8055ae6f044 Mon Sep 17 00:00:00 2001 From: Peter J Gleckler Date: Tue, 1 Jul 2025 09:40:36 -0700 Subject: [PATCH 1/3] last updates --- DataPreparationExamples/MACA3-0/MACA3-0_CMIP6_runCMOR.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataPreparationExamples/MACA3-0/MACA3-0_CMIP6_runCMOR.py b/DataPreparationExamples/MACA3-0/MACA3-0_CMIP6_runCMOR.py index 68a96f5..8b7b0f0 100644 --- a/DataPreparationExamples/MACA3-0/MACA3-0_CMIP6_runCMOR.py +++ b/DataPreparationExamples/MACA3-0/MACA3-0_CMIP6_runCMOR.py @@ -70,7 +70,7 @@ for yr in yrs_all: start_time = datetime.now() - f = fd.sel(time=slice(yr[0]+ '-01-01',yr[1]+ '-01-01')) + f = fd.sel(time=slice(yr[0]+ '-01-01',yr[1]+ '-12-31')) d = f[inputVarName] lat = f.lat.values lon = f.lon.values From 4afc863562f629a9ecf2231c8cb0cefce2135b64 Mon Sep 17 00:00:00 2001 From: Peter J Gleckler Date: Mon, 18 Aug 2025 17:54:31 -0700 Subject: [PATCH 2/3] some time ago --- src/createCVs.ipynb | 4 ++-- src/pullTable.ipynb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/createCVs.ipynb b/src/createCVs.ipynb index 79f1727..4114e0f 100644 --- a/src/createCVs.ipynb +++ b/src/createCVs.ipynb @@ -1102,7 +1102,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "NERSC Python", "language": "python", "name": "python3" }, @@ -1116,7 +1116,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.3" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/src/pullTable.ipynb b/src/pullTable.ipynb index 1e59aee..9764858 100644 --- a/src/pullTable.ipynb +++ b/src/pullTable.ipynb @@ -387,7 +387,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "NERSC Python", "language": "python", "name": "python3" }, @@ -401,7 +401,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.3" + 
"version": "3.11.7" } }, "nbformat": 4, From 1f57d80d602ab6e6a117fbbb164c37b783a04a9a Mon Sep 17 00:00:00 2001 From: Peter J Gleckler Date: Tue, 20 Jan 2026 09:47:53 -0800 Subject: [PATCH 3/3] re introduced src notebooks --- src/createCVs.ipynb | 1229 +++++++++++++++++++++++++++++++++++++++++++ src/pullTable.ipynb | 428 +++++++++++++++ 2 files changed, 1657 insertions(+) create mode 100644 src/createCVs.ipynb create mode 100644 src/pullTable.ipynb diff --git a/src/createCVs.ipynb b/src/createCVs.ipynb new file mode 100644 index 0000000..1c5d332 --- /dev/null +++ b/src/createCVs.ipynb @@ -0,0 +1,1229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3f1a7b22-8294-4b5d-846e-5a40bd59fd42", + "metadata": {}, + "source": [ + "# Pull CV entries files; strip out extraneous entries\n", + "
\n", + "

\n", + " \"Program \n", + " \"Lawrence \n", + " \"United\n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "a6bc0f03-26e7-4fbe-adc7-85a213e0eed7", + "metadata": {}, + "source": [ + "**Summary:**\n", + "\n", + "This file pulls a CMIP6Plus_CVs/CMOR3.9.0-era CV files, strips out\n", + "extraneous entries and saves the files for local use\n", + "\n", + "**Authors:**\n", + "\n", + "Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))\n", + "\n", + "**Notes:**\n", + "\n", + "PJD 15 Feb 2025 - initiated
\n", + "PJD 15 Feb 2025 - first prototype functioning
\n", + "PJD 16 Feb 2025 - add source_id contact and other info; Add DRS
\n", + "PJD 16 Feb 2025 - reconstructed DRS and filename templates to order relevant information
\n", + "PJD 18 Feb 2025 - updated to remove variant_info and variant_label from required_global_attributes
\n", + "PJD 18 Feb 2025 - updated to remove variant_label from DRS
\n", + "PJD 20 Feb 2025 - updated tracking_id -> tracking_id_prefix
\n", + "PJD 20 Feb 2025 - update MACA3-0 registration, augment with available variables
\n", + "PJD 24 Feb 2025 - updated source_version_number -> source_version
\n", + "PJD 27 Feb 2025 - update license info
\n", + "PJD 4 Mar 2025 - adding `gr` and `gr1` grid_labels ([#40](https://github.com/PCMDI/DRCDP/issues/40))
\n", + "PJD 28 May 2025 - updating `license_id` and `region` CVs to match CMOR 3.10+ changes ([PCMDI/DRCDP/issues#43](https://github.com/PCMDI/DRCDP/issues/43))
\n", + "PJD 28 May 2025 - updating `license[\"license_template\"]` to match CMOR 3.10+ changes ([PCMDI/DRCDP/issues#43](https://github.com/PCMDI/DRCDP/issues/43))
\n", + "PJD 24 Jun 2025 - update `DRCDP_CV.json:institution_id` following [PCMDI/DRCDP/issues#55](https://github.com/PCMDI/DRCDP/issues/55) and [WCRP-CMIP/WCRP-universe/issues#63](https://github.com/WCRP-CMIP/WCRP-universe/issues/63#issuecomment-3085735081)
\n", + "PJD 2 Sep 2025 - update to fix conflict
\n", + "PJD 2 Sep 2025 - add EPA/EDDE2-0 registration information back in ([PCMDI/DRCDP/issues#50](https://github.com/PCMDI/DRCDP/issues/50))
\n", + "PJD 2 Sep 2025 - add AP1hr table ([PCMDI/DRCDP/issues#50](https://github.com/PCMDI/DRCDP/issues/50))
\n", + "PJD 2 Sep 2025 - augment `activity_id` to list/array type ([PCMDI/DRCDP/pull#57](https://github.com/PCMDI/DRCDP/pull/57/files))
\n", + "\n", + "***TODO:***\n", + "\n", + "**Links:**" + ] + }, + { + "cell_type": "markdown", + "id": "f4c38f9e-a1ff-4c31-b9bb-77656678b3ac", + "metadata": {}, + "source": [ + "### imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1ee04ff8-d8ea-46dc-bbd2-6c4d33e180c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 38.3 ms, sys: 13.6 ms, total: 51.8 ms\n", + "Wall time: 63.5 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "import datetime\n", + "import json\n", + "import os\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "c95f906d-ed0f-48bd-9831-3007f6d389a7", + "metadata": {}, + "source": [ + "### set table, coordinate, formula_terms and grids URLs and pull files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9c30b7a6-28ab-4c26-bedd-a99cd1cfe201", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 frequency\n", + "1 grid_label\n", + "2 license\n", + "3 nominal_resolution\n", + "4 realm\n", + "5 source_type\n", + "CPU times: user 48.2 ms, sys: 19.9 ms, total: 68.1 ms\n", + "Wall time: 765 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "urls = {\n", + " \"frequency\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_frequency.json\",\n", + " \"grid_label\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_grid_label.json\",\n", + " \"license\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_license.json\",\n", + " \"nominal_resolution\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_nominal_resolution.json\",\n", + " \"realm\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_realm.json\",\n", + " \"source_type\": \"https://raw.githubusercontent.com/PCMDI/obs4MIPs-cmor-tables/refs/heads/master/obs4MIPs_source_type.json\",\n", 
+ "}\n", + "# create: institution_id, product, required_global_attributes, source_id, table_id, tracking_id\n", + "\n", + "# loop through urls\n", + "keys = urls.keys()\n", + "for count, key in enumerate(keys):\n", + " print(count, key)\n", + " url = urls[key]\n", + " try:\n", + " response = requests.get(url)\n", + " response.raise_for_status() # Raise HTTPError for bad responses (4/5xx)\n", + " vars()[key] = json.loads(response.text)\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"Request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " print(f\"JSON decode failed: {e}\")\n", + " except Exception as e:\n", + " print(f\"Unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c902d430-2c08-493d-8a39-30d4d4efdbb4", + "metadata": {}, + "source": [ + "### create DRS" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f67bbe8f-7ca6-4f2c-82c0-c16812fcf557", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'DRS': {'directory_path_template': '',\n", + " 'directory_path_example': 'DRCDP/NAM/UCSD-SIO/LOCA2-1/CMIP6/CMIP/historical/ACCESS-CM2/r1i1p1f1/day/tasmax/v20250216',\n", + " 'filename_template': '',\n", + " 'filename_example': 'tasmax_NAM_UCSD-SIO_LOCA2-1_CMIP6_historical_ACCESS-CM2_r1i1p1f1_day_19500102-19500131.nc'}}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DRS = {}\n", + "DRS[\"DRS\"] = {}\n", + "DRS[\"DRS\"][\"directory_path_template\"] = \"\".join(\n", + " [\n", + " \"\",\n", + " \"\",\n", + " \"\",\n", + " ]\n", + ")\n", + "DRS[\"DRS\"][\n", + " \"directory_path_example\"\n", + "] = \"DRCDP/NAM/UCSD-SIO/LOCA2-1/CMIP6/CMIP/historical/ACCESS-CM2/r1i1p1f1/day/tasmax/v20250216\"\n", + "DRS[\"DRS\"][\"filename_template\"] = \"\".join(\n", + " [\n", + " \"\",\n", + " \"\",\n", + " \"\",\n", + " ]\n", + ")\n", + "DRS[\"DRS\"][\n", + " 
\"filename_example\"\n", + "] = \"tasmax_NAM_UCSD-SIO_LOCA2-1_CMIP6_historical_ACCESS-CM2_r1i1p1f1_day_19500102-19500131.nc\"\n", + "DRS" + ] + }, + { + "cell_type": "markdown", + "id": "facd6113-bd56-470c-86f2-53188951e0d8", + "metadata": {}, + "source": [ + "### process frequency" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0f61fbc5-3297-46d6-836f-d0505b7180ba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'frequency': {'1hr': 'sampled hourly', 'day': 'daily mean samples'}}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequency.pop(\"version_metadata\")\n", + "keyList = list(frequency[\"frequency\"].keys())\n", + "keepKey = [\"1hr\", \"day\"]\n", + "for count, key in enumerate(keyList):\n", + " if key not in keepKey:\n", + " frequency[\"frequency\"].pop(key)\n", + "frequency" + ] + }, + { + "cell_type": "markdown", + "id": "e72bdfef-c2fe-47a5-af47-05b1e123e406", + "metadata": {}, + "source": [ + "### process grid_label" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b7e0bced-bc23-4ac0-99e8-cc266e7364f5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'grid_label': {'gn': 'data reported on the analysis-native grid',\n", + " 'gr': \"regridded data reported on the data provider's preferred target grid\",\n", + " 'gr1': 'regridded data reported on a grid other than the native grid and other than the preferred target grid'}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grid_label.pop(\"version_metadata\")\n", + "keyList = list(grid_label[\"grid_label\"].keys())\n", + "keepKey = [\"gn\", \"gr\", \"gr1\"]\n", + "for count, key in enumerate(keyList):\n", + " if key not in keepKey:\n", + " grid_label[\"grid_label\"].pop(key)\n", + "grid_label[\"grid_label\"][\"gn\"] = grid_label[\"grid_label\"][\"gn\"].replace(\n", + " \"a model's \", \"the 
analysis-\"\n", + ")\n", + "grid_label" + ] + }, + { + "cell_type": "markdown", + "id": "be344080-c193-4d79-a43e-b99dc78deed9", + "metadata": {}, + "source": [ + "### create institution_id" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "897d11cc-426c-4533-a59d-23d2f7ef9146", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'institution_id': {'EPA': {'contact': 'Megan Mallard; Mallard.Megan@epa.gov',\n", + " 'name': 'United States Environmental Protection Agency, National Exposure Research Laboratory (NERL), Systems Exposure Division, 109 T.W. Alexander Drive, Durham, NC 27709, USA',\n", + " 'ROR': '03tns0030',\n", + " 'URL': 'https://www.epa.gov/'},\n", + " 'UCM-ACSL': {'contact': 'John T. Abatzoglou; jabatzoglou@ucmerced.edu',\n", + " 'name': 'Applied Climate Science Laboratory, University of California, Merced, 5200 N. Lake Road, Merced, CA 95343, USA',\n", + " 'ROR': '00d9ah105',\n", + " 'URL': 'https://www.climatologylab.org'},\n", + " 'UCSD-SIO': {'contact': 'Daniel Feldman; DRFeldman@lbl.gov',\n", + " 'name': 'Scripps Institution of Oceanography, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA',\n", + " 'ROR': '04v7hvq31',\n", + " 'URL': 'https://scripps.ucsd.edu'},\n", + " 'TTU': {'contact': 'Katharine Hayhoe; katharine.hayhoe@ttu.edu',\n", + " 'name': 'Texas Tech University, 2520 Broadway Avenue, Lubbock, TX 79409, USA',\n", + " 'ROR': '0405mnx93',\n", + " 'URL': 'https://www.ttu.edu/'}}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "institution_id = {}\n", + "institution_id[\"institution_id\"] = {}\n", + "institution_id[\"institution_id\"][\"EPA\"] = {}\n", + "institution_id[\"institution_id\"][\"EPA\"][\n", + " \"contact\"\n", + "] = \"Megan Mallard; Mallard.Megan@epa.gov\"\n", + "institution_id[\"institution_id\"][\"EPA\"][\n", + " \"name\"\n", + "] = \"United States Environmental Protection Agency, National 
Exposure Research Laboratory (NERL), Systems Exposure Division, 109 T.W. Alexander Drive, Durham, NC 27709, USA\"\n", + "institution_id[\"institution_id\"][\"EPA\"][\"ROR\"] = \"03tns0030\"\n", + "institution_id[\"institution_id\"][\"EPA\"][\"URL\"] = \"https://www.epa.gov/\"\n", + "institution_id[\"institution_id\"][\"UCM-ACSL\"] = {}\n", + "institution_id[\"institution_id\"][\"UCM-ACSL\"][\n", + " \"contact\"\n", + "] = \"John T. Abatzoglou; jabatzoglou@ucmerced.edu\"\n", + "institution_id[\"institution_id\"][\"UCM-ACSL\"][\n", + " \"name\"\n", + "] = \"Applied Climate Science Laboratory, University of California, Merced, 5200 N. Lake Road, Merced, CA 95343, USA\"\n", + "institution_id[\"institution_id\"][\"UCM-ACSL\"][\"ROR\"] = \"00d9ah105\"\n", + "institution_id[\"institution_id\"][\"UCM-ACSL\"][\"URL\"] = \"https://www.climatologylab.org\"\n", + "institution_id[\"institution_id\"][\"UCSD-SIO\"] = {}\n", + "institution_id[\"institution_id\"][\"UCSD-SIO\"][\n", + " \"contact\"\n", + "] = \"Daniel Feldman; DRFeldman@lbl.gov\"\n", + "institution_id[\"institution_id\"][\"UCSD-SIO\"][\n", + " \"name\"\n", + "] = \"Scripps Institution of Oceanography, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA\"\n", + "institution_id[\"institution_id\"][\"UCSD-SIO\"][\"ROR\"] = \"04v7hvq31\"\n", + "institution_id[\"institution_id\"][\"UCSD-SIO\"][\"URL\"] = \"https://scripps.ucsd.edu\"\n", + "institution_id[\"institution_id\"][\"TTU\"] = {}\n", + "institution_id[\"institution_id\"][\"TTU\"][\n", + " \"contact\"\n", + "] = \"Katharine Hayhoe; katharine.hayhoe@ttu.edu\"\n", + "institution_id[\"institution_id\"][\"TTU\"][\n", + " \"name\"\n", + "] = \"Texas Tech University, 2520 Broadway Avenue, Lubbock, TX 79409, USA\"\n", + "institution_id[\"institution_id\"][\"TTU\"][\"ROR\"] = \"0405mnx93\"\n", + "institution_id[\"institution_id\"][\"TTU\"][\"URL\"] = \"https://www.ttu.edu/\"\n", + "institution_id" + ] + }, + { + "cell_type": "markdown", + "id": 
"18a245fc-07d2-4068-9e48-23d811882345", + "metadata": {}, + "source": [ + "### process license" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5a77acbb-0699-417e-88fa-7997aad12716", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'license': {'license_template': '; DRDCP data produced by is licensed under a License (). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing DRDCP output, including citation requirements and proper acknowledgment. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.',\n", + " 'license_id': {'CC BY 4.0': {'license_type': 'Creative Commons Attribution 4.0 International',\n", + " 'license_url': 'https://creativecommons.org/licenses/by/4.0/'},\n", + " 'CC0 1.0': {'license_type': 'Creative Commons CC0 1.0 Universal Public Domain Dedication',\n", + " 'license_url': 'https://creativecommons.org/publicdomain/zero/1.0/'}}}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# cleanup upstream metadata\n", + "license.pop(\"version_metadata\")\n", + "license[\"license\"].pop(\"license\")\n", + "# remap license_template\n", + "license[\"license\"][\"license_template\"] = \" \".join(\n", + " [\n", + " \"; DRDCP data produced by \",\n", + " \"is licensed under a License ().\",\n", + " \"Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse\",\n", + " \"for terms of use governing DRDCP output, including\",\n", + " \"citation requirements and proper acknowledgment.\",\n", + " \"The data producers and data providers make no\",\n", + " \"warranty, either express or implied, including,\",\n", + " \"but not limited to, warranties of 
merchantability\",\n", + " \"and fitness for a particular purpose. All\",\n", + " \"liabilities arising from the supply of the\",\n", + " \"information (including any liability arising\",\n", + " \"in negligence) are excluded to the fullest\",\n", + " \"extent permitted by law.\",\n", + " ]\n", + ")\n", + "\n", + "# remap license_options to license_id\n", + "license[\"license\"][\"license_id\"] = {}\n", + "# CC BY 4.0\n", + "license[\"license\"][\"license_id\"][\"CC BY 4.0\"] = {}\n", + "license[\"license\"][\"license_id\"][\"CC BY 4.0\"][\"license_type\"] = license[\"license\"][\n", + " \"license_options\"\n", + "][\"CC BY 4.0\"][\"license_id\"]\n", + "license[\"license\"][\"license_id\"][\"CC BY 4.0\"][\"license_url\"] = license[\"license\"][\n", + " \"license_options\"\n", + "][\"CC BY 4.0\"][\"license_url\"]\n", + "# CC0 1.0\n", + "license[\"license\"][\"license_id\"][\"CC0 1.0\"] = {}\n", + "license[\"license\"][\"license_id\"][\"CC0 1.0\"][\"license_type\"] = license[\"license\"][\n", + " \"license_options\"\n", + "][\"CC0 1.0\"][\"license_id\"]\n", + "license[\"license\"][\"license_id\"][\"CC0 1.0\"][\"license_url\"] = license[\"license\"][\n", + " \"license_options\"\n", + "][\"CC0 1.0\"][\"license_url\"]\n", + "# cleanup\n", + "license[\"license\"].pop(\"license_options\")\n", + "# print license\n", + "license" + ] + }, + { + "cell_type": "markdown", + "id": "4d29d469-258b-4e44-b437-36e9e59f2ac6", + "metadata": {}, + "source": [ + "### process nominal_resolution" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c4031183-caeb-4ee4-83db-c3a4ee34459b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'nominal_resolution': {'0.5 km': 'Resolution of 0.5 km',\n", + " '10000 km': 'Resolution of 10000 km',\n", + " '1000 km': 'Resolution of 1000 km',\n", + " '100 km': 'Resolution of 100 km',\n", + " '10 km': 'Resolution of 10 km',\n", + " '1 km': 'Resolution of 1 km',\n", + " '1x1 degree': 'Resolution of 1x1 degree',\n", + 
" '2.5 km': 'Resolution of 2.5 km',\n", + " '2500 km': 'Resolution of 2500 km',\n", + " '250 km': 'Resolution of 250 km',\n", + " '25 km': 'Resolution of 25 km',\n", + " '5000 km': 'Resolution of 5000 km',\n", + " '500 km': 'Resolution of 500 km',\n", + " '50 km': 'Resolution of 50 km',\n", + " '5 km': 'Resolution of 5 km'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nominal_resolution.pop(\"version_metadata\")\n", + "nominal_resolution" + ] + }, + { + "cell_type": "markdown", + "id": "b685e7cc-ac8b-4d39-bcaa-55ff2d247147", + "metadata": {}, + "source": [ + "### create product" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f406b6e3-c2ac-4fd9-986a-d7fbc8b16660", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'product': ['downscaled-dynamical', 'downscaled-statistical']}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "product = {}\n", + "product[\"product\"] = {}\n", + "product[\"product\"] = [\"downscaled-dynamical\", \"downscaled-statistical\"]\n", + "product" + ] + }, + { + "cell_type": "markdown", + "id": "816b2549-351b-41a1-9240-761cbcec5ef5", + "metadata": {}, + "source": [ + "### process realm" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "231119ad-b71e-403e-b550-5d71a1f1b521", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'realm': {'atmos': 'Atmosphere', 'land': 'Land Surface and Subsurface'}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "realm.pop(\"version_metadata\")\n", + "keyList = list(realm[\"realm\"].keys())\n", + "keepKeys = [\"atmos\", \"land\"]\n", + "for count, key in enumerate(keyList):\n", + " if key not in keepKeys:\n", + " realm[\"realm\"].pop(key)\n", + "realm" + ] + }, + { + "cell_type": "markdown", + "id": "092d66d2-ee02-48bc-b076-7884c445ef99", + 
"metadata": {}, + "source": [ + "### create region" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e064169a-691c-429f-9bab-8c8aaedd5425", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'region': ['north_america']}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "region = {}\n", + "region[\"region\"] = [\n", + " \"north_america\",\n", + "]\n", + "region" + ] + }, + { + "cell_type": "markdown", + "id": "de394a56-a3a3-4d6d-aadb-a88306de1875", + "metadata": {}, + "source": [ + "### create required_global_attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "aef1a997-5de5-4e79-8c54-4763a6509f83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'required_global_attributes': ['Conventions',\n", + " 'activity_id',\n", + " 'calendar',\n", + " 'contact',\n", + " 'creation_date',\n", + " 'data_specs_version',\n", + " 'driving_activity_id',\n", + " 'driving_experiment_id',\n", + " 'driving_mip_era',\n", + " 'driving_source_id',\n", + " 'driving_variant_label',\n", + " 'frequency',\n", + " 'grid',\n", + " 'grid_label',\n", + " 'license',\n", + " 'license_id',\n", + " 'license_url',\n", + " 'nominal_resolution',\n", + " 'product',\n", + " 'realm',\n", + " 'region',\n", + " 'region_id',\n", + " 'table_id',\n", + " 'tracking_id',\n", + " 'variable_id']}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "required_global_attributes = {}\n", + "required_global_attributes[\"required_global_attributes\"] = [\n", + " \"Conventions\",\n", + " \"activity_id\",\n", + " \"calendar\",\n", + " \"contact\",\n", + " \"creation_date\",\n", + " \"data_specs_version\",\n", + " \"driving_activity_id\",\n", + " \"driving_experiment_id\",\n", + " \"driving_mip_era\",\n", + " \"driving_source_id\",\n", + " \"driving_variant_label\",\n", + " \"frequency\",\n", + " \"grid\",\n", + " 
\"grid_label\",\n", + " \"license\",\n", + " \"license_id\",\n", + " \"license_url\",\n", + " \"nominal_resolution\",\n", + " \"product\",\n", + " \"realm\",\n", + " \"region\",\n", + " \"region_id\",\n", + " \"table_id\",\n", + " \"tracking_id\",\n", + " \"variable_id\",\n", + "]\n", + "required_global_attributes" + ] + }, + { + "cell_type": "markdown", + "id": "9d881409-b00a-41e2-8b7b-00963a6163ee", + "metadata": {}, + "source": [ + "### create source_id" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3252edfb-ebe0-42c4-b27d-c9bc3cbced1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'source_id': {'EDDE2-0': {'calendar': 'gregorian',\n", + " 'contact': 'Megan Mallard; Mallard.Megan@epa.gov',\n", + " 'further_info_url': '',\n", + " 'grid': '10 x 10 km latitude x longitude',\n", + " 'grid_label': 'gn',\n", + " 'institution_id': 'EPA',\n", + " 'license': 'Creative Commons Attribution 4.0 International',\n", + " 'license_id': 'CC BY 4.0',\n", + " 'license_url': 'https://creativecommons.org/licenses/by/4.0/',\n", + " 'nominal_resolution': '10 km',\n", + " 'product': 'downscaled-statistical',\n", + " 'reference': 'Nolte, Christopher G., Tanya L. Spero, Jared H. Bowden, Marcus C. Sarofim, Jeremy Martinich, Megan S. Mallard (2021) Regional temperature-ozone relationships across the U.S. under multiple climate and emissions scenarios. Journal of the Air & Waste Management Association 74 (10), pp 1251-1264. 
https://doi.org/10.1080/10962247.2021.1970048',\n", + " 'region': 'north_america',\n", + " 'region_id': 'NAM',\n", + " 'source': 'EDDE 2.0: EPA Dynamically Downscaled Ensemble based on CMIP5',\n", + " 'source_name': 'EDDE',\n", + " 'source_version': '2.0',\n", + " 'title': 'EDDE 2.0 dataset prepared for DRCDP'},\n", + " 'LOCA2-0': {'calendar': 'gregorian',\n", + " 'contact': 'Daniel Feldman; DRFeldman@lbl.gov',\n", + " 'further_info_url': 'https://loca.ucsd.edu/',\n", + " 'grid': '5 x 5 km latitude x longitude',\n", + " 'grid_label': 'gn',\n", + " 'institution_id': 'UCSD-SIO',\n", + " 'license': 'Creative Commons Attribution 4.0 International',\n", + " 'license_id': 'CC BY 4.0',\n", + " 'license_url': 'https://creativecommons.org/licenses/by/4.0/',\n", + " 'nominal_resolution': '5 km',\n", + " 'product': 'downscaled-statistical',\n", + " 'reference': 'Pierce, David W., Daniel R. Cayan, and Bridget L. Thrasher (2014) Statistical downscaling using Localized Constructed Analogs (LOCA). Journal of Hydrometeorology, 15 (6), pp 2558-2585. https://doi.org/10.1175/JHM-D-14-0082.1',\n", + " 'region': 'north_america',\n", + " 'region_id': 'NAM',\n", + " 'source': 'LOCA 2.0: Statistically-downscaled climate model projections based on CMIP6',\n", + " 'source_name': 'LOCA',\n", + " 'source_version': '2.0',\n", + " 'title': 'LOCA 2.0 dataset prepared for DRCDP'},\n", + " 'LOCA2-1': {'calendar': 'gregorian',\n", + " 'contact': 'Daniel Feldman; DRFeldman@lbl.gov',\n", + " 'further_info_url': 'https://loca.ucsd.edu/',\n", + " 'grid': '5 x 5 km latitude x longitude',\n", + " 'grid_label': 'gn',\n", + " 'institution_id': 'UCSD-SIO',\n", + " 'license': 'Creative Commons Attribution 4.0 International',\n", + " 'license_id': 'CC BY 4.0',\n", + " 'license_url': 'https://creativecommons.org/licenses/by/4.0/',\n", + " 'nominal_resolution': '5 km',\n", + " 'product': 'downscaled-statistical',\n", + " 'reference': 'Pierce, David W., Daniel R. Cayan, and Bridget L. 
Thrasher (2014) Statistical downscaling using Localized Constructed Analogs (LOCA). Journal of Hydrometeorology, 15 (6), pp 2558-2585. https://doi.org/10.1175/JHM-D-14-0082.1',\n", + " 'region': 'north_america',\n", + " 'region_id': 'NAM',\n", + " 'source': 'LOCA 2.1: Statistically-downscaled climate model projections based on CMIP6',\n", + " 'source_name': 'LOCA',\n", + " 'source_version': '2.1',\n", + " 'title': 'LOCA 2.1 dataset prepared for DRCDP'},\n", + " 'MACA3-0': {'calendar': 'gregorian',\n", + " 'contact': 'John T. Abatzoglou; jabatzoglou@ucmerced.edu',\n", + " 'further_info_url': 'https://www.climatologylab.org/maca.html',\n", + " 'grid': '10 x 10 km latitude x longitude',\n", + " 'grid_label': 'gn',\n", + " 'institution_id': 'UCM-ACSL',\n", + " 'license': 'Creative Commons CC0 1.0 Universal Public Domain Dedication',\n", + " 'license_id': 'CC0 1.0',\n", + " 'license_url': 'https://creativecommons.org/publicdomain/zero/1.0/',\n", + " 'nominal_resolution': '10 km',\n", + " 'product': 'downscaled-statistical',\n", + " 'reference': 'Abatzoglou, John T., and Timothy J. Brown (2012) A comparison of statistical downscaling methods suited for wildfire applications. International Journal of Climatology, 32 (5), pp 772-780. 
https://doi.org/10.1002/joc.2312',\n", + " 'region': 'north_america',\n", + " 'region_id': 'NAM',\n", + " 'source': 'MACA 3.0: Statistically-downscaled climate model projections based on CMIP6',\n", + " 'source_name': 'MACA',\n", + " 'source_version': '3.0',\n", + " 'title': 'MACA 3.0 dataset prepared for DRCDP'},\n", + " 'STAR-ESDM1-0': {'calendar': '365_day',\n", + " 'contact': 'Katharine Hayhoe; katharine.hayhoe@ttu.edu',\n", + " 'further_info_url': 'https://www.depts.ttu.edu/csc/data/',\n", + " 'grid': '5 x 5 km latitude x longitude',\n", + " 'grid_label': 'gn',\n", + " 'institution_id': 'TTU',\n", + " 'license': 'Creative Commons Attribution 4.0 International',\n", + " 'license_id': 'CC BY 4.0',\n", + " 'license_url': 'https://creativecommons.org/licenses/by/4.0/',\n", + " 'nominal_resolution': '5 km',\n", + " 'product': 'downscaled-statistical',\n", + " 'reference': \"Hayhoe, K., Scott-Fleming, I., Stoner, A., and Wuebbles, D. J. (2024) STAR-ESDM: A generalizable approach to generating high-resolution climate projections through signal decomposition. Earth's Future, 12, e2023EF004107. 
https://doi.org/10.1029/2023EF004107\",\n", + " 'region': 'north_america',\n", + " 'region_id': 'NAM',\n", + " 'source': 'STAR-ESDM 1.0: Statistically-downscaled climate model projections based on CMIP6',\n", + " 'source_name': 'STAR-ESDM',\n", + " 'source_version': '1.0',\n", + " 'title': 'STAR-ESDM 1.0 dataset prepared for DRCDP'}}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "source_id = {}\n", + "source_id[\"source_id\"] = {}\n", + "# EDDE2-0\n", + "source_id[\"source_id\"][\"EDDE2-0\"] = {}\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"calendar\"] = \"gregorian\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"contact\"] = \"Megan Mallard; Mallard.Megan@epa.gov\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"further_info_url\"] = \"\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"grid\"] = \"10 x 10 km latitude x longitude\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"grid_label\"] = \"gn\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"institution_id\"] = \"EPA\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\n", + " \"license\"\n", + "] = \"Creative Commons Attribution 4.0 International\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"license_id\"] = \"CC BY 4.0\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\n", + " \"license_url\"\n", + "] = \"https://creativecommons.org/licenses/by/4.0/\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"nominal_resolution\"] = \"10 km\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"product\"] = \"downscaled-statistical\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"reference\"] = \" \".join(\n", + " [\n", + " \"Nolte, Christopher G., Tanya L. Spero, Jared H. Bowden,\",\n", + " \"Marcus C. Sarofim, Jeremy Martinich, Megan S. Mallard\",\n", + " \"(2021) Regional temperature-ozone relationships across\",\n", + " \"the U.S. 
under multiple climate and emissions scenarios.\",\n", + " \"Journal of the Air & Waste Management Association 74 (10),\",\n", + " \"pp 1251-1264. https://doi.org/10.1080/10962247.2021.1970048\",\n", + " ]\n", + ")\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"region\"] = \"north_america\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"region_id\"] = \"NAM\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\n", + " \"source\"\n", + "] = \"EDDE 2.0: EPA Dynamically Downscaled Ensemble based on CMIP5\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"source_name\"] = \"EDDE\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"source_version\"] = \"2.0\"\n", + "source_id[\"source_id\"][\"EDDE2-0\"][\"title\"] = \"EDDE 2.0 dataset prepared for DRCDP\"\n", + "# LOCA2-0\n", + "source_id[\"source_id\"][\"LOCA2-0\"] = {}\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"calendar\"] = \"gregorian\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"contact\"] = \"Daniel Feldman; DRFeldman@lbl.gov\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"further_info_url\"] = \"https://loca.ucsd.edu/\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"grid\"] = \"5 x 5 km latitude x longitude\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"grid_label\"] = \"gn\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"institution_id\"] = \"UCSD-SIO\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\n", + " \"license\"\n", + "] = \"Creative Commons Attribution 4.0 International\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"license_id\"] = \"CC BY 4.0\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\n", + " \"license_url\"\n", + "] = \"https://creativecommons.org/licenses/by/4.0/\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"nominal_resolution\"] = \"5 km\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"product\"] = \"downscaled-statistical\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"reference\"] = \" \".join(\n", + " [\n", + " \"Pierce, David W., Daniel R. Cayan, and Bridget L. 
Thrasher\",\n", + " \"(2014) Statistical downscaling using Localized Constructed\",\n", + " \"Analogs (LOCA). Journal of Hydrometeorology, 15 (6), pp\",\n", + " \"2558-2585. https://doi.org/10.1175/JHM-D-14-0082.1\",\n", + " ]\n", + ")\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"region\"] = \"north_america\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"region_id\"] = \"NAM\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\n", + " \"source\"\n", + "] = \"LOCA 2.0: Statistically-downscaled climate model projections based on CMIP6\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"source_name\"] = \"LOCA\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"source_version\"] = \"2.0\"\n", + "source_id[\"source_id\"][\"LOCA2-0\"][\"title\"] = \"LOCA 2.0 dataset prepared for DRCDP\"\n", + "# LOCA2-1\n", + "source_id[\"source_id\"][\"LOCA2-1\"] = {}\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"calendar\"] = \"gregorian\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"contact\"] = \"Daniel Feldman; DRFeldman@lbl.gov\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"further_info_url\"] = \"https://loca.ucsd.edu/\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"grid\"] = \"5 x 5 km latitude x longitude\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"grid_label\"] = \"gn\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"institution_id\"] = \"UCSD-SIO\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\n", + " \"license\"\n", + "] = \"Creative Commons Attribution 4.0 International\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"license_id\"] = \"CC BY 4.0\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\n", + " \"license_url\"\n", + "] = \"https://creativecommons.org/licenses/by/4.0/\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"nominal_resolution\"] = \"5 km\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"product\"] = \"downscaled-statistical\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"reference\"] = \" \".join(\n", + " [\n", + " \"Pierce, David W., Daniel R. 
Cayan, and Bridget L. Thrasher\",\n", + " \"(2014) Statistical downscaling using Localized Constructed\",\n", + " \"Analogs (LOCA). Journal of Hydrometeorology, 15 (6), pp\",\n", + " \"2558-2585. https://doi.org/10.1175/JHM-D-14-0082.1\",\n", + " ]\n", + ")\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"region\"] = \"north_america\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"region_id\"] = \"NAM\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\n", + " \"source\"\n", + "] = \"LOCA 2.1: Statistically-downscaled climate model projections based on CMIP6\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"source_name\"] = \"LOCA\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"source_version\"] = \"2.1\"\n", + "source_id[\"source_id\"][\"LOCA2-1\"][\"title\"] = \"LOCA 2.1 dataset prepared for DRCDP\"\n", + "# MACA3-0\n", + "source_id[\"source_id\"][\"MACA3-0\"] = {}\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"calendar\"] = \"gregorian\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"contact\"\n", + "] = \"John T. 
Abatzoglou; jabatzoglou@ucmerced.edu\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"further_info_url\"\n", + "] = \"https://www.climatologylab.org/maca.html\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"grid\"] = \"10 x 10 km latitude x longitude\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"grid_label\"] = \"gn\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"institution_id\"] = \"UCM-ACSL\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"license\"\n", + "] = \"Creative Commons CC0 1.0 Universal Public Domain Dedication\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"license_id\"\n", + "] = \"CC0 1.0\" # https://www.climatologylab.org/maca.html#References\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"license_url\"\n", + "] = \"https://creativecommons.org/publicdomain/zero/1.0/\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"nominal_resolution\"] = \"10 km\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"product\"] = \"downscaled-statistical\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"reference\"] = \" \".join(\n", + " [\n", + " \"Abatzoglou, John T., and Timothy J. Brown\",\n", + " \"(2012) A comparison of statistical downscaling\",\n", + " \"methods suited for wildfire applications.\",\n", + " \"International Journal of Climatology, 32 (5),\",\n", + " \"pp 772-780. 
https://doi.org/10.1002/joc.2312\",\n", + " ]\n", + ")\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"region\"] = \"north_america\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"region_id\"] = \"NAM\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\n", + " \"source\"\n", + "] = \"MACA 3.0: Statistically-downscaled climate model projections based on CMIP6\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"source_name\"] = \"MACA\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"source_version\"] = \"3.0\"\n", + "source_id[\"source_id\"][\"MACA3-0\"][\"title\"] = \"MACA 3.0 dataset prepared for DRCDP\"\n", + "# STAR-ESDM1-0\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"] = {}\n", + "\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"calendar\"] = \"365_day\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"contact\"\n", + "] = \"Katharine Hayhoe; katharine.hayhoe@ttu.edu\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"further_info_url\"\n", + "] = \"https://www.depts.ttu.edu/csc/data/\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"grid\"] = \"5 x 5 km latitude x longitude\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"grid_label\"] = \"gn\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"institution_id\"] = \"TTU\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"license\"\n", + "] = \"Creative Commons Attribution 4.0 International\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"license_id\"] = \"CC BY 4.0\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"license_url\"\n", + "] = \"https://creativecommons.org/licenses/by/4.0/\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"nominal_resolution\"] = \"5 km\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"product\"] = \"downscaled-statistical\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"reference\"] = \" \".join(\n", + " [\n", + " \"Hayhoe, K., Scott-Fleming, I., Stoner, A., and\",\n", + " \"Wuebbles, D. J. 
(2024) STAR-ESDM: A generalizable\",\n", + " \"approach to generating high-resolution climate\",\n", + " \"projections through signal decomposition.\",\n", + " \"Earth's Future, 12, e2023EF004107.\",\n", + " \"https://doi.org/10.1029/2023EF004107\",\n", + " ]\n", + ")\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"region\"] = \"north_america\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"region_id\"] = \"NAM\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"source\"\n", + "] = \"STAR-ESDM 1.0: Statistically-downscaled climate model projections based on CMIP6\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"source_name\"] = \"STAR-ESDM\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\"source_version\"] = \"1.0\"\n", + "source_id[\"source_id\"][\"STAR-ESDM1-0\"][\n", + " \"title\"\n", + "] = \"STAR-ESDM 1.0 dataset prepared for DRCDP\"\n", + "source_id" + ] + }, + { + "cell_type": "markdown", + "id": "c039c9c2-952c-4453-a724-44ffcfc2eac0", + "metadata": {}, + "source": [ + "### create table_id" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "521c4d74-2bd0-403a-a0ff-a6fba23317c4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'table_id': ['AP1hr', 'APday']}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table_id = {}\n", + "table_id[\"table_id\"] = [\"AP1hr\", \"APday\"]\n", + "table_id" + ] + }, + { + "cell_type": "markdown", + "id": "b53902bf-a9c0-49a7-9251-236de870e855", + "metadata": {}, + "source": [ + "### create tracking_id" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2d08fa88-60ec-410e-b613-9379cb9e492e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tracking_id_prefix': ['hdl:21.14100']}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tracking_id = {}\n", + "tracking_id[\"tracking_id_prefix\"] = 
[\"hdl:21.14100\"]\n", + "tracking_id" + ] + }, + { + "cell_type": "markdown", + "id": "c490eaff-cef4-4d60-91d4-3e2eaf25fca3", + "metadata": {}, + "source": [ + "### write all files out to repo root dir" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7b260d38-8910-4f81-a728-e4bdd4fee48c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 DRS\n", + "1 frequency\n", + "2 grid_label\n", + "3 institution_id\n", + "4 license\n", + "5 nominal_resolution\n", + "6 product\n", + "7 realm\n", + "8 region\n", + "9 required_global_attributes\n", + "10 source_id\n", + "11 table_id\n", + "12 tracking_id\n" + ] + } + ], + "source": [ + "files = [\n", + " \"DRS\",\n", + " \"frequency\",\n", + " \"grid_label\",\n", + " \"institution_id\",\n", + " \"license\",\n", + " \"nominal_resolution\",\n", + " \"product\",\n", + " \"realm\",\n", + " \"region\",\n", + " \"required_global_attributes\",\n", + " \"source_id\",\n", + " \"table_id\",\n", + " \"tracking_id\",\n", + "]\n", + "for count, name in enumerate(files):\n", + " print(count, name)\n", + " dic = eval(name)\n", + " # write file\n", + " outFile = \"\".join([\"../DRCDP_\", name, \".json\"])\n", + " with open(outFile, \"w\") as f:\n", + " json.dump(\n", + " dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\")\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "124499ca-dee0-4327-b30e-889121ba2e1a", + "metadata": {}, + "source": [ + "### create composite institution_id for DRCDP_CV.json" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "725bda20-55c4-447a-bf28-367b4de29bd3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 18 μs, sys: 1 μs, total: 19 μs\n", + "Wall time: 18.8 μs\n" + ] + }, + { + "data": { + "text/plain": [ + "{'EPA': 'United States Environmental Protection Agency, National Exposure Research Laboratory (NERL), Systems 
Exposure Division, 109 T.W. Alexander Drive, Durham, NC 27709, USA (ROR: 03tns0030)',\n", + " 'UCM-ACSL': 'Applied Climate Science Laboratory, University of California, Merced, 5200 N. Lake Road, Merced, CA 95343, USA (ROR: 00d9ah105)',\n", + " 'UCSD-SIO': 'Scripps Institution of Oceanography, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA (ROR: 04v7hvq31)',\n", + " 'TTU': 'Texas Tech University, 2520 Broadway Avenue, Lubbock, TX 79409, USA (ROR: 0405mnx93)'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "keys = institution_id[\"institution_id\"].keys()\n", + "newInstId = {}\n", + "for count, key in enumerate(keys):\n", + " newInstId[key] = {}\n", + " tmpStr = \"\".join([institution_id[\"institution_id\"][key][\"name\"], \" (ROR: \", institution_id[\"institution_id\"][key][\"ROR\"], \")\"])\n", + " newInstId[key] = tmpStr\n", + "newInstId" + ] + }, + { + "cell_type": "markdown", + "id": "c9eaf433-3247-48fd-8b4f-79fb2edabe32", + "metadata": {}, + "source": [ + "### create composite DRCDP_CV.json for CMOR" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9b07bd0c-1f02-4b55-b5c9-924d69cfd232", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 578 μs, sys: 957 μs, total: 1.54 ms\n", + "Wall time: 855 μs\n" + ] + } + ], + "source": [ + "%%time\n", + "CV = {}\n", + "CV[\"CV\"] = {}\n", + "CV[\"CV\"][\"DRS\"] = DRS[\"DRS\"]\n", + "CV[\"CV\"][\"activity_id\"] = [\"DRCDP\"]\n", + "CV[\"CV\"][\"frequency\"] = frequency[\"frequency\"]\n", + "CV[\"CV\"][\"grid_label\"] = grid_label[\"grid_label\"]\n", + "CV[\"CV\"][\"institution_id\"] = newInstId\n", + "CV[\"CV\"][\"license\"] = license[\"license\"]\n", + "CV[\"CV\"][\"nominal_resolution\"] = nominal_resolution[\"nominal_resolution\"]\n", + "CV[\"CV\"][\"product\"] = product[\"product\"]\n", + "CV[\"CV\"][\"realm\"] = 
realm[\"realm\"]\n", + "CV[\"CV\"][\"region\"] = region[\"region\"]\n", + "CV[\"CV\"][\"required_global_attributes\"] = required_global_attributes[\n", + " \"required_global_attributes\"\n", + "]\n", + "CV[\"CV\"][\"source_id\"] = source_id[\"source_id\"]\n", + "CV[\"CV\"][\"table_id\"] = table_id[\"table_id\"]\n", + "CV[\"CV\"][\"tracking_id_prefix\"] = tracking_id[\"tracking_id_prefix\"]\n", + "# CV\n", + "# write file\n", + "outFile = \"\".join([\"../Tables/DRCDP_CV.json\"])\n", + "with open(outFile, \"w\") as f:\n", + " json.dump(CV, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/pullTable.ipynb b/src/pullTable.ipynb new file mode 100644 index 0000000..89a22f6 --- /dev/null +++ b/src/pullTable.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4b55f3d1-4c9b-4f86-8369-e85f4c4a80cc", + "metadata": {}, + "source": [ + "# Pull MIP table files; strip out extraneous variables\n", + "
\n", + "

\n", + " \"Program \n", + " \"Lawrence \n", + " \"United\n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "37b3c847-513c-4dc1-8c3e-a543d2e150df", + "metadata": {}, + "source": [ + "**Summary**\n", + "\n", + "This file pulls CMIP6Plus/CMOR3.9.0-era MIP table files, strips out\n", + "extraneous variables and saves the files for local use\n", + "\n", + "**Authors**\n", + "\n", + "Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))\n", + "\n", + "**Notes**\n", + "\n", + "PJD 14 Feb 2025 - initiated
\n", + "PJD 14 Feb 2025 - updated with coordinate, formula_terms and grids
\n", + "PJD 15 Feb 2025 - added \"Header\" to DRCDP_grids.json, may need to tweak this as grid_mapping functions are used (Header caused issues with coordinate, formula_terms)
\n", + "PJD 15 Feb 2025 - corrected DRCDP_grids, which was erroneously a copy of coordinate info
\n", + "PJD 20 Feb 2025 - added hursmax, hursmin, rsds, sfcWind, and tdps variables
\n", + "PJD 2 Jun 2025 - updating `DRCDP_grids.json` to match CMIP6 template (missing Header)
\n", + "Note: `DRCDP_coordinate.json`, `DRCDP_formula_terms.json` match CMIP6 examples
\n", + "PJD 2 Sep 2025 - update, add source_id EDDE2-0; AP1hr table
\n", + "TODO:\n", + "\n", + "**Links**" + ] + }, + { + "cell_type": "markdown", + "id": "919a448f-79ac-49fe-ad24-024b5f8ace32", + "metadata": {}, + "source": [ + "### imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d7398b5e-e6a1-4320-92ef-a774fd942b95", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 42.3 ms, sys: 17.3 ms, total: 59.6 ms\n", + "Wall time: 68.7 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "import datetime\n", + "import json\n", + "import os\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "b61a22fa-0937-44fa-b854-49edb9de05da", + "metadata": {}, + "source": [ + "### set table, coordinate, formula_terms and grids URLs and pull files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4afa0c75-2a7e-48cb-b357-387ec7401adb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 AP1hr\n", + "1 APday\n", + "2 coordinate\n", + "3 formula_terms\n", + "4 grids\n", + "CPU times: user 13.6 ms, sys: 6.12 ms, total: 19.7 ms\n", + "Wall time: 96.5 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "urls = {\n", + " \"AP1hr\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_AP1hr.json\",\n", + " \"APday\": \"https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_APday.json\",\n", + " \"coordinate\": \"https://raw.githubusercontent.com/PCMDI/input4mips-cmor-tables/refs/heads/master/Tables/input4MIPs_coordinate.json\",\n", + " \"formula_terms\": \"https://raw.githubusercontent.com/PCMDI/input4mips-cmor-tables/refs/heads/master/Tables/input4MIPs_formula_terms.json\",\n", + " \"grids\": \"https://raw.githubusercontent.com/PCMDI/cmip6-cmor-tables/refs/heads/main/Tables/CMIP6_grids.json\",\n", + "}\n", + "\n", + "# loop through urls\n", + "keys = urls.keys()\n", + "for count, key in enumerate(keys):\n", + " print(count, 
key)\n", + " url = urls[key]\n", + " try:\n", + " response = requests.get(url)\n", + " response.raise_for_status() # Raise HTTPError for bad responses (4/5xx)\n", + " vars()[key] = json.loads(response.text)\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"Request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " print(f\"JSON decode failed: {e}\")\n", + " except Exception as e:\n", + " print(f\"Unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "061841c6-6b22-4f6d-bf93-79424bb2baca", + "metadata": {}, + "source": [ + "### AP1hr, APday - trim out redundant variables" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "451eace0-e968-4e2e-a016-7933ef273449", + "metadata": {}, + "outputs": [], + "source": [ + "keepKeys = [\"hursmax\", \"hursmin\", \"pr\", \"rsds\", \"sfcWind\", \"tasmax\", \"tasmin\", \"tdps\"]\n", + "keyList = list(AP1hr[\"variable_entry\"].keys())\n", + "for count, key in enumerate(keyList):\n", + " if key not in keepKeys:\n", + " AP1hr[\"variable_entry\"].pop(key)\n", + "keyList = list(APday[\"variable_entry\"].keys())\n", + "for count, key in enumerate(keyList):\n", + " if key not in keepKeys:\n", + " APday[\"variable_entry\"].pop(key)" + ] + }, + { + "cell_type": "markdown", + "id": "d19aff9f-cff5-41de-a149-5f8185485dce", + "metadata": {}, + "source": [ + "### check remaining table entries" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cc757ae6-8cd1-4062-b305-183c6ec33911", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['hursmax', 'hursmin', 'pr', 'rsds', 'sfcWind', 'tasmax', 'tasmin', 'tdps'])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# cleanup comments\n", + "for key in APday[\"variable_entry\"].keys():\n", + " APday[\"variable_entry\"][key][\"comment\"] = APday[\"variable_entry\"][key][\n", + " \"comment\"\n", + " ].replace(\".\", 
\"\")\n", + " APday[\"variable_entry\"][key][\"comment\"] = APday[\"variable_entry\"][key][\n", + " \"comment\"\n", + " ].replace(\"T> 0\", \"T>0\")\n", + "APday[\"variable_entry\"].keys()" + ] + }, + { + "cell_type": "markdown", + "id": "f374545b-51b6-43da-9a89-b4c4766adad8", + "metadata": {}, + "source": [ + "### update header" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6edc9a93-39b7-4e7d-8c26-566fb0891dfb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Conventions': 'CF-1.7 CMIP-6.5',\n", + " 'approx_interval': 1.0,\n", + " 'checksum': '',\n", + " 'cmor_version': '3.8.0',\n", + " 'data_specs_version': '6.5.0.0',\n", + " 'generic_levels': '',\n", + " 'int_missing_value': '-999',\n", + " 'missing_value': '1e20',\n", + " 'product': 'model-output',\n", + " 'table_date': '2024-02-28',\n", + " 'table_id': 'APday'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "APday[\"Header\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "21c4f23d-9f86-4180-980e-008db9f898fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mipTHash: f42386929a0057ed15e66a3bac045b8c00d33c0f\n", + "inp4Hash: e4fa82f330125fbdd78fa8315225829eabffee70\n", + "CPU times: user 16 ms, sys: 5.19 ms, total: 21.2 ms\n", + "Wall time: 471 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "# get current commit hash\n", + "url = \"https://api.github.com/repos/PCMDI/mip-cmor-tables/commits/main\"\n", + "response = requests.get(url)\n", + "js = json.loads(response.text)\n", + "mipTHash = js[\"sha\"]\n", + "url = \"https://api.github.com/repos/PCMDI/input4MIPs-cmor-tables/commits/master\"\n", + "response = requests.get(url)\n", + "js = json.loads(response.text)\n", + "inp4Hash = js[\"sha\"]\n", + "print(\"mipTHash:\", mipTHash)\n", + "print(\"inp4Hash:\", inp4Hash)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + 
"id": "b759f7d8-8deb-4069-b385-93e4c61e9610", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Conventions': 'CF-1.7 CMIP-6.5',\n", + " 'approx_interval': 1.0,\n", + " 'cmor_version': '3.9.0',\n", + " 'data_specs_version': '6.5.0.0; mip-cmor-tables; f42386929a0057ed15e66a3bac045b8c00d33c0f',\n", + " 'generic_levels': '',\n", + " 'int_missing_value': '-999',\n", + " 'missing_value': '1e20',\n", + " 'product': 'DRCDP',\n", + " 'table_date': '2025-09-03',\n", + " 'table_id': 'APday'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "AP1hr[\"Header\"][\"cmor_version\"] = \"3.9.0\"\n", + "AP1hr[\"Header\"][\"data_specs_version\"] = \" \".join(\n", + " [\"6.5.0.0; mip-cmor-tables;\", mipTHash]\n", + ")\n", + "AP1hr[\"Header\"][\"table_date\"] = datetime.datetime.now().strftime(\"%Y-%m-%d\")\n", + "AP1hr[\"Header\"][\"product\"] = \"DRCDP\"\n", + "AP1hr[\"Header\"].pop(\"checksum\") # remove invalid entry, CMOR 3.9.0\n", + "AP1hr[\"Header\"]\n", + "\n", + "APday[\"Header\"][\"cmor_version\"] = \"3.9.0\"\n", + "APday[\"Header\"][\"data_specs_version\"] = \" \".join(\n", + " [\"6.5.0.0; mip-cmor-tables;\", mipTHash]\n", + ")\n", + "APday[\"Header\"][\"table_date\"] = datetime.datetime.now().strftime(\"%Y-%m-%d\")\n", + "APday[\"Header\"][\"product\"] = \"DRCDP\"\n", + "APday[\"Header\"].pop(\"checksum\") # remove invalid entry, CMOR 3.9.0\n", + "APday[\"Header\"]" + ] + }, + { + "cell_type": "markdown", + "id": "93331885-8c27-498f-ace6-e102e85060b6", + "metadata": {}, + "source": [ + "### build Header entry for grids (ignore coordinate, formula_terms)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8f74c354-4f6f-42ca-9da2-0f82fbeee87d", + "metadata": {}, + "outputs": [], + "source": [ + "Header = {}\n", + "Header[\"Conventions\"] = \"CF-1.7 CMIP-6.5\"\n", + "Header[\"cmor_version\"] = \"3.9.0\"\n", + "# Header[\"data_specs_version\"] = \" \".join([\"6.5.0.0; 
input4MIPs-cmor-tables;\", inp4Hash])\n", + "Header[\"data_specs_version\"] = (\n", + " \"6.5.0.0; cmip6-cmor-tables; 30dbad3ea0785c760c621b02185025c69df4d314\"\n", + ")\n", + "Header[\"missing_value\"] = \"1e20\"\n", + "Header[\"product\"] = \"DRCDP\"\n", + "Header[\"table_date\"] = datetime.datetime.now().strftime(\"%Y-%m-%d\")\n", + "Header[\"table_id\"] = \"Table grids\"" + ] + }, + { + "cell_type": "markdown", + "id": "6b3727a6-8a0b-410a-afdb-92c810e95c9d", + "metadata": {}, + "source": [ + "### write all files out to Tables subdir" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c9b3f3d2-b64f-47be-90a0-48cb319bdd8d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 AP1hr\n", + "1 APday\n", + "2 coordinate\n", + "3 formula_terms\n", + "4 grids\n" + ] + } + ], + "source": [ + "tableName = \"../Tables/DRCDP_APday.json\"\n", + "files = [\"AP1hr\", \"APday\", \"coordinate\", \"formula_terms\", \"grids\"]\n", + "for count, name in enumerate(files):\n", + " print(count, name)\n", + " dic = eval(name)\n", + " # add Header\n", + " if name not in [\"AP1hr\", \"APday\", \"coordinate\", \"formula_terms\"]:\n", + " # exclude Header rewrite for APday, Header add for coordinate/formula_terms as changes CMOR 3.9.0 behaviours\n", + " dic[\"Header\"] = Header\n", + " # write file\n", + " outFile = \"\".join([\"../Tables/DRCDP_\", name, \".json\"])\n", + " with open(outFile, \"w\") as f:\n", + " json.dump(\n", + " dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\")\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +}