From de778cc5ff4ba85fc09d7590c7461ca1e19d902c Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Mon, 23 Jun 2025 10:31:20 -0400 Subject: [PATCH 1/6] First commit of JOSE manuscript --- paper.bib | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ paper.md | 101 ++++++++++++++++ 2 files changed, 440 insertions(+) create mode 100644 paper.bib create mode 100644 paper.md diff --git a/paper.bib b/paper.bib new file mode 100644 index 000000000..14ef0b05a --- /dev/null +++ b/paper.bib @@ -0,0 +1,339 @@ +@article{Matplotlib_2007, + author = {Hunter, John D.}, + date-added = {2025-06-23 10:28:09 -0400}, + date-modified = {2025-06-23 10:28:24 -0400}, + doi = {10.1109/MCSE.2007.55}, + journal = {Computing in Science \& Engineering}, + keywords = {Graphics;Interpolation;Equations;Graphical user interfaces;Packaging;Image generation;User interfaces;Operating systems;Computer languages;Programming profession;Python;scripting languages;application development;scientific programming}, + number = {3}, + pages = {90--95}, + title = {Matplotlib: A {2D} Graphics Environment}, + volume = {9}, + year = {2007}, + bdsk-url-1 = {https://doi.org/10.1109/MCSE.2007.55}} + +@inproceedings{mckinney-proc-scipy-2010, + author = {{W}es {M}c{K}inney}, + booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference}, + date-added = {2025-06-23 10:25:10 -0400}, + date-modified = {2025-06-23 10:25:10 -0400}, + doi = {10.25080/Majora-92bf1922-00a}, + editor = {{S}t{\'e}fan van der {W}alt and {J}arrod {M}illman}, + pages = {56--61}, + title = {{D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython}, + year = {2010}, + bdsk-url-1 = {https://doi.org/10.25080/Majora-92bf1922-00a}} + +@inproceedings{binder-2018, + author = {{Project Jupyter} and {M}atthias {B}ussonnier and {J}essica {F}orde and {J}eremy {F}reeman and {B}rian {G}ranger and {T}im {H}ead and {C}hris {H}oldgraf and {K}yle {K}elley and {G}ladys {N}alvarte and {A}ndrew {O}sheroff and {M} {P}acer and {Y}uvi {P}anda and 
{F}ernando {P}erez and {B}enjamin {R}agan-{K}elley and {C}arol {W}illing}, + booktitle = {{P}roceedings of the 17th {P}ython in {S}cience {C}onference}, + date-added = {2025-06-23 10:23:31 -0400}, + date-modified = {2025-06-23 10:23:59 -0400}, + doi = {10.25080/Majora-4af1f417-011}, + editor = {{F}atih {A}kici and {D}avid {L}ippa and {D}illon {N}iederhut and {M} {P}acer}, + pages = {113--120}, + title = {{B}inder 2.0 - {R}eproducible, interactive, sharable environments for science at scale}, + year = {2018}, + bdsk-url-1 = {https://doi.org/10.25080/Majora-4af1f417-011}} + +@inproceedings{boerner_access_2023, + abstract = {As the National Science Foundation evolves its investments in cyberinfrastructure, it has made a significant investment in the ACCESS (Advanced Cyberinfrastructure Coordination Ecosystem: Services \& Support) program instantiating a novel set of services along with a novel governance and management model. Research cyberinfrastructure (CI) is a key catalyst for discovery and innovation and plays a critical role in ensuring U.S. leadership in science and engineering, economic competitiveness, and national security, consistent with NSF's mission. Funding of a set of awards through the ACCESS program has established a suite of CI coordination services targeted at supporting a broad and diverse set of requirements, researchers, and usage modalities spanning all areas of science and engineering research and education complemented by support for the collective and coordinated operation of the overall ACCESS program.}, + address = {New York, NY, USA}, + author = {Boerner, Timothy J. and Deems, Stephen and Furlani, Thomas R. and Knuth, Shelley L. 
and Towns, John}, + booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2023: {Computing} for the {Common} {Good}}, + date-added = {2025-06-23 10:14:04 -0400}, + date-modified = {2025-06-23 10:17:20 -0400}, + doi = {10.1145/3569951.3597559}, + isbn = {978-1-4503-9985-2}, + month = sep, + pages = {173--176}, + publisher = {Association for Computing Machinery}, + series = {{PEARC} '23}, + shorttitle = {{ACCESS}}, + title = {{ACCESS}: {Advancing} {Innovation}: {NSF}'s {Advanced} {Cyberinfrastructure} {Coordination} {Ecosystem}: {Services} \& {Support}}, + url = {https://dl.acm.org/doi/10.1145/3569951.3597559}, + urldate = {2025-03-08}, + year = {2023}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/3569951.3597559}, + bdsk-url-2 = {https://doi.org/10.1145/3569951.3597559}} + +@inproceedings{hancock_jetstream2_2021, + abstract = {Jetstream2 will be a category I production cloud resource that is part of the National Science Foundation's Innovative HPC Program. The project's aim is to accelerate science and engineering by providing ``on-demand'' programmable infrastructure built around a core system at Indiana University and four regional sites. Jetstream2 is an evolution of the Jetstream platform, which functions primarily as an Infrastructure-as-a-Service cloud. The lessons learned in cloud architecture, distributed storage, and container orchestration have inspired changes in both hardware and software for Jetstream2. These lessons have wide implications as institutions converge HPC and cloud technology while building on prior work when deploying their own cloud environments. Jetstream2's next-generation hardware, robust open-source software, and enhanced virtualization will provide a significant platform to further cloud adoption within the US research and education communities.}, + address = {New York, NY, USA}, + author = {Hancock, David Y. 
and Fischer, Jeremy and Lowe, John Michael and Snapp-Childs, Winona and Pierce, Marlon and Marru, Suresh and Coulter, J. Eric and Vaughn, Matthew and Beck, Brian and Merchant, Nirav and Skidmore, Edwin and Jacobs, Gwen}, + booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2021: {Evolution} {Across} {All} {Dimensions}}, + date-added = {2025-06-23 10:13:55 -0400}, + date-modified = {2025-06-23 10:22:12 -0400}, + doi = {10.1145/3437359.3465565}, + isbn = {978-1-4503-8292-2}, + month = jul, + pages = {1--8}, + publisher = {Association for Computing Machinery}, + series = {{PEARC} '21}, + shorttitle = {Jetstream2}, + title = {Jetstream2: {Accelerating} cloud computing via {Jetstream}}, + url = {https://dl.acm.org/doi/10.1145/3437359.3465565}, + urldate = {2025-03-08}, + year = {2021}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/3437359.3465565}, + bdsk-url-2 = {https://doi.org/10.1145/3437359.3465565}} + +@article{cockett_continuous_2024, + abstract = {Science requires new mediums to compose ideas and ways to share research findings iteratively, as early as possible and connected directly to software and data. 
In this paper we discuss two tools for scientific authoring and publishing, MyST Markdown and Curvenote, and illustrate examples of improving metadata, reimagining the reading experience, including computational content, and transforming publishing practices for individuals and societies through automation and continuous practices.}, + author = {Cockett, Rowan and Purves, Steve and Koch, Franklin and Morrison, Mike}, + date-added = {2025-06-23 10:13:45 -0400}, + date-modified = {2025-06-23 10:17:54 -0400}, + doi = {10.25080/NKVC9349}, + issn = {2575-9752}, + journal = {Proceedings of the {Python} in {Science} {Conference}}, + language = {en}, + month = jun, + pages = {121--136}, + title = {Continuous {Tools} for {Scientific} {Publishing}}, + url = {https://proceedings.scipy.org/articles/NKVC9349}, + urldate = {2025-02-06}, + year = {2024}, + bdsk-url-1 = {https://proceedings.scipy.org/articles/NKVC9349}, + bdsk-url-2 = {https://doi.org/10.25080/NKVC9349}} + +@misc{rowan_cockett_jupyter-bookmystmd_2025, + author = {Rowan Cockett and Franklin Koch and Steve Purves and Angus Hollands and Yuxi Wang and Dylan Grandmont and Chris Holdgraf and Andrea and Jan-Hendrik M{\"u}ller and Spencer Lyon and Cristian Le and Jim Madge and wwx and Sugan Reden and Yuanhao Geng and Ryan Lovett and Mikkel Roald-Arb{\o}l and Matt McKay and Matthew Brett and M Bussonnier and Mridul Seth and Nicolas M. 
Thi{\'e}ry and Raniere Silva and Sarah Brown and Sinan Bekar and Tavin Cole and Thad Guidry and Toby Driscoll}, + copyright = {MIT License}, + date-added = {2025-06-23 10:13:30 -0400}, + date-modified = {2025-06-23 10:29:42 -0400}, + doi = {10.5281/ZENODO.14805610}, + month = feb, + publisher = {Zenodo}, + shorttitle = {jupyter-book/mystmd}, + title = {mystmd v1.4.0}, + url = {https://zenodo.org/doi/10.5281/zenodo.14805610}, + urldate = {2025-03-08}, + year = {2025}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.14805610}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.14805610}} + +@misc{jupyterbook_2020, + abstract = {{\textless}strong{\textgreater}Jupyter Book{\textless}/strong{\textgreater} is an open source project for building beautiful, publication-quality books and documents from computational material. Here are some of the features of Jupyter Book: ✔ Write publication-quality content in Markdown. You can write in either Jupyter Markdown, or an extended flavor of Markdown with publishing features. This includes support for rich syntax such as citations and cross-references, math and equations, and figures. ✔ Write content in Jupyter Notebook. This allows you to include your code and outputs in your book. You can also write notebooks entirely in Markdown that get executed when you build your book. ✔ Execute and cache your book's content. For {\textless}code{\textgreater}.ipynb{\textless}/code{\textgreater} and Markdown notebooks, execute code and insert the latest outputs into your book. In addition, cache and re-use outputs to be used later. ✔ Insert notebook outputs into your content. Generate outputs as you build your documentation, and insert them in-line with your content across pages. ✔ Add interactivity to your book. You can toggle cell visibility, include interactive outputs from Jupyter, and connect with online services like Binder. ✔ Generate a variety of outputs. This includes single- and multi-page websites, as well as PDF outputs. 
✔ Build books with a simple command-line interface. You can quickly generate your books with one command, like so: {\textless}code{\textgreater}jupyter-book build mybook/{\textless}/code{\textgreater}}, + author = {{Executable Books Community}}, + copyright = {Open Access}, + date-added = {2025-06-23 10:13:12 -0400}, + date-modified = {2025-06-23 10:21:56 -0400}, + doi = {10.5281/ZENODO.2561065}, + keywords = {data science, jupyter, publishing, scholarship}, + month = feb, + publisher = {Zenodo}, + title = {Jupyter {Book}}, + url = {https://zenodo.org/record/2561065}, + urldate = {2025-03-08}, + year = {2020}, + bdsk-url-1 = {https://zenodo.org/record/2561065}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.2561065}} + +@misc{atmoscol2023, + abstract = {This is a stable release}, + author = {Alfonso Ladino and Nicole Rivera and Max Grover}, + copyright = {Creative Commons Attribution 4.0 International}, + date-added = {2025-06-23 10:12:58 -0400}, + date-modified = {2025-06-23 10:24:34 -0400}, + doi = {10.5281/ZENODO.8316796}, + month = oct, + publisher = {Zenodo}, + shorttitle = {aladinor/{Atmoscol2023}}, + title = {aladinor/{Atmoscol2023}: {Steable} version 0.1.0-{Beta}}, + url = {https://zenodo.org/doi/10.5281/zenodo.8316796}, + urldate = {2025-06-09}, + year = {2023}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.8316796}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.8316796}} + +@misc{ladino_erad_2024, + abstract = {The course will discuss the principles of open science and provide an overview of the most mature and exciting software packages available for radar data processing (ex. LROSE, Py-ART, pyrad, BAL- TRAD, wradlib) and how they connect with the scientific software stack. The course will be built with Jupyter Notebooks as hands-on approach for interactive user experi- ence. The main course programming language is Python, but also Command Line Tools are used. 
The course will also highlight the "xradar" package, implementing the newly adopted FM301/CfRadial2 WMO standard, as well as the gpm-api software, which facilitates the download and analysis of TRMM PR and GPM DPR spaceborne radars data. These two tools will be used to showcase how to harness the power of xarray and dask for efficient, distributed radar data processing. The course will cover operational use (e.g. in HPC environments or Cloud Infrastructure) as well as algorithm development, enabling the participants to implement their own algorithms. The course will also show how to create workflows for different aspects of weather radar data processing, using open datasets relevant to the attendees and ERAD 2024}, + author = {Ladino, Alfonso and del Moral M{\'e}ndez, Anna and Javornik, Brenda and Michelson, Daniel and Wolfensberger, Daniel and Ghiggi, Gionata and DeHart, Jen and {Figueras i Ventura}, Jordi and Giles, Julian and M{\"u}hlbauer, Kai and Grover, Maxwell and Dixon, Mike and Jackson, Robert and Collis, Scott and Cha, Ting-Yu and {ERAD2024 Open Radar Science Shortcourse contributors}}, + date-added = {2025-06-23 10:12:48 -0400}, + date-modified = {2025-06-23 10:24:42 -0400}, + doi = {10.5281/zenodo.13694511}, + month = sep, + publisher = {Zenodo}, + title = {{ERAD} 2024 {Open} {Radar} {Science} {Shortcourse}}, + url = {https://zenodo.org/records/13694511}, + urldate = {2025-03-10}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/records/13694511}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.13694511}} + +@inproceedings{pimentel_large-scale_2019, + abstract = {Jupyter Notebooks have been widely adopted by many different communities, both in science and industry. They support the creation of literate programming documents that combine code, text, and execution results with visualizations and all sorts of rich media. The self-documenting aspects and the ability to reproduce results have been touted as significant benefits of notebooks. 
At the same time, there has been growing criticism that the way notebooks are being used leads to unexpected behavior, encourage poor coding practices, and that their results can be hard to reproduce. To understand good and bad practices used in the development of real notebooks, we studied 1.4 million notebooks from GitHub. We present a detailed analysis of their characteristics that impact reproducibility. We also propose a set of best practices that can improve the rate of reproducibility and discuss open challenges that require further research and development.}, + author = {Pimentel, Jo{\~a}o Felipe and Murta, Leonardo and Braganholo, Vanessa and Freire, Juliana}, + booktitle = {2019 {IEEE}/{ACM} 16th {International} {Conference} on {Mining} {Software} {Repositories} ({MSR})}, + date-added = {2025-06-23 10:12:37 -0400}, + date-modified = {2025-06-23 10:12:37 -0400}, + doi = {10.1109/MSR.2019.00077}, + file = {IEEE Xplore Abstract Record:/Users/brianrose/Zotero/storage/MDAB5D8U/8816763.html:text/html}, + keywords = {Best practices, github, jupyter notebook, Media, Programming, Python, reproducibility, Testing, Tools}, + month = may, + note = {ISSN: 2574-3864}, + pages = {507--517}, + title = {A {Large}-{Scale} {Study} {About} {Quality} and {Reproducibility} of {Jupyter} {Notebooks}}, + url = {https://ieeexplore.ieee.org/document/8816763}, + urldate = {2025-03-08}, + year = {2019}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/8816763}, + bdsk-url-2 = {https://doi.org/10.1109/MSR.2019.00077}} + +@article{wilkinson_fair_2016, + abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders---representing academia, industry, funding agencies, and scholarly publishers---have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the FAIR Data Principles. 
The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the FAIR Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the FAIR Principles, and includes the rationale behind them, and some exemplar implementations in the community.}, + author = {Wilkinson, Mark D. and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E. and Bouwman, Jildau and Brookes, Anthony J. and Clark, Tim and Crosas, Merc{\`e} and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T. and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J. G. and Groth, Paul and Goble, Carole and Grethe, Jeffrey S. and Heringa, Jaap and 't Hoen, Peter A. C. and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J. and Martone, Maryann E. and Mons, Albert and Packer, Abel L. and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A. and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend}, + copyright = {2016 The Author(s)}, + date-added = {2025-06-23 10:12:32 -0400}, + date-modified = {2025-06-23 10:12:32 -0400}, + doi = {10.1038/sdata.2016.18}, + file = {Full Text PDF:/Users/brianrose/Zotero/storage/YX8Y3T6Q/Wilkinson et al. 
- 2016 - The FAIR Guiding Principles for scientific data management and stewardship.pdf:application/pdf}, + issn = {2052-4463}, + journal = {Scientific Data}, + keywords = {Publication characteristics, Research data}, + language = {en}, + month = mar, + publisher = {Nature Publishing Group}, + number = {1}, + pages = {160018}, + title = {The {FAIR} {Guiding} {Principles} for scientific data management and stewardship}, + url = {https://www.nature.com/articles/sdata201618}, + urldate = {2025-02-06}, + volume = {3}, + year = {2016}, + bdsk-url-1 = {https://www.nature.com/articles/sdata201618}, + bdsk-url-2 = {https://doi.org/10.1038/sdata.2016.18}} + +@manual{dask_2016, + author = {{Dask Development Team}}, + date-added = {2025-06-23 10:12:06 -0400}, + date-modified = {2025-06-23 10:20:19 -0400}, + title = {Dask: {Library} for dynamic task scheduling}, + url = {https://dask.org}, + year = {2016}, + bdsk-url-1 = {https://dask.org}} + +@article{hoyer_xarray_2017, + abstract = {xarray is an open source project and Python package that provides a toolkit and data structures for N-dimensional labeled arrays. Our approach combines an application programing interface (API) inspired by pandas with the Common Data Model for self-described scientific data. Key features of the xarray package include label-based indexing and arithmetic, interoperability with the core scientific Python packages (e.g., pandas, NumPy, Matplotlib), out-of-core computation on datasets that don't fit into memory, a wide range of serialization and input/output (I/O) options, and advanced multi-dimensional data manipulation tools such as group-by and resampling. 
xarray, as a data model and analytics toolkit, has been widely adopted in the geoscience community but is also used more broadly for multi-dimensional data analysis in physics, machine learning and finance.}, + author = {Hoyer, Stephan and Hamman, Joe}, + date-added = {2025-06-23 10:11:48 -0400}, + date-modified = {2025-06-23 10:22:40 -0400}, + doi = {10.5334/jors.148}, + issn = {2049-9647}, + journal = {Journal of Open Research Software}, + language = {en-US}, + month = apr, + number = {1}, + shorttitle = {xarray}, + title = {xarray: {N}-{D} labeled {Arrays} and {Datasets} in {Python}}, + url = {https://openresearchsoftware.metajnl.com/articles/10.5334/jors.148}, + urldate = {2025-03-08}, + volume = {5}, + year = {2017}, + bdsk-url-1 = {https://openresearchsoftware.metajnl.com/articles/10.5334/jors.148}, + bdsk-url-2 = {https://doi.org/10.5334/jors.148}} + +@misc{phil_elson_scitoolscartopy_2024, + author = {Phil Elson and Elliott Sales de Andrade and Greg Lucas and Ryan May and Richard Hattersley and Ed Campbell and Ruth Comer and Andrew Dawson and Bill Little and Stephane Raynaud and scmc72 and Alan D. 
Snow and lgolston and Byron Blay and Peter Killick and lbdreyer and Patrick Peglar and Nat Wilson and Andrew and Jon Szymaniak and Adrien Berchet and Corinne Bosley and Luke Davis and Filipe and John Krasting and Matthew Bradbury and stephenworsley and Daniel Kirkham}, + copyright = {Creative Commons Attribution 4.0 International}, + date-added = {2025-06-23 10:11:35 -0400}, + date-modified = {2025-06-23 10:30:29 -0400}, + doi = {10.5281/ZENODO.1182735}, + month = oct, + publisher = {Zenodo}, + shorttitle = {{SciTools}/cartopy}, + title = {cartopy v0.24.1}, + url = {https://zenodo.org/doi/10.5281/zenodo.1182735}, + urldate = {2025-03-08}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.1182735}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.1182735}} + +@misc{pandas, + abstract = {Pandas is a powerful data structures for data analysis, time series, and statistics.}, + annote = {Other +If you use this software, please cite it as below.}, + author = {{Pandas development team}}, + copyright = {BSD 3-Clause "New" or "Revised" License}, + date-added = {2025-06-23 10:11:15 -0400}, + date-modified = {2025-06-23 10:27:09 -0400}, + doi = {10.5281/ZENODO.3509134}, + keywords = {data science, python}, + month = sep, + publisher = {Zenodo}, + shorttitle = {pandas-dev/pandas}, + title = {pandas-dev/pandas: {Pandas}}, + url = {https://zenodo.org/doi/10.5281/zenodo.3509134}, + urldate = {2025-03-08}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.3509134}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.3509134}} + +@article{harris_array_2020, + abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. NumPy is the primary array programming library for the Python language. 
It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. For example, in astronomy, NumPy was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. NumPy is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own NumPy-like interfaces and array objects. Owing to its central position in the ecosystem, NumPy increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface (API), provides a flexible framework to support the next decade of scientific and industrial analysis.}, + author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, St{\'e}fan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. 
and Brett, Matthew and Haldane, Allan and {Fern{\'a}ndez del R{\'\i}o}, Jaime and Wiebe, Mark and Peterson, Pearu and G{\'e}rard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, + copyright = {2020 The Author(s)}, + date-added = {2025-06-23 10:10:22 -0400}, + date-modified = {2025-06-23 10:22:24 -0400}, + doi = {10.1038/s41586-020-2649-2}, + issn = {1476-4687}, + journal = {Nature}, + keywords = {Computational neuroscience, Computational science, Computer science, Software, Solar physics}, + language = {en}, + month = sep, + publisher = {Nature Publishing Group}, + number = {7825}, + pages = {357--362}, + title = {Array programming with {NumPy}}, + url = {https://www.nature.com/articles/s41586-020-2649-2}, + urldate = {2025-03-08}, + volume = {585}, + year = {2020}, + bdsk-url-1 = {https://www.nature.com/articles/s41586-020-2649-2}, + bdsk-url-2 = {https://doi.org/10.1038/s41586-020-2649-2}} + +@misc{rose_GEO_OSE_2023, + author = {Rose, Brian E. J. and Clyne, John and May, Ryan and Munroe, James and Snyder, Amelia and Eroglu, Orhan and Tyle, Kevin}, + date-added = {2025-06-23 10:10:11 -0400}, + date-modified = {2025-06-23 10:30:03 -0400}, + doi = {10.5281/zenodo.8184298}, + language = {eng}, + month = jul, + publisher = {Zenodo}, + shorttitle = {Project Pythia GEO OSE proposal}, + title = {Collaborative {Research}: {GEO} {OSE} {TRACK} 2: {Project} {Pythia} and {Pangeo}: {Building} an inclusive geoscience community through accessible, reusable, and reproducible workflows}, + url = {https://zenodo.org/records/8184298}, + urldate = {2025-02-06}, + year = {2023}, + bdsk-url-1 = {https://zenodo.org/records/8184298}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.8184298}} + +@misc{Foundations, + author = {Rose, Brian E. J. and Kent, Julia and Tyle, Kevin and Clyne, John and Banihirwe, Anderson and Camron, Drew and May, Ryan and Grover, Maxwell and Ford, Robert R. 
and Paul, Kevin and Morley, James and Eroglu, Orhan and Kailyn, Lily and Zacharias, Anissa}, + date-added = {2025-06-23 10:10:09 -0400}, + date-modified = {2025-06-23 10:30:13 -0400}, + doi = {10.5281/zenodo.14759276}, + file = {Snapshot:/Users/brianrose/Zotero/storage/EC446TLR/14759276.html:text/html}, + month = jan, + publisher = {Zenodo}, + title = {Pythia {Foundations}}, + url = {https://zenodo.org/records/14759276}, + urldate = {2025-02-06}, + year = {2025}, + bdsk-url-1 = {https://zenodo.org/records/14759276}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.14759276}} + +@article{abernathey_pangeo_2017, + abstract = {The Project Description from the NSF-funded Earthcube project "Pangeo: An Open Source Big Data Climate Science Platform" (NSF award 1740648)}, + author = {Abernathey, Ryan and Paul, Kevin and Hamman, Joe and Rocklin, Matthew and Lepore, Chiara and Tippett, Michael and Henderson, Naomi and Seager, Richard and May, Ryan and Del Vento, Davide}, + date-added = {2025-06-23 10:10:05 -0400}, + date-modified = {2025-06-23 10:16:56 -0400}, + doi = {10.6084/m9.figshare.5361094.v1}, + language = {en}, + month = aug, + title = {Pangeo {NSF} {Earthcube} {Proposal}}, + url = {https://figshare.com/articles/journal_contribution/Pangeo_NSF_Earthcube_Proposal/5361094/1}, + urldate = {2025-03-08}, + year = {2017}, + bdsk-url-1 = {https://figshare.com/articles/journal_contribution/Pangeo_NSF_Earthcube_Proposal/5361094/1}, + bdsk-url-2 = {https://doi.org/10.6084/m9.figshare.5361094.v1}} diff --git a/paper.md b/paper.md new file mode 100644 index 000000000..43f1de73d --- /dev/null +++ b/paper.md @@ -0,0 +1,101 @@ +--- +title: 'Pythia Foundations: A community learning resource for Python-based computing in the geosciences' +tags: + - Python +# - astronomy +# - dynamics +# - galactic dynamics +# - milky way +authors: + - name: Brian E. J. Rose + orcid: 0000-0002-9961-3821 + affiliation: 1 # (Multiple affiliations must be quoted) + - name: Robert R. 
Ford + orcid: 0000-0001-5483-4965 + affiliation: 1 + - name: Anderson Banihirwe + orcid: 0000-0001-6583-571X + affiliation: 2 + - name: M. Drew Camron + orcid: 0000-0001-7246-6502 + affiliation: 3 + - name: John Clyne + orcid: 0000-0003-2788-9017 + affiliation: 4 + - name: Orhan Eroglu + orcid: 0000-0003-3099-8775 + affiliation: 4 + - name: Katelyn FitzGerald + orcid: 0000-0003-4184-1917 + affiliation: 4 + - name: Maxwell A. Grover + orcid: 0000-0002-0370-8974 + affiliation: 5 + - name: Julia Kent + orcid: 0000-0002-5611-8986 + affiliation: 4 + - name: Ryan May + orcid: 0000-0003-2907-038X + affiliation: 3 + - name: Kevin Paul + orcid: 0000-0001-8155-8038 + affiliation: 6 + - name: Kevin R. Tyle + orcid: 0000-0001-5249-9665 + affiliation: 1 + - name: Anissa Zacharias + orcid: 0000-0002-2666-8493 + affiliation: 4 + - name: Author Without ORCID + affiliation: 2 +affiliations: + - name: Department of Atmospheric and Environmental Sciences, University at Albany (State University of New York) + index: 1 + - name: CarbonPlan + index: 2 + - name: NSF Unidata, University Corporation for Atmospheric Research + index: 3 + - name: Computational Information Systems Lab., NSF National Center for Atmospheric Research + index: 4 + - name: Environmental Science Division, Argonne National Laboratory + index: 5 + - name: NVIDIA Corporation + index: 6 +date: 26 June 2025 +bibliography: paper.bib +--- + +# Summary + +Pythia Foundations [1] is the flagship product of the first phase of Project Pythia [2], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. 
+ +Pythia Foundations is a geoscience-flavored introduction to essential tools in the scientific Python ecosystem and Pangeo [3] stack (e.g., JupyterLab, NumPy [4], Matplotlib [5], Pandas [6], [7], Cartopy [8], Xarray [9], Dask [10]), plus environment management tools (conda), basics of version control (git), and effective use of GitHub as a technical communication platform (Figure 1). It is a community-owned executable textbook backed by computational resources for automated health-checking and interactive use. It covers the foundational knowledge that is needed to get started with Python in the computational geosciences, as well as to become an effective citizen-practitioner in key open geoscience software ecosystems. It is intended for anyone from undergraduate students through established geoscientists who are relatively new to working in Python. The book assumes a basic knowledge of programming concepts, but a brief "Quickstart" lesson highlights distinctive features of Python for users migrating from other languages. + +A distinguishing feature of Pythia Foundations is its rigorous quality control and maintenance. All Python code and external web links are tested nightly, and book contents are kept up to date as the software ecosystem and data sources evolve. Users can run the examples with a “one click” launch into a dedicated cloud-based Binder service [11]. + +# Statement of Need +Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide-ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. 
Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it? Can I trust it? Is it compatible with other tools in my workflow? Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. + +The Foundations book embodies the FAIR principles [12] that play a central role in open science. Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [13]. + +# Content, instructional design, and usage +The scope of Pythia Foundations is limited to tools and packages that are currently in broad use across multiple geoscience disciplines; packages tailored to more narrow scientific domains are not covered in Foundations but may be suitable for a Cookbook. The book outline was designed collaboratively by the core author team, informed by community feedback, and drawing on our substantial collective experience in teaching Python-based scientific workflows in classrooms, workshops, and outreach events. 
+ +The book is organized into two main sections: Foundational skills and Core Scientific Python packages (Figure 1). The foundational skills section covers “getting started” skills such as how to install Python and manage environments and how to run Python code in JupyterLab. There is also a set of tutorials on the use of GitHub and git for version control and collaboration on open-source projects. The scope of this section was chosen with the specific goal of enabling users to contribute back to Pythia Foundations. + +A template notebook and contribution guide are provided for new content, encouraging consistency of style and organization. Each chapter includes explicit prerequisites, references, and estimated learning time. The book is intended primarily for self-study and reference, backed by the interactive Binder or deployed on user machines following the detailed guidance in the book. From web-based metrics, Pythia Foundations served roughly 29,000 users in 111 countries during calendar year 2024. + +Subsets of the book contents have been modified and repackaged for various workshops and short courses. A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [14], the Climatematch Academy international virtual summer school (annually since 2023), and in Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [15]. Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. + +# Computational infrastructure +The book is deployed as an easy-to-navigate website using JupyterBook [16] and MyST-MD [17], including “one-click” Binder links to interactive versions of every chapter.
It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the outputs of the automated builds are identical to those that users see when running code examples interactively. + +# Future plans +Pythia Foundations is a living document and is receiving continuous updates [18] and improvements, both from the core author team and the broader community of user-contributors. On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. + +The computational and publishing infrastructure for Foundations is also continuously evolving. As of this writing, Foundations and all other Pythia content have just undergone a significant refresh and upgrade with the migration to JupyterBook 2, which is based on the MyST-MD publishing engine [17].
Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repacking of Foundations content tailored to specific courses or audiences. + +# Acknowledgements +The authors gratefully acknowledge support from the broad open geoscience communities of Project Pythia and Pangeo for their feedback, suggestions, pull requests, and enthusiasm. Development and maintenance of Pythia Foundations was supported by the U.S. National Science Foundation (NSF) awards 2026899, 2026863, 2324302, 2324303 and 2324304. The Pythia BinderHub is deployed on Jetstream2 [19] at Indiana University through allocations EES230007 and SEE240014 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program [20], which is supported by NSF grants 2138259, 2138286, 2138307, 2137603, and 2138296. + +# References From d11abe17da93ca703d471307d347907620c284be Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Mon, 23 Jun 2025 10:43:42 -0400 Subject: [PATCH 2/6] Fix citations --- paper.bib | 46 ++++++++++++++++++++++++++++------------------ paper.md | 18 +++++++++--------- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/paper.bib b/paper.bib index 14ef0b05a..b6d0fbe5c 100644 --- a/paper.bib +++ b/paper.bib @@ -1,3 +1,13 @@ +%% This BibTeX bibliography file was created using BibDesk. 
+%% https://bibdesk.sourceforge.io/ + +%% Created for Brian Rose at 2025-06-23 10:41:48 -0400 + + +%% Saved with string encoding Unicode (UTF-8) + + + @article{Matplotlib_2007, author = {Hunter, John D.}, date-added = {2025-06-23 10:28:09 -0400}, @@ -12,11 +22,11 @@ @article{Matplotlib_2007 year = {2007}, bdsk-url-1 = {https://doi.org/10.1109/MCSE.2007.55}} -@inproceedings{mckinney-proc-scipy-2010, +@inproceedings{McKinney-2010, author = {{W}es {M}c{K}inney}, booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference}, date-added = {2025-06-23 10:25:10 -0400}, - date-modified = {2025-06-23 10:25:10 -0400}, + date-modified = {2025-06-23 10:37:21 -0400}, doi = {10.25080/Majora-92bf1922-00a}, editor = {{S}t\'efan van der {W}alt and {J}arrod {M}illman}, pages = {56 - 61}, @@ -25,10 +35,10 @@ @inproceedings{mckinney-proc-scipy-2010 bdsk-url-1 = {https://doi.org/10.25080/Majora-92bf1922-00a}} @inproceedings{binder-2018, - author = {{P}roject {J}upyter and {M}atthias {B}ussonnier and {J}essica {F}orde and {J}eremy {F}reeman and {B}rian {G}ranger and {T}im {H}ead and {C}hris {H}oldgraf and {K}yle {K}elley and {G}ladys {N}alvarte and {A}ndrew {O}sheroff and {M} {P}acer and {Y}uvi {P}anda and {F}ernando {P}erez and {B}enjamin {R}agan-{K}elley and {C}arol {W}illing}, + author = {{Project Jupyter} and {M}atthias {B}ussonnier and {J}essica {F}orde and {J}eremy {F}reeman and {B}rian {G}ranger and {T}im {H}ead and {C}hris {H}oldgraf and {K}yle {K}elley and {G}ladys {N}alvarte and {A}ndrew {O}sheroff and {M} {P}acer and {Y}uvi {P}anda and {F}ernando {P}erez and {B}enjamin {R}agan-{K}elley and {C}arol {W}illing}, booktitle = {{P}roceedings of the 17th {P}ython in {S}cience {C}onference}, date-added = {2025-06-23 10:23:31 -0400}, - date-modified = {2025-06-23 10:23:59 -0400}, + date-modified = {2025-06-23 10:38:54 -0400}, doi = {10.25080/Majora-4af1f417-011}, editor = {{F}atih {A}kici and {D}avid {L}ippa and {D}illon {N}iederhut and {M} {P}acer}, pages = {113 - 120}, 
@@ -96,11 +106,11 @@ @article{cockett_continuous_2024 bdsk-url-1 = {https://proceedings.scipy.org/articles/NKVC9349}, bdsk-url-2 = {https://doi.org/10.25080/NKVC9349}} -@misc{rowan_cockett_jupyter-bookmystmd_2025, +@misc{mystmd_2025, author = {Rowan Cockett and Franklin Koch and Steve Purves and Angus Hollands and Yuxi Wang and Dylan Grandmont and Chris Holdgraf and Andrea and Jan-Hendrik M{\"u}ller and Spencer Lyon and Cristian Le and Jim Madge and wwx and Sugan Reden and Yuanhao Geng and Ryan Lovett and Mikkel Roald-Arb{\o}l and Matt McKay and Matthew Brett and M Bussonnier and Mridul Seth and Nicolas M. Thi{\'e}ry and Raniere Silva and Sarah Brown and Sinan Bekar and Tavin Cole and Thad Guidry and Toby Driscoll}, copyright = {MIT License}, date-added = {2025-06-23 10:13:30 -0400}, - date-modified = {2025-06-23 10:29:42 -0400}, + date-modified = {2025-06-23 10:41:47 -0400}, doi = {10.5281/ZENODO.14805610}, month = feb, publisher = {Zenodo}, @@ -161,12 +171,12 @@ @misc{ladino_erad_2024 bdsk-url-1 = {https://zenodo.org/records/13694511}, bdsk-url-2 = {https://doi.org/10.5281/zenodo.13694511}} -@inproceedings{pimentel_large-scale_2019, +@inproceedings{pimentel_2019, abstract = {Jupyter Notebooks have been widely adopted by many different communities, both in science and industry. They support the creation of literate programming documents that combine code, text, and execution results with visualizations and all sorts of rich media. The self-documenting aspects and the ability to reproduce results have been touted as significant benefits of notebooks. At the same time, there has been growing criticism that the way notebooks are being used leads to unexpected behavior, encourage poor coding practices, and that their results can be hard to reproduce. To understand good and bad practices used in the development of real notebooks, we studied 1.4 million notebooks from GitHub. We present a detailed analysis of their characteristics that impact reproducibility. 
We also propose a set of best practices that can improve the rate of reproducibility and discuss open challenges that require further research and development.}, author = {Pimentel, Jo{\~a}o Felipe and Murta, Leonardo and Braganholo, Vanessa and Freire, Juliana}, booktitle = {2019 {IEEE}/{ACM} 16th {International} {Conference} on {Mining} {Software} {Repositories} ({MSR})}, date-added = {2025-06-23 10:12:37 -0400}, - date-modified = {2025-06-23 10:12:37 -0400}, + date-modified = {2025-06-23 10:40:12 -0400}, doi = {10.1109/MSR.2019.00077}, file = {IEEE Xplore Abstract Record:/Users/brianrose/Zotero/storage/MDAB5D8U/8816763.html:text/html}, keywords = {Best practices, github, jupyter notebook, Media, Programming, Python, reproducibility, Testing, Tools}, @@ -213,11 +223,11 @@ @book{dask_2016 year = {2016}, bdsk-url-1 = {http://dask.pydata.org}} -@article{hoyer_xarray_2017, +@article{xarray_2017, abstract = {xarray is an open source project and Python package that provides a toolkit and data structures for N-dimensional labeled arrays. Our approach combines an application programing interface (API) inspired by pandas with the Common Data Model for self-described scientific data. Key features of the xarray package include label-based indexing and arithmetic, interoperability with the core scientific Python packages (e.g., pandas, NumPy, Matplotlib), out-of-core computation on datasets that don't fit into memory, a wide range of serialization and input/output (I/O) options, and advanced multi-dimensional data manipulation tools such as group-by and resampling. 
xarray, as a data model and analytics toolkit, has been widely adopted in the geoscience community but is also used more broadly for multi-dimensional data analysis in physics, machine learning and finance.}, author = {Hoyer, Stephan and Hamman, Joe}, date-added = {2025-06-23 10:11:48 -0400}, - date-modified = {2025-06-23 10:22:40 -0400}, + date-modified = {2025-06-23 10:38:09 -0400}, doi = {10.5334/jors.148}, issn = {2049-9647}, journal = {Journal of Open Research Software}, @@ -233,11 +243,11 @@ @article{hoyer_xarray_2017 bdsk-url-1 = {https://openresearchsoftware.metajnl.com/articles/10.5334/jors.148}, bdsk-url-2 = {https://doi.org/10.5334/jors.148}} -@misc{phil_elson_scitoolscartopy_2024, +@misc{cartopy_2024, author = {Phil Elson and Elliott Sales de Andrade and Greg Lucas and Ryan May and Richard Hattersley and Ed Campbell and Ruth Comer and Andrew Dawson and Bill Little and Stephane Raynaud and scmc72 and Alan D. Snow and lgolston and Byron Blay and Peter Killick and lbdreyer and Patrick Peglar and Nat Wilson and Andrew and Jon Szymaniak and Adrien Berchet and Corinne Bosley and Luke Davis and Filipe and John Krasting and Matthew Bradbury and stephenworsley and Daniel Kirkham}, copyright = {Creative Commons Attribution 4.0 International}, date-added = {2025-06-23 10:11:35 -0400}, - date-modified = {2025-06-23 10:30:29 -0400}, + date-modified = {2025-06-23 10:37:47 -0400}, doi = {10.5281/ZENODO.1182735}, month = oct, publisher = {Zenodo}, @@ -269,12 +279,12 @@ @misc{pandas bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.3509134}, bdsk-url-2 = {https://doi.org/10.5281/ZENODO.3509134}} -@article{harris_array_2020, +@article{Harris_array_2020, abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. NumPy is the primary array programming library for the Python language. 
It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. For example, in astronomy, NumPy was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. NumPy is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own NumPy-like interfaces and array objects. Owing to its central position in the ecosystem, NumPy increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface (API), provides a flexible framework to support the next decade of scientific and industrial analysis.}, author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, St{\'e}fan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. 
and Brett, Matthew and Haldane, Allan and del R{\'\i}o, Jaime Fern{\'a}ndez and Wiebe, Mark and Peterson, Pearu and G{\'e}rard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, copyright = {2020 The Author(s)}, date-added = {2025-06-23 10:10:22 -0400}, - date-modified = {2025-06-23 10:22:24 -0400}, + date-modified = {2025-06-23 10:36:09 -0400}, doi = {10.1038/s41586-020-2649-2}, issn = {1476-4687}, journal = {Nature}, @@ -292,10 +302,10 @@ @article{harris_array_2020 bdsk-url-1 = {https://www.nature.com/articles/s41586-020-2649-2}, bdsk-url-2 = {https://doi.org/10.1038/s41586-020-2649-2}} -@article{rose_GEO_OSE_2023, +@article{Rose_GEO_OSE_2023, author = {Rose, Brian E. J. and Clyne, John and May, Ryan and Munroe, James and Snyder, Amelia and Eroglu, Orhan and Tyle, Kevin}, date-added = {2025-06-23 10:10:11 -0400}, - date-modified = {2025-06-23 10:30:03 -0400}, + date-modified = {2025-06-23 10:35:16 -0400}, doi = {10.5281/zenodo.8184298}, language = {eng}, month = jul, @@ -323,11 +333,11 @@ @misc{Foundations bdsk-url-1 = {https://zenodo.org/records/14759276}, bdsk-url-2 = {https://doi.org/10.5281/zenodo.14759276}} -@article{abernathey_pangeo_2017, +@article{Abernathey_pangeo_2017, abstract = {The Project Description from the NSF-funded Earthcube project "Pangeo: An Open Source Big Data Climate Science Platform" (NSF award 1740648)}, author = {Abernathey, Ryan and Paul, Kevin and Hamman, Joe and Rocklin, Matthew and Lepore, Chiara and Tippett, Michael and Henderson, Naomi and Seager, Richard and May, Ryan and Del Vento, Davide}, date-added = {2025-06-23 10:10:05 -0400}, - date-modified = {2025-06-23 10:16:56 -0400}, + date-modified = {2025-06-23 10:35:39 -0400}, doi = {10.6084/m9.figshare.5361094.v1}, language = {en}, month = aug, diff --git a/paper.md b/paper.md index 43f1de73d..52113cc39 100644 --- a/paper.md +++ b/paper.md @@ -67,16 +67,16 @@ bibliography: paper.bib # 
Summary -Pythia Foundations [1] is the flagship product of the first phase of Project Pythia [2], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. +Pythia Foundations [@Foundations] is the flagship product of the first phase of Project Pythia [@Rose_GEO_OSE_2023], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. -Pythia Foundations is a geoscience-flavored introduction to essential tools in the scientific Python ecosystem and Pangeo [3] stack (e.g., JupyterLab, NumPy [4], Matplotlib [5], Pandas [6], [7], Cartopy [8], Xarray [9], Dask [10]), plus environment management tools (conda), basics of version control (git), and effective use of GitHub as an technical communication platform (Figure 1). It is a community-owned executable textbook backed by computational resources for automated health-checking and interactive use. It covers the foundational knowledge that is needed to get started with Python in the computational geosciences, as well as to become an effective citizen-practitioner in key open geoscience software ecosystems. It is intended for anyone from undergraduate students through established geoscientists who are relatively new to working in Python.
The book assumes a basic knowledge of programming concepts, but a brief "Quickstart" lesson highlights distinctive features of Python for users migrating from other languages. +Pythia Foundations is a geoscience-flavored introduction to essential tools in the scientific Python ecosystem and Pangeo [@Abernathey_pangeo_2017] stack (e.g., JupyterLab, NumPy [@Harris_array_2020], Matplotlib [@Matplotlib_2007], Pandas [@McKinney-2010; @pandas], Cartopy [@cartopy_2024], Xarray [@xarray_2017], Dask [@dask_2016]), plus environment management tools (conda), basics of version control (git), and effective use of GitHub as a technical communication platform (Figure 1). It is a community-owned executable textbook backed by computational resources for automated health-checking and interactive use. It covers the foundational knowledge that is needed to get started with Python in the computational geosciences, as well as to become an effective citizen-practitioner in key open geoscience software ecosystems. It is intended for anyone from undergraduate students through established geoscientists who are relatively new to working in Python. The book assumes a basic knowledge of programming concepts, but a brief "Quickstart" lesson highlights distinctive features of Python for users migrating from other languages. -A distinguishing feature of Pythia Foundations is its rigorous quality control and maintenance. All Python code and external web links are tested nightly, and book contents are kept up to date as the software ecosystem and data sources evolve.
Users can run the examples with a “one click” launch into a dedicated cloud-based Binder service [@binder-2018]. # Statement of Need Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it? Can I trust it? Is it compatible with other tools in my workflow? Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. -The Foundations book embodies the FAIR principles [12] that play a central role in open science. Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). 
Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [13]. +The Foundations book embodies the FAIR principles [@wilkinson_fair_2016] that play a central role in open science. Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [@pimentel_2019]. # Content, instructional design, and usage The scope of Pythia Foundations is limited to tools and packages that are currently in broad use across multiple geoscience disciplines; packages tailored to more narrow scientific domains are not covered in Foundations but may be suitable for a Cookbook. The book outline was designed collaboratively by the core author team, informed by community feedback, and drawing on our substantial collective experience in teaching Python-based scientific workflows in classrooms, workshops, and outreach events. @@ -85,17 +85,17 @@ The book is organized into two main sections: Foundational skills and Core Scien A template notebook and contribution guide is provided for new content, encouraging consistency of style and organization. Each chapter includes explicit prerequisites, references, and estimated learning time. 
The book is intended primarily for self-study and reference, backed by the interactive Binder or deployed on user machines following the detailed guidance in the book. From web-based metrics, Pythia Foundations served roughly 29,000 users in 111 countries during calendar year 2024. -Subsets of the book contents have been modified and repackaged for various workshops and short courses. A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [14], the Climatematch Academy international virtual summer school (annually since 2023), and in Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [15]. Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. +Subsets of the book contents have been modified and repackaged for various workshops and short courses. A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [@ladino_erad_2024], the Climatematch Academy international virtual summer school (annually since 2023), and in Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [@atmoscol2023]. Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. # Computational infrastructure -The book is deployed as an easy-to-navigate website using JupyterBook [16] and MyST-MD [17], including “one-click” Binder links to interactive versions of every chapter. It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. 
A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the output of the automated builds are identical to those that users see when running code examples interactively. +The book is deployed as an easy-to-navigate website using JupyterBook [@jupyterbook_2020] and MyST-MD [@mystmd_2025], including “one-click” Binder links to interactive versions of every chapter. It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the outputs of the automated builds are identical to those that users see when running code examples interactively.
# Future plans -Pythia Foundations is a living document and is receiving continuous updates [18] and improvements, both from the core author team and the broader community of user-contributors. On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. +Pythia Foundations is a living document and is receiving continuous updates [@cockett_continuous_2024] and improvements, both from the core author team and the broader community of user-contributors. On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. -The computational and publishing infrastructure for Foundations is also continuously evolving. As of this writing, Foundations and all other Pythia content has just undergone a significant refresh and upgrade with the migration to JupyterBook 2 which is based on the MyST-MD publishing engine [17]. Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repacking of Foundations content tailored to specific courses or audiences. +The computational and publishing infrastructure for Foundations is also continuously evolving. 
As of this writing, Foundations and all other Pythia content have just undergone a significant refresh and upgrade with the migration to JupyterBook 2, which is based on the MyST-MD publishing engine [@mystmd_2025]. Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repackaging of Foundations content tailored to specific courses or audiences. # Acknowledgements -The authors gratefully acknowledge support from the broad open geoscience communities of Project Pythia and Pangeo for their feedback, suggestions, pull requests, and enthusiasm. Development and maintenance of Pythia Foundations was supported by the U.S. National Science Foundation (NSF) awards 2026899, 2026863, 2324302, 2324303 and 2324304. The Pythia BinderHub is deployed on Jetstream2 [19] at Indiana University through allocations EES230007 and SEE240014 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program [20], which is supported by NSF grants 2138259, 2138286, 2138307, 2137603, and 2138296. +The authors gratefully acknowledge support from the broad open geoscience communities of Project Pythia and Pangeo for their feedback, suggestions, pull requests, and enthusiasm. Development and maintenance of Pythia Foundations was supported by the U.S. National Science Foundation (NSF) awards 2026899, 2026863, 2324302, 2324303 and 2324304. The Pythia BinderHub is deployed on Jetstream2 [@hancock_jetstream2_2021] at Indiana University through allocations EES230007 and SEE240014 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program [@boerner_access_2023], which is supported by NSF grants 2138259, 2138286, 2138307, 2137603, and 2138296.
# References From 7b94eed42eb8d09cc1612ebf417c475d5ba07b55 Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Mon, 23 Jun 2025 10:47:25 -0400 Subject: [PATCH 3/6] Add figure and caption --- paper.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paper.md b/paper.md index 52113cc39..0bb7d13b7 100644 --- a/paper.md +++ b/paper.md @@ -73,6 +73,10 @@ Pythia Foundations is a geoscience-flavored introduction to essential tools in t A distinguishing feature of Pythia Foundations is its rigorous quality control and maintenance. All Python code and external web links are tested nightly, and book contents are kept up to date as the software ecosystem and data sources evolve. Users can run the examples with a “one click” launch into a dedicated cloud-based Binder service [@binder-2018]. +![Figure 1: Pythia Foundations infographic](images/ProjectPythia_Foundations_Infographic_v4.jpg) + +> Figure 1: A schematic of the content and organization of Pythia Foundations. The book is divided into two major sections, "Foundational Skills" and "Core Scientific Python Packages", the latter limited to packages currently in broad use across multiple geoscience disciplines. Pythia Cookbooks house more advanced or domain-specific content that references individual chapters from Foundations as prerequisites. + # Statement of Need Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it?
Can I trust it? Is it compatible with other tools in my workflow? Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. From 5a6b187ed50085d65acd73167a29c866d970670b Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Mon, 23 Jun 2025 10:51:59 -0400 Subject: [PATCH 4/6] Fix tags --- paper.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/paper.md b/paper.md index 0bb7d13b7..340cfdc56 100644 --- a/paper.md +++ b/paper.md @@ -2,10 +2,17 @@ title: 'Pythia Foundations: A community learning resource for Python-based computing in the geosciences' tags: - Python -# - astronomy -# - dynamics -# - galactic dynamics -# - milky way + - Geoscience + - Jupyter + - GitHub + - Numpy + - Matplotlib + - Cartopy + - Datetime + - Pandas + - Netcdf + - Xarray + - Dask authors: - name: Brian E. J. Rose orcid: 0000-0002-9961-3821 From 37314e6ef64c27384a9da9a840aa3491b971e0ab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 14:55:14 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- paper.bib | 4 ++-- paper.md | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paper.bib b/paper.bib index b6d0fbe5c..85666894f 100644 --- a/paper.bib +++ b/paper.bib @@ -1,10 +1,10 @@ %% This BibTeX bibliography file was created using BibDesk. 
%% https://bibdesk.sourceforge.io/ -%% Created for Brian Rose at 2025-06-23 10:41:48 -0400 +%% Created for Brian Rose at 2025-06-23 10:41:48 -0400 -%% Saved with string encoding Unicode (UTF-8) +%% Saved with string encoding Unicode (UTF-8) diff --git a/paper.md b/paper.md index 340cfdc56..93ae88ead 100644 --- a/paper.md +++ b/paper.md @@ -74,7 +74,7 @@ bibliography: paper.bib # Summary -Pythia Foundation [@Foundations] is the flagship product of the first phase of Project Pythia [@Rose_GEO_OSE_2023], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. +Pythia Foundations [@Foundations] is the flagship product of the first phase of Project Pythia [@Rose_GEO_OSE_2023], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. Pythia Foundations is a geoscience-flavored introduction to essential tools in the scientific Python ecosystem and Pangeo [@Abernathey_pangeo_2017] stack (e.g., JupyterLab, NumPy [@Harris_array_2020], Matplotlib [@Matplotlib_2007], Pandas [@McKinney-2010], [@pandas], Cartopy [@cartopy_2024], Xarray [@xarray_2017], Dask [@dask_2016]), plus environment management tools (conda), basics of version control (git), and effective use of GitHub as an technical communication platform (Figure 1). 
It is a community-owned executable textbook backed by computational resources for automated health-checking and interactive use. It covers the foundational knowledge that is needed to get started with Python in the computational geosciences, as well as to become an effective citizen-practitioner in key open geoscience software ecosystems. It is intended for anyone from undergraduate students through established geoscientists who are relatively new to working in Python. The book assumes a basic knowledge of programming concepts, but a brief "Quickstart" lesson highlights distinctive features of Python for users migrating from other languages. @@ -85,26 +85,26 @@ A distinguishing feature of Pythia Foundations is its rigorous quality control a > Figure 1: A schematic of the content and organization of Pythia Foundations. The book is divided into two major sections, "Foundational Skills" and "Core Scientific Python Packages", limited to those currently in broad use across multiple geoscience disciplines. Pythia Cookbooks house more advanced or domain-specific content that reference individual chapters from Foundations as prerequisites. # Statement of Need -Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it? Can I trust it? Is it compatible with other tools in my workflow? 
Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. +Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide-ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it? Can I trust it? Is it compatible with other tools in my workflow? Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. -The Foundations book embodies the FAIR principles [@wilkinson_fair_2016] that play a central role in open science. 
Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [@pimentel_2019]. +The Foundations book embodies the FAIR principles [@wilkinson_fair_2016] that play a central role in open science. Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [@pimentel_2019]. # Content, instructional design, and usage The scope of Pythia Foundations is limited to tools and packages that are currently in broad use across multiple geoscience disciplines; packages tailored to more narrow scientific domains are not covered in Foundations but may be suitable for a Cookbook. The book outline was designed collaboratively by the core author team, informed by community feedback, and drawing on our substantial collective experience in teaching Python-based scientific workflows in classrooms, workshops, and outreach events. 
The book is organized into two main sections: Foundational skills and Core Scientific Python packages (Figure 1). The foundational skills section covers “getting started” skills such as how to install Python and manage environments and how to run Python code in JupyterLab. There is also a set of tutorials on the use of GitHub and git for version control and collaboration on open source projects. The scope of this section was chosen with the specific goal of enabling users to contribute back to Pythia Foundations. -A template notebook and contribution guide is provided for new content, encouraging consistency of style and organization. Each chapter includes explicit prerequisites, references, and estimated learning time. The book is intended primarily for self-study and reference, backed by the interactive Binder or deployed on user machines following the detailed guidance in the book. From web-based metrics, Pythia Foundations served roughly 29,000 users in 111 countries during calendar year 2024. +A template notebook and contribution guide are provided for new content, encouraging consistency of style and organization. Each chapter includes explicit prerequisites, references, and estimated learning time. The book is intended primarily for self-study and reference, backed by the interactive Binder or deployed on user machines following the detailed guidance in the book. From web-based metrics, Pythia Foundations served roughly 29,000 users in 111 countries during calendar year 2024. -Subsets of the book contents have been modified and repackaged for various workshops and short courses. A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [@ladino_erad_2024], the Climatematch Academy international virtual summer school (annually since 2023), and in Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [@atmoscol2023]. 
Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. +Subsets of the book contents have been modified and repackaged for various workshops and short courses. A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [@ladino_erad_2024], the Climatematch Academy international virtual summer school (annually since 2023), and in Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [@atmoscol2023]. Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. # Computational infrastructure -The book is deployed as an easy-to-navigate website using JupyterBook [@jupyterbook_2020] and MyST-MD [@mystmd_2025], including “one-click” Binder links to interactive versions of every chapter. It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the output of the automated builds are identical to those that users see when running code examples interactively. 
+The book is deployed as an easy-to-navigate website using JupyterBook [@jupyterbook_2020] and MyST-MD [@mystmd_2025], including “one-click” Binder links to interactive versions of every chapter. It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the outputs of the automated builds are identical to those that users see when running code examples interactively. # Future plans -Pythia Foundations is a living document and is receiving continuous updates [@cockett_continuous_2024] and improvements, both from the core author team and the broader community of user-contributors. On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. +Pythia Foundations is a living document and is receiving continuous updates [@cockett_continuous_2024] and improvements, both from the core author team and the broader community of user-contributors. 
On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. -The computational and publishing infrastructure for Foundations is also continuously evolving. As of this writing, Foundations and all other Pythia content has just undergone a significant refresh and upgrade with the migration to JupyterBook 2 which is based on the MyST-MD publishing engine [@mystmd_2025]. Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repacking of Foundations content tailored to specific courses or audiences. +The computational and publishing infrastructure for Foundations is also continuously evolving. As of this writing, Foundations and all other Pythia content have just undergone a significant refresh and upgrade with the migration to JupyterBook 2, which is based on the MyST-MD publishing engine [@mystmd_2025]. Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repackaging of Foundations content tailored to specific courses or audiences. # Acknowledgements The authors gratefully acknowledge support from the broad open geoscience communities of Project Pythia and Pangeo for their feedback, suggestions, pull requests, and enthusiasm. Development and maintenance of Pythia Foundations was supported by the U.S. National Science Foundation (NSF) awards 2026899, 2026863, 2324302, 2324303 and 2324304. 
The Pythia BinderHub is deployed on Jetstream2 [@hancock_jetstream2_2021] at Indiana University through allocations EES230007 and SEE240014 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program [@boerner_access_2023], which is supported by NSF grants 2138259, 2138286, 2138307, 2137603, and 2138296. From a1dd1609b2251560d463e55c57526d9dd9d6da89 Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Wed, 25 Jun 2025 16:45:02 -0400 Subject: [PATCH 6/6] remove cruft from bib file --- paper.bib | 62 ------------------------------------------------------- 1 file changed, 62 deletions(-) diff --git a/paper.bib b/paper.bib index 85666894f..cbdb8a909 100644 --- a/paper.bib +++ b/paper.bib @@ -1,17 +1,5 @@ -%% This BibTeX bibliography file was created using BibDesk. -%% https://bibdesk.sourceforge.io/ - -%% Created for Brian Rose at 2025-06-23 10:41:48 -0400 - - -%% Saved with string encoding Unicode (UTF-8) - - - @article{Matplotlib_2007, author = {Hunter, John D.}, - date-added = {2025-06-23 10:28:09 -0400}, - date-modified = {2025-06-23 10:28:24 -0400}, doi = {10.1109/MCSE.2007.55}, journal = {Computing in Science & Engineering}, keywords = {Graphics;Interpolation;Equations;Graphical user interfaces;Packaging;Image generation;User interfaces;Operating systems;Computer languages;Programming profession;Python;scripting languages;application development;scientific programming}, @@ -25,8 +13,6 @@ @article{Matplotlib_2007 @inproceedings{McKinney-2010, author = {{W}es {M}c{K}inney}, booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference}, - date-added = {2025-06-23 10:25:10 -0400}, - date-modified = {2025-06-23 10:37:21 -0400}, doi = {10.25080/Majora-92bf1922-00a}, editor = {{S}t\'efan van der {W}alt and {J}arrod {M}illman}, pages = {56 - 61}, @@ -37,8 +23,6 @@ @inproceedings{McKinney-2010 @inproceedings{binder-2018, author = {{Project Jupyter} and {M}atthias {B}ussonnier and {J}essica {F}orde and {J}eremy {F}reeman and 
{B}rian {G}ranger and {T}im {H}ead and {C}hris {H}oldgraf and {K}yle {K}elley and {G}ladys {N}alvarte and {A}ndrew {O}sheroff and {M} {P}acer and {Y}uvi {P}anda and {F}ernando {P}erez and {B}enjamin {R}agan-{K}elley and {C}arol {W}illing}, booktitle = {{P}roceedings of the 17th {P}ython in {S}cience {C}onference}, - date-added = {2025-06-23 10:23:31 -0400}, - date-modified = {2025-06-23 10:38:54 -0400}, doi = {10.25080/Majora-4af1f417-011}, editor = {{F}atih {A}kici and {D}avid {L}ippa and {D}illon {N}iederhut and {M} {P}acer}, pages = {113 - 120}, @@ -47,12 +31,9 @@ @inproceedings{binder-2018 bdsk-url-1 = {https://doi.org/10.25080/Majora-4af1f417-011}} @inproceedings{boerner_access_2023, - abstract = {As the National Science Foundation evolves its investments in cyberinfrastructure, it has made a significant investment in the ACCESS (Advanced Cyberinfrastructure Coordination Ecosystem: Services \& Support) program instantiating a novel set of services along with a novel governance and management model. Research cyberinfrastructure (CI) is a key catalyst for discovery and innovation and plays a critical role in ensuring U.S. leadership in science and engineering, economic competitiveness, and national security, consistent with NSF's mission. Funding of a set of awards through the ACCESS program has established a suite of CI coordination services targeted at supporting a broad and diverse set of requirements, researchers, and usage modalities spanning all areas of science and engineering research and education complemented by support for the collective and coordinated operation of the overall ACCESS program.}, address = {New York, NY, USA}, author = {Boerner, Timothy J. and Deems, Stephen and Furlani, Thomas R. and Knuth, Shelley L. 
and Towns, John}, booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2023: {Computing} for the {Common} {Good}}, - date-added = {2025-06-23 10:14:04 -0400}, - date-modified = {2025-06-23 10:17:20 -0400}, doi = {10.1145/3569951.3597559}, isbn = {978-1-4503-9985-2}, month = sep, @@ -68,12 +49,9 @@ @inproceedings{boerner_access_2023 bdsk-url-2 = {https://doi.org/10.1145/3569951.3597559}} @inproceedings{hancock_jetstream2_2021, - abstract = {Jetstream2 will be a category I production cloud resource that is part of the National Science Foundation's Innovative HPC Program. The project's aim is to accelerate science and engineering by providing ``on-demand'' programmable infrastructure built around a core system at Indiana University and four regional sites. Jetstream2 is an evolution of the Jetstream platform, which functions primarily as an Infrastructure-as-a-Service cloud. The lessons learned in cloud architecture, distributed storage, and container orchestration have inspired changes in both hardware and software for Jetstream2. These lessons have wide implications as institutions converge HPC and cloud technology while building on prior work when deploying their own cloud environments. Jetstream2's next-generation hardware, robust open-source software, and enhanced virtualization will provide a significant platform to further cloud adoption within the US research and education communities.}, address = {New York, NY, USA}, author = {Hancock, David Y. and Fischer, Jeremy and Lowe, John Michael and Snapp-Childs, Winona and Pierce, Marlon and Marru, Suresh and Coulter, J. 
Eric and Vaughn, Matthew and Beck, Brian and Merchant, Nirav and Skidmore, Edwin and Jacobs, Gwen}, booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2021: {Evolution} {Across} {All} {Dimensions}}, - date-added = {2025-06-23 10:13:55 -0400}, - date-modified = {2025-06-23 10:22:12 -0400}, doi = {10.1145/3437359.3465565}, isbn = {978-1-4503-8292-2}, month = jul, @@ -89,10 +67,7 @@ @inproceedings{hancock_jetstream2_2021 bdsk-url-2 = {https://doi.org/10.1145/3437359.3465565}} @article{cockett_continuous_2024, - abstract = {Science requires new mediums to compose ideas and ways to share research findings iteratively, as early as possible and connected directly to software and data. In this paper we discuss two tools for scientific authoring and publishing, MyST Markdown and Curvenote, and illustrate examples of improving metadata, reimagining the reading experience, including computational content, and transforming publishing practices for individuals and societies through automation and continuous practices.}, author = {Cockett, Rowan and Purves, Steve and Koch, Franklin and Morrison, Mike}, - date-added = {2025-06-23 10:13:45 -0400}, - date-modified = {2025-06-23 10:17:54 -0400}, doi = {10.25080/NKVC9349}, issn = {2575-9752}, journal = {scipy}, @@ -109,8 +84,6 @@ @article{cockett_continuous_2024 @misc{mystmd_2025, author = {Rowan Cockett and Franklin Koch and Steve Purves and Angus Hollands and Yuxi Wang and Dylan Grandmont and Chris Holdgraf and Andrea and Jan-Hendrik M{\"u}ller and Spencer Lyon and Cristian Le and Jim Madge and wwx and Sugan Reden and Yuanhao Geng and Ryan Lovett and Mikkel Roald-Arb{\o}l and Matt McKay and Matthew Brett and M Bussonnier and Mridul Seth and Nicolas M. 
Thi{\'e}ry and Raniere Silva and Sarah Brown and Sinan Bekar and Tavin Cole and Thad Guidry and Toby Driscoll}, copyright = {MIT License}, - date-added = {2025-06-23 10:13:30 -0400}, - date-modified = {2025-06-23 10:41:47 -0400}, doi = {10.5281/ZENODO.14805610}, month = feb, publisher = {Zenodo}, @@ -123,11 +96,8 @@ @misc{mystmd_2025 bdsk-url-2 = {https://doi.org/10.5281/ZENODO.14805610}} @misc{jupyterbook_2020, - abstract = {{\textless}strong{\textgreater}Jupyter Book{\textless}/strong{\textgreater} is an open source project for building beautiful, publication-quality books and documents from computational material. Here are some of the features of Jupyter Book: ✔ Write publication-quality content in Markdown. You can write in either Jupyter Markdown, or an extended flavor of Markdown with publishing features. This includes support for rich syntax such as citations and cross-references, math and equations, and figures. ✔ Write content in Jupyter Notebook. This allows you to include your code and outputs in your book. You can also write notebooks entirely in Markdown that get executed when you build your book. ✔ Execute and cache your book's content. For {\textless}code{\textgreater}.ipynb{\textless}/code{\textgreater} and Markdown notebooks, execute code and insert the latest outputs into your book. In addition, cache and re-use outputs to be used later. ✔ Insert notebook outputs into your content. Generate outputs as you build your documentation, and insert them in-line with your content across pages. ✔ Add interactivity to your book. You can toggle cell visibility, include interactive outputs from Jupyter, and connect with online services like Binder. ✔ Generate a variety of outputs. This includes single- and multi-page websites, as well as PDF outputs. ✔ Build books with a simple command-line interface. 
You can quickly generate your books with one command, like so: {\textless}code{\textgreater}jupyter-book build mybook/{\textless}/code{\textgreater}}, author = {{Executable Books Community}}, copyright = {Open Access}, - date-added = {2025-06-23 10:13:12 -0400}, - date-modified = {2025-06-23 10:21:56 -0400}, doi = {10.5281/ZENODO.2561065}, keywords = {data science, jupyter, publishing, scholarship}, month = feb, @@ -140,11 +110,8 @@ @misc{jupyterbook_2020 bdsk-url-2 = {https://doi.org/10.5281/ZENODO.2561065}} @misc{atmoscol2023, - abstract = {This is a stable release}, author = {Alfonso Ladino and Nicole Rivera and Max Grover}, copyright = {Creative Commons Attribution 4.0 International}, - date-added = {2025-06-23 10:12:58 -0400}, - date-modified = {2025-06-23 10:24:34 -0400}, doi = {10.5281/ZENODO.8316796}, month = oct, publisher = {Zenodo}, @@ -157,10 +124,7 @@ @misc{atmoscol2023 bdsk-url-2 = {https://doi.org/10.5281/ZENODO.8316796}} @misc{ladino_erad_2024, - abstract = {The course will discuss the principles of open science and provide an overview of the most mature and exciting software packages available for radar data processing (ex. LROSE, Py-ART, pyrad, BAL- TRAD, wradlib) and how they connect with the scientific software stack. The course will be built with Jupyter Notebooks as hands-on approach for interactive user experi- ence. The main course programming language is Python, but also Command Line Tools are used. The course will also highlight the "xradar" package, implementing the newly adopted FM301/CfRadial2 WMO standard, as well as the gpm-api software, which facilitates the download and analysis of TRMM PR and GPM DPR spaceborne radars data. These two tools will be used to showcase how to harness the power of xarray and dask for efficient, distributed radar data processing. The course will cover operational use (e.g. 
in HPC environments or Cloud Infrastructure) as well as algorithm development, enabling the participants to implement their own algorithms. The course will also show how to create workflows for different aspects of weather radar data processing, using open datasets relevant to the attendees and ERAD 2024}, author = {Ladino, Alfonso and del Moral M{\'e}ndez, Anna and Javornik, Brenda and Michelson, Daniel and Wolfensberger, Daniel and Ghiggi, Gionata and DeHart, Jen and Figueras i Ventura, Jordi and Giles, Julian and M{\"u}hlbauer, Kai and Grover, Maxwell and Dixon, Mike and Jackson, Robert and Collis, Scott and Cha, Ting-Yu and contributors, ERAD2024 Open Radar Science Shortcourse}, - date-added = {2025-06-23 10:12:48 -0400}, - date-modified = {2025-06-23 10:24:42 -0400}, doi = {10.5281/zenodo.13694511}, month = sep, publisher = {Zenodo}, @@ -172,11 +136,8 @@ @misc{ladino_erad_2024 bdsk-url-2 = {https://doi.org/10.5281/zenodo.13694511}} @inproceedings{pimentel_2019, - abstract = {Jupyter Notebooks have been widely adopted by many different communities, both in science and industry. They support the creation of literate programming documents that combine code, text, and execution results with visualizations and all sorts of rich media. The self-documenting aspects and the ability to reproduce results have been touted as significant benefits of notebooks. At the same time, there has been growing criticism that the way notebooks are being used leads to unexpected behavior, encourage poor coding practices, and that their results can be hard to reproduce. To understand good and bad practices used in the development of real notebooks, we studied 1.4 million notebooks from GitHub. We present a detailed analysis of their characteristics that impact reproducibility. 
We also propose a set of best practices that can improve the rate of reproducibility and discuss open challenges that require further research and development.}, author = {Pimentel, Jo{\~a}o Felipe and Murta, Leonardo and Braganholo, Vanessa and Freire, Juliana}, booktitle = {2019 {IEEE}/{ACM} 16th {International} {Conference} on {Mining} {Software} {Repositories} ({MSR})}, - date-added = {2025-06-23 10:12:37 -0400}, - date-modified = {2025-06-23 10:40:12 -0400}, doi = {10.1109/MSR.2019.00077}, file = {IEEE Xplore Abstract Record:/Users/brianrose/Zotero/storage/MDAB5D8U/8816763.html:text/html}, keywords = {Best practices, github, jupyter notebook, Media, Programming, Python, reproducibility, Testing, Tools}, @@ -191,11 +152,8 @@ @inproceedings{pimentel_2019 bdsk-url-2 = {https://doi.org/10.1109/MSR.2019.00077}} @article{wilkinson_fair_2016, - abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders---representing academia, industry, funding agencies, and scholarly publishers---have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the FAIR Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the FAIR Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the FAIR Principles, and includes the rationale behind them, and some exemplar implementations in the community.}, author = {Wilkinson, Mark D. and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E. 
and Bouwman, Jildau and Brookes, Anthony J. and Clark, Tim and Crosas, Merc{\`e} and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T. and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J. G. and Groth, Paul and Goble, Carole and Grethe, Jeffrey S. and Heringa, Jaap and 't Hoen, Peter A. C. and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J. and Martone, Maryann E. and Mons, Albert and Packer, Abel L. and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A. and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend}, copyright = {2016 The Author(s)}, - date-added = {2025-06-23 10:12:32 -0400}, - date-modified = {2025-06-23 10:12:32 -0400}, doi = {10.1038/sdata.2016.18}, file = {Full Text PDF:/Users/brianrose/Zotero/storage/YX8Y3T6Q/Wilkinson et al. - 2016 - The FAIR Guiding Principles for scientific data management and stewardship.pdf:application/pdf}, issn = {2052-4463}, @@ -216,18 +174,13 @@ @article{wilkinson_fair_2016 @book{dask_2016, author = {{Dask Development Team}}, - date-added = {2025-06-23 10:12:06 -0400}, - date-modified = {2025-06-23 10:20:19 -0400}, title = {Dask: {Library} for dynamic task scheduling}, url = {https://dask.org}, year = {2016}, bdsk-url-1 = {http://dask.pydata.org}} @article{xarray_2017, - abstract = {xarray is an open source project and Python package that provides a toolkit and data structures for N-dimensional labeled arrays. Our approach combines an application programing interface (API) inspired by pandas with the Common Data Model for self-described scientific data. 
Key features of the xarray package include label-based indexing and arithmetic, interoperability with the core scientific Python packages (e.g., pandas, NumPy, Matplotlib), out-of-core computation on datasets that don't fit into memory, a wide range of serialization and input/output (I/O) options, and advanced multi-dimensional data manipulation tools such as group-by and resampling. xarray, as a data model and analytics toolkit, has been widely adopted in the geoscience community but is also used more broadly for multi-dimensional data analysis in physics, machine learning and finance.}, author = {Hoyer, Stephan and Hamman, Joe}, - date-added = {2025-06-23 10:11:48 -0400}, - date-modified = {2025-06-23 10:38:09 -0400}, doi = {10.5334/jors.148}, issn = {2049-9647}, journal = {Journal of Open Research Software}, @@ -246,8 +199,6 @@ @article{xarray_2017 @misc{cartopy_2024, author = {Phil Elson and Elliott Sales de Andrade and Greg Lucas and Ryan May and Richard Hattersley and Ed Campbell and Ruth Comer and Andrew Dawson and Bill Little and Stephane Raynaud and scmc72 and Alan D. 
Snow and lgolston and Byron Blay and Peter Killick and lbdreyer and Patrick Peglar and Nat Wilson and Andrew and Jon Szymaniak and Adrien Berchet and Corinne Bosley and Luke Davis and Filipe and John Krasting and Matthew Bradbury and stephenworsley and Daniel Kirkham}, copyright = {Creative Commons Attribution 4.0 International}, - date-added = {2025-06-23 10:11:35 -0400}, - date-modified = {2025-06-23 10:37:47 -0400}, doi = {10.5281/ZENODO.1182735}, month = oct, publisher = {Zenodo}, @@ -260,13 +211,10 @@ @misc{cartopy_2024 bdsk-url-2 = {https://doi.org/10.5281/ZENODO.1182735}} @misc{pandas, - abstract = {Pandas is a powerful data structures for data analysis, time series, and statistics.}, annote = {Other If you use this software, please cite it as below.}, author = {{Pandas development team}}, copyright = {BSD 3-Clause "New" or "Revised" License}, - date-added = {2025-06-23 10:11:15 -0400}, - date-modified = {2025-06-23 10:27:09 -0400}, doi = {10.5281/ZENODO.3509134}, keywords = {data science, python}, month = sep, @@ -280,11 +228,8 @@ @misc{pandas bdsk-url-2 = {https://doi.org/10.5281/ZENODO.3509134}} @article{Harris_array_2020, - abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. NumPy is the primary array programming library for the Python language. It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. For example, in astronomy, NumPy was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. 
NumPy is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own NumPy-like interfaces and array objects. Owing to its central position in the ecosystem, NumPy increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface (API), provides a flexible framework to support the next decade of scientific and industrial analysis.}, author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, St{\'e}fan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del R{\'\i}o, Jaime Fern{\'a}ndez and Wiebe, Mark and Peterson, Pearu and G{\'e}rard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, copyright = {2020 The Author(s)}, - date-added = {2025-06-23 10:10:22 -0400}, - date-modified = {2025-06-23 10:36:09 -0400}, doi = {10.1038/s41586-020-2649-2}, issn = {1476-4687}, journal = {Nature}, @@ -304,8 +249,6 @@ @article{Harris_array_2020 @article{Rose_GEO_OSE_2023, author = {Rose, Brian E. J. and Clyne, John and May, Ryan and Munroe, James and Snyder, Amelia and Eroglu, Orhan and Tyle, Kevin}, - date-added = {2025-06-23 10:10:11 -0400}, - date-modified = {2025-06-23 10:35:16 -0400}, doi = {10.5281/zenodo.8184298}, language = {eng}, month = jul, @@ -320,8 +263,6 @@ @article{Rose_GEO_OSE_2023 @misc{Foundations, author = {Rose, Brian E. J. and Kent, Julia and Tyle, Kevin and Clyne, John and Banihirwe, Anderson and Camron, Drew and May, Ryan and Grover, Maxwell and Ford, Robert R. 
and Paul, Kevin and Morley, James and Eroglu, Orhan and Kailyn, Lily and Zacharias, Anissa}, - date-added = {2025-06-23 10:10:09 -0400}, - date-modified = {2025-06-23 10:30:13 -0400}, doi = {10.5281/zenodo.14759276}, file = {Snapshot:/Users/brianrose/Zotero/storage/EC446TLR/14759276.html:text/html}, month = jan, @@ -334,10 +275,7 @@ @misc{Foundations bdsk-url-2 = {https://doi.org/10.5281/zenodo.14759276}} @article{Abernathey_pangeo_2017, - abstract = {The Project Description from the NSF-funded Earthcube project "Pangeo: An Open Source Big Data Climate Science Platform" (NSF award 1740648)}, author = {Abernathey, Ryan and Paul, Kevin and Hamman, Joe and Rocklin, Matthew and Lepore, Chiara and Tippett, Michael and Henderson, Naomi and Seager, Richard and May, Ryan and Del Vento, Davide}, - date-added = {2025-06-23 10:10:05 -0400}, - date-modified = {2025-06-23 10:35:39 -0400}, doi = {10.6084/m9.figshare.5361094.v1}, language = {en}, month = aug,