diff --git a/paper.bib b/paper.bib new file mode 100644 index 000000000..cbdb8a909 --- /dev/null +++ b/paper.bib @@ -0,0 +1,287 @@ +@article{Matplotlib_2007, + author = {Hunter, John D.}, + doi = {10.1109/MCSE.2007.55}, + journal = {Computing in Science \& Engineering}, + keywords = {Graphics;Interpolation;Equations;Graphical user interfaces;Packaging;Image generation;User interfaces;Operating systems;Computer languages;Programming profession;Python;scripting languages;application development;scientific programming}, + number = {3}, + pages = {90--95}, + title = {{Matplotlib}: A {2D} Graphics Environment}, + volume = {9}, + year = {2007}, + bdsk-url-1 = {https://doi.org/10.1109/MCSE.2007.55}} + +@inproceedings{McKinney-2010, + author = {McKinney, Wes}, + booktitle = {Proceedings of the 9th {Python} in {Science} {Conference}}, + doi = {10.25080/Majora-92bf1922-00a}, + editor = {van der Walt, St{\'e}fan and Millman, Jarrod}, + pages = {56--61}, + title = {{Data} {Structures} for {Statistical} {Computing} in {Python}}, + year = {2010}, + bdsk-url-1 = {https://doi.org/10.25080/Majora-92bf1922-00a}} + +@inproceedings{binder-2018, + author = {{Project Jupyter} and Bussonnier, Matthias and Forde, Jessica and Freeman, Jeremy and Granger, Brian and Head, Tim and Holdgraf, Chris and Kelley, Kyle and Nalvarte, Gladys and Osheroff, Andrew and Pacer, M and Panda, Yuvi and Perez, Fernando and Ragan-Kelley, Benjamin and Willing, Carol}, + booktitle = {Proceedings of the 17th {Python} in {Science} {Conference}}, + doi = {10.25080/Majora-4af1f417-011}, + editor = {Akici, Fatih and Lippa, David and Niederhut, Dillon and Pacer, M}, + pages = {113--120}, + title = {{Binder} 2.0 - {Reproducible}, interactive, sharable environments for science at scale}, + year = {2018}, + bdsk-url-1 = {https://doi.org/10.25080/Majora-4af1f417-011}} + +@inproceedings{boerner_access_2023, + address = {New York, NY, USA}, + author = 
{Boerner, Timothy J. and Deems, Stephen and Furlani, Thomas R. and Knuth, Shelley L. and Towns, John}, + booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2023: {Computing} for the {Common} {Good}}, + doi = {10.1145/3569951.3597559}, + isbn = {978-1-4503-9985-2}, + month = sep, + pages = {173--176}, + publisher = {Association for Computing Machinery}, + series = {{PEARC} '23}, + shorttitle = {{ACCESS}}, + title = {{ACCESS}: {Advancing} {Innovation}: {NSF}'s {Advanced} {Cyberinfrastructure} {Coordination} {Ecosystem}: {Services} \& {Support}}, + url = {https://dl.acm.org/doi/10.1145/3569951.3597559}, + urldate = {2025-03-08}, + year = {2023}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/3569951.3597559}, + bdsk-url-2 = {https://doi.org/10.1145/3569951.3597559}} + +@inproceedings{hancock_jetstream2_2021, + address = {New York, NY, USA}, + author = {Hancock, David Y. and Fischer, Jeremy and Lowe, John Michael and Snapp-Childs, Winona and Pierce, Marlon and Marru, Suresh and Coulter, J. 
Eric and Vaughn, Matthew and Beck, Brian and Merchant, Nirav and Skidmore, Edwin and Jacobs, Gwen}, + booktitle = {Practice and {Experience} in {Advanced} {Research} {Computing} 2021: {Evolution} {Across} {All} {Dimensions}}, + doi = {10.1145/3437359.3465565}, + isbn = {978-1-4503-8292-2}, + month = jul, + pages = {1--8}, + publisher = {Association for Computing Machinery}, + series = {{PEARC} '21}, + shorttitle = {Jetstream2}, + title = {Jetstream2: {Accelerating} cloud computing via {Jetstream}}, + url = {https://dl.acm.org/doi/10.1145/3437359.3465565}, + urldate = {2025-03-08}, + year = {2021}, + bdsk-url-1 = {https://dl.acm.org/doi/10.1145/3437359.3465565}, + bdsk-url-2 = {https://doi.org/10.1145/3437359.3465565}} + +@inproceedings{cockett_continuous_2024, + author = {Cockett, Rowan and Purves, Steve and Koch, Franklin and Morrison, Mike}, + booktitle = {Proceedings of the 23rd {Python} in {Science} {Conference}}, + doi = {10.25080/NKVC9349}, + issn = {2575-9752}, + language = {en}, + month = jun, + pages = {121--136}, + title = {Continuous {Tools} for {Scientific} {Publishing}}, + url = {https://proceedings.scipy.org/articles/NKVC9349}, + urldate = {2025-02-06}, + year = {2024}, + bdsk-url-1 = {https://proceedings.scipy.org/articles/NKVC9349}, + bdsk-url-2 = {https://doi.org/10.25080/NKVC9349}} + +@misc{mystmd_2025, + author = {Rowan Cockett and Franklin Koch and Steve Purves and Angus Hollands and Yuxi Wang and Dylan Grandmont and Chris Holdgraf and Andrea and Jan-Hendrik M{\"u}ller and Spencer Lyon and Cristian Le and Jim Madge and wwx and Sugan Reden and Yuanhao Geng and Ryan Lovett and Mikkel Roald-Arb{\o}l and Matt McKay and Matthew Brett and M Bussonnier and Mridul Seth and Nicolas M. 
Thi{\'e}ry and Raniere Silva and Sarah Brown and Sinan Bekar and Tavin Cole and Thad Guidry and Toby Driscoll}, + copyright = {MIT License}, + doi = {10.5281/ZENODO.14805610}, + month = feb, + publisher = {Zenodo}, + shorttitle = {jupyter-book/mystmd}, + title = {mystmd v1.4.0}, + url = {https://zenodo.org/doi/10.5281/zenodo.14805610}, + urldate = {2025-03-08}, + year = {2025}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.14805610}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.14805610}} + +@misc{jupyterbook_2020, + author = {{Executable Books Community}}, + copyright = {Open Access}, + doi = {10.5281/ZENODO.2561065}, + keywords = {data science, jupyter, publishing, scholarship}, + month = feb, + publisher = {Zenodo}, + title = {Jupyter {Book}}, + url = {https://zenodo.org/record/2561065}, + urldate = {2025-03-08}, + year = {2020}, + bdsk-url-1 = {https://zenodo.org/record/2561065}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.2561065}} + +@misc{atmoscol2023, + author = {Alfonso Ladino and Nicole Rivera and Max Grover}, + copyright = {Creative Commons Attribution 4.0 International}, + doi = {10.5281/ZENODO.8316796}, + month = oct, + publisher = {Zenodo}, + shorttitle = {aladinor/{Atmoscol2023}}, + title = {aladinor/{Atmoscol2023}: {Steable} version 0.1.0-{Beta}}, + url = {https://zenodo.org/doi/10.5281/zenodo.8316796}, + urldate = {2025-06-09}, + year = {2023}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.8316796}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.8316796}} + +@misc{ladino_erad_2024, + author = {Ladino, Alfonso and del Moral M{\'e}ndez, Anna and Javornik, Brenda and Michelson, Daniel and Wolfensberger, Daniel and Ghiggi, Gionata and DeHart, Jen and Figueras i Ventura, Jordi and Giles, Julian and M{\"u}hlbauer, Kai and Grover, Maxwell and Dixon, Mike and Jackson, Robert and Collis, Scott and Cha, Ting-Yu and {ERAD2024 Open Radar Science Shortcourse contributors}}, + doi = {10.5281/zenodo.13694511}, + month = sep, + publisher = 
{Zenodo}, + title = {{ERAD} 2024 {Open} {Radar} {Science} {Shortcourse}}, + url = {https://zenodo.org/records/13694511}, + urldate = {2025-03-10}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/records/13694511}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.13694511}} + +@inproceedings{pimentel_2019, + author = {Pimentel, Jo{\~a}o Felipe and Murta, Leonardo and Braganholo, Vanessa and Freire, Juliana}, + booktitle = {2019 {IEEE}/{ACM} 16th {International} {Conference} on {Mining} {Software} {Repositories} ({MSR})}, + doi = {10.1109/MSR.2019.00077}, + file = {IEEE Xplore Abstract Record:/Users/brianrose/Zotero/storage/MDAB5D8U/8816763.html:text/html}, + issn = {2574-3864}, + keywords = {Best practices, github, jupyter notebook, Media, Programming, Python, reproducibility, Testing, Tools}, + month = may, + pages = {507--517}, + title = {A {Large}-{Scale} {Study} {About} {Quality} and {Reproducibility} of {Jupyter} {Notebooks}}, + url = {https://ieeexplore.ieee.org/document/8816763}, + urldate = {2025-03-08}, + year = {2019}, + bdsk-url-1 = {https://ieeexplore.ieee.org/document/8816763}, + bdsk-url-2 = {https://doi.org/10.1109/MSR.2019.00077}} + +@article{wilkinson_fair_2016, + author = {Wilkinson, Mark D. and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E. and Bouwman, Jildau and Brookes, Anthony J. and Clark, Tim and Crosas, Merc{\`e} and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T. and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J. G. and Groth, Paul and Goble, Carole and Grethe, Jeffrey S. and Heringa, Jaap and 't Hoen, Peter A. C. and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J. and Martone, Maryann E. and Mons, Albert and Packer, Abel L. 
and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A. and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend}, + copyright = {2016 The Author(s)}, + doi = {10.1038/sdata.2016.18}, + file = {Full Text PDF:/Users/brianrose/Zotero/storage/YX8Y3T6Q/Wilkinson et al. - 2016 - The FAIR Guiding Principles for scientific data management and stewardship.pdf:application/pdf}, + issn = {2052-4463}, + journal = {Scientific Data}, + keywords = {Publication characteristics, Research data}, + language = {en}, + month = mar, + number = {1}, + pages = {160018}, + publisher = {Nature Publishing Group}, + title = {The {FAIR} {Guiding} {Principles} for scientific data management and stewardship}, + url = {https://www.nature.com/articles/sdata201618}, + urldate = {2025-02-06}, + volume = {3}, + year = {2016}, + bdsk-url-1 = {https://www.nature.com/articles/sdata201618}, + bdsk-url-2 = {https://doi.org/10.1038/sdata.2016.18}} + +@misc{dask_2016, + author = {{Dask Development Team}}, + title = {Dask: {Library} for dynamic task scheduling}, + url = {https://dask.org}, + year = {2016}, + bdsk-url-1 = {https://dask.org}} + +@article{xarray_2017, + author = {Hoyer, Stephan and Hamman, Joe}, + doi = {10.5334/jors.148}, + issn = {2049-9647}, + journal = {Journal of Open Research Software}, + language = {en-US}, + month = apr, + number = {1}, + shorttitle = {xarray}, + title = {xarray: {N}-{D} labeled {Arrays} and {Datasets} in {Python}}, + url = {https://openresearchsoftware.metajnl.com/articles/10.5334/jors.148}, + urldate = {2025-03-08}, + volume = {5}, + year = {2017}, + bdsk-url-1 = {https://openresearchsoftware.metajnl.com/articles/10.5334/jors.148}, + bdsk-url-2 = 
{https://doi.org/10.5334/jors.148}} + +@misc{cartopy_2024, + author = {Phil Elson and Elliott Sales de Andrade and Greg Lucas and Ryan May and Richard Hattersley and Ed Campbell and Ruth Comer and Andrew Dawson and Bill Little and Stephane Raynaud and scmc72 and Alan D. Snow and lgolston and Byron Blay and Peter Killick and lbdreyer and Patrick Peglar and Nat Wilson and Andrew and Jon Szymaniak and Adrien Berchet and Corinne Bosley and Luke Davis and Filipe and John Krasting and Matthew Bradbury and stephenworsley and Daniel Kirkham}, + copyright = {Creative Commons Attribution 4.0 International}, + doi = {10.5281/ZENODO.1182735}, + month = oct, + publisher = {Zenodo}, + shorttitle = {{SciTools}/cartopy}, + title = {cartopy v0.24.1}, + url = {https://zenodo.org/doi/10.5281/zenodo.1182735}, + urldate = {2025-03-08}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.1182735}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.1182735}} + +@misc{pandas, + annote = {Other +If you use this software, please cite it as below.}, + author = {{Pandas development team}}, + copyright = {BSD 3-Clause "New" or "Revised" License}, + doi = {10.5281/ZENODO.3509134}, + keywords = {data science, python}, + month = sep, + publisher = {Zenodo}, + shorttitle = {pandas-dev/pandas}, + title = {pandas-dev/pandas: {Pandas}}, + url = {https://zenodo.org/doi/10.5281/zenodo.3509134}, + urldate = {2025-03-08}, + year = {2024}, + bdsk-url-1 = {https://zenodo.org/doi/10.5281/zenodo.3509134}, + bdsk-url-2 = {https://doi.org/10.5281/ZENODO.3509134}} + +@article{Harris_array_2020, + author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, St{\'e}fan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. 
and Brett, Matthew and Haldane, Allan and Fern{\'a}ndez del R{\'\i}o, Jaime and Wiebe, Mark and Peterson, Pearu and G{\'e}rard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, + copyright = {2020 The Author(s)}, + doi = {10.1038/s41586-020-2649-2}, + issn = {1476-4687}, + journal = {Nature}, + keywords = {Computational neuroscience, Computational science, Computer science, Software, Solar physics}, + language = {en}, + month = sep, + number = {7825}, + pages = {357--362}, + publisher = {Nature Publishing Group}, + title = {Array programming with {NumPy}}, + url = {https://www.nature.com/articles/s41586-020-2649-2}, + urldate = {2025-03-08}, + volume = {585}, + year = {2020}, + bdsk-url-1 = {https://www.nature.com/articles/s41586-020-2649-2}, + bdsk-url-2 = {https://doi.org/10.1038/s41586-020-2649-2}} + +@misc{Rose_GEO_OSE_2023, + author = {Rose, Brian E. J. and Clyne, John and May, Ryan and Munroe, James and Snyder, Amelia and Eroglu, Orhan and Tyle, Kevin}, + doi = {10.5281/zenodo.8184298}, + language = {eng}, + month = jul, + publisher = {Zenodo}, + shorttitle = {Project Pythia GEO OSE proposal}, + title = {Collaborative {Research}: {GEO} {OSE} {TRACK} 2: {Project} {Pythia} and {Pangeo}: {Building} an inclusive geoscience community through accessible, reusable, and reproducible workflows}, + url = {https://zenodo.org/records/8184298}, + urldate = {2025-02-06}, + year = {2023}, + bdsk-url-1 = {https://zenodo.org/records/8184298}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.8184298}} + +@misc{Foundations, + author = {Rose, Brian E. J. and Kent, Julia and Tyle, Kevin and Clyne, John and Banihirwe, Anderson and Camron, Drew and May, Ryan and Grover, Maxwell and Ford, Robert R. 
and Paul, Kevin and Morley, James and Eroglu, Orhan and Kailyn, Lily and Zacharias, Anissa}, + doi = {10.5281/zenodo.14759276}, + file = {Snapshot:/Users/brianrose/Zotero/storage/EC446TLR/14759276.html:text/html}, + month = jan, + publisher = {Zenodo}, + title = {Pythia {Foundations}}, + url = {https://zenodo.org/records/14759276}, + urldate = {2025-02-06}, + year = {2025}, + bdsk-url-1 = {https://zenodo.org/records/14759276}, + bdsk-url-2 = {https://doi.org/10.5281/zenodo.14759276}} + +@misc{Abernathey_pangeo_2017, + author = {Abernathey, Ryan and Paul, Kevin and Hamman, Joe and Rocklin, Matthew and Lepore, Chiara and Tippett, Michael and Henderson, Naomi and Seager, Richard and May, Ryan and Del Vento, Davide}, + doi = {10.6084/m9.figshare.5361094.v1}, + language = {en}, + month = aug, + title = {Pangeo {NSF} {Earthcube} {Proposal}}, + url = {https://figshare.com/articles/journal_contribution/Pangeo_NSF_Earthcube_Proposal/5361094/1}, + urldate = {2025-03-08}, + year = {2017}, + bdsk-url-1 = {https://figshare.com/articles/journal_contribution/Pangeo_NSF_Earthcube_Proposal/5361094/1}, + bdsk-url-2 = {https://doi.org/10.6084/m9.figshare.5361094.v1}} diff --git a/paper.md b/paper.md new file mode 100644 index 000000000..93ae88ead --- /dev/null +++ b/paper.md @@ -0,0 +1,112 @@ +--- +title: 'Pythia Foundations: A community learning resource for Python-based computing in the geosciences' +tags: + - Python + - Geoscience + - Jupyter + - GitHub + - Numpy + - Matplotlib + - Cartopy + - Datetime + - Pandas + - Netcdf + - Xarray + - Dask +authors: + - name: Brian E. J. Rose + orcid: 0000-0002-9961-3821 + affiliation: 1 # (Multiple affiliations must be quoted) + - name: Robert R. Ford + orcid: 0000-0001-5483-4965 + affiliation: 1 + - name: Anderson Banihirwe + orcid: 0000-0001-6583-571X + affiliation: 2 + - name: M. 
Drew Camron + orcid: 0000-0001-7246-6502 + affiliation: 3 + - name: John Clyne + orcid: 0000-0003-2788-9017 + affiliation: 4 + - name: Orhan Eroglu + orcid: 0000-0003-3099-8775 + affiliation: 4 + - name: Katelyn FitzGerald + orcid: 0000-0003-4184-1917 + affiliation: 4 + - name: Maxwell A. Grover + orcid: 0000-0002-0370-8974 + affiliation: 5 + - name: Julia Kent + orcid: 0000-0002-5611-8986 + affiliation: 4 + - name: Ryan May + orcid: 0000-0003-2907-038X + affiliation: 3 + - name: Kevin Paul + orcid: 0000-0001-8155-8038 + affiliation: 6 + - name: Kevin R. Tyle + orcid: 0000-0001-5249-9665 + affiliation: 1 + - name: Anissa Zacharias + orcid: 0000-0002-2666-8493 + affiliation: 4 + - name: Author Without ORCID + affiliation: 2 +affiliations: + - name: Department of Atmospheric and Environmental Sciences, University at Albany (State University of New York) + index: 1 + - name: CarbonPlan + index: 2 + - name: NSF Unidata, University Corporation for Atmospheric Research + index: 3 + - name: Computational Information Systems Lab., NSF National Center for Atmospheric Research + index: 4 + - name: Environmental Science Division, Argonne National Laboratory + index: 5 + - name: NVIDIA Corporation + index: 6 +date: 26 June 2025 +bibliography: paper.bib +--- + +# Summary + +Pythia Foundations [@Foundations] is the flagship product of the first phase of Project Pythia [@Rose_GEO_OSE_2023], a broad community effort to build, house, share, and maintain high-quality learning resources for Python-based computing in the geosciences. Project Pythia’s central mission is to accelerate progress across the geosciences by reducing roadblocks to sharing technical knowledge, particularly related to scalable and reproducible data analysis in the cloud using the open-source Python software ecosystem. 
+ +Pythia Foundations is a geoscience-flavored introduction to essential tools in the scientific Python ecosystem and Pangeo [@Abernathey_pangeo_2017] stack (e.g., JupyterLab, NumPy [@Harris_array_2020], Matplotlib [@Matplotlib_2007], Pandas [@McKinney-2010; @pandas], Cartopy [@cartopy_2024], Xarray [@xarray_2017], Dask [@dask_2016]), plus environment management tools (conda), basics of version control (git), and effective use of GitHub as a technical communication platform (Figure 1). It is a community-owned executable textbook backed by computational resources for automated health-checking and interactive use. It covers the foundational knowledge that is needed to get started with Python in the computational geosciences, as well as to become an effective citizen-practitioner in key open geoscience software ecosystems. It is intended for anyone from undergraduate students through established geoscientists who are relatively new to working in Python. The book assumes a basic knowledge of programming concepts, but a brief "Quickstart" lesson highlights distinctive features of Python for users migrating from other languages. + +A distinguishing feature of Pythia Foundations is its rigorous quality control and maintenance. All Python code and external web links are tested nightly, and book contents are kept up to date as the software ecosystem and data sources evolve. Users can run the examples with a “one click” launch into a dedicated cloud-based Binder service [@binder-2018]. + +![Figure 1: Pythia Foundations infographic](images/ProjectPythia_Foundations_Infographic_v4.jpg) + +> Figure 1: A schematic of the content and organization of Pythia Foundations. The book is divided into two major sections, "Foundational Skills" and "Core Scientific Python Packages", limited to those currently in broad use across multiple geoscience disciplines. 
Pythia Cookbooks house more advanced or domain-specific content that references individual chapters from Foundations as prerequisites. + +# Statement of Need +Today’s geoscientists require not only domain expertise but also proficiency with specialized software and high-level technical skills to effectively analyze, manipulate, and manage potentially vast volumes of digital data in a complex and ever-changing computing environment. The scientific Python ecosystem and the emergence of cloud computing have been game-changers for many, providing an abundance of open-source tools with wide-ranging functionality. Ironically, however, this abundance is often untapped, and can be a source of great frustration. Scientists spend an inordinate amount of time pondering questions such as: Which tool or technology should I use? How do I use it? Can I trust it? Is it compatible with other tools in my workflow? Often, the answers are unclear, due to inadequate documentation or difficulty in finding relevant up-to-date working examples. The result is too much time spent navigating or avoiding technology—time that could have been spent productively doing science. Pythia Foundations fills this need by providing a trusted community-owned, web-accessible, geoscience-specific education and training resource for scientists and students at all career stages who want to know what tools to use and how to use them to explore their data. + +The Foundations book embodies the FAIR principles [@wilkinson_fair_2016] that play a central role in open science. Findability is served by gathering geoscience-specific tutorials into a high-visibility community archive. Accessibility is served by our automated CI testing and integrated public Binder. Tutorials and example code are largely Interoperable due to reliance on a common ecosystem of tools (e.g., NumPy and Xarray). 
Reusability is addressed through permissive licensing of book content and geoscience relevance of the examples, as well as our commitment to maintaining up-to-date working examples—an essential need in light of the widespread problem of rapid obsolescence of computational notebooks [@pimentel_2019]. + +# Content, instructional design, and usage +The scope of Pythia Foundations is limited to tools and packages that are currently in broad use across multiple geoscience disciplines; packages tailored to more narrow scientific domains are not covered in Foundations but may be suitable for a Cookbook. The book outline was designed collaboratively by the core author team, informed by community feedback, and drawing on our substantial collective experience in teaching Python-based scientific workflows in classrooms, workshops, and outreach events. + +The book is organized into two main sections: "Foundational Skills" and "Core Scientific Python Packages" (Figure 1). The foundational skills section covers “getting started” skills such as how to install Python and manage environments and how to run Python code in JupyterLab. There is also a set of tutorials on the use of GitHub and git for version control and collaboration on open source projects. The scope of this section was chosen with the specific goal of enabling users to contribute back to Pythia Foundations. + +A template notebook and contribution guide are provided for new content, encouraging consistency of style and organization. Each chapter includes explicit prerequisites, references, and estimated learning time. The book is intended primarily for self-study and reference, backed by the interactive Binder or deployed on user machines following the detailed guidance in the book. From web-based metrics, Pythia Foundations served roughly 29,000 users in 111 countries during calendar year 2024. + +Subsets of the book contents have been modified and repackaged for various workshops and short courses. 
A few examples include the 2022 EarthCube-AMGeO Hackathon, the ERAD 2024 Open Radar Science Shortcourse [@ladino_erad_2024], the Climatematch Academy international virtual summer school (annually since 2023), and a Spanish-language translation for a Colombian hydrometeorological workshop in 2023 [@atmoscol2023]. Co-authors Rose and Tyle have integrated material from Foundations into the formal curriculum for several semester-length undergraduate and graduate level courses at the University at Albany. + +# Computational infrastructure +The book is deployed as an easy-to-navigate website using JupyterBook [@jupyterbook_2020] and MyST-MD [@mystmd_2025], including “one-click” Binder links to interactive versions of every chapter. It features complete reproducibility: source materials are stored in a GitHub repository as unexecuted Jupyter notebooks, and all content is recreated in a bespoke computational environment during nightly builds and whenever the book pages are re-rendered. A full preview of the executed and rendered book is created whenever a change is proposed via a Pull Request. Development of the novel notebook publishing infrastructure enabling this full reproducibility was driven by the Pythia team’s need to collaborate on a large computational document. The build-and-preview automation that our team developed while authoring Foundations is now in wide use by the community of Cookbook creators. The automation notably includes the ability to route notebook execution through the same Binder environment offered to users, guaranteeing that the outputs of the automated builds are identical to those that users see when running code examples interactively. + +# Future plans +Pythia Foundations is a living document and is receiving continuous updates [@cockett_continuous_2024] and improvements, both from the core author team and the broader community of user-contributors. 
On the content side, Project Pythia is simultaneously fostering a growing collection of more advanced and domain-specific tutorials in our crowd-sourced community Cookbook gallery, with explicit links to prerequisites from Foundations. We anticipate periodic reviews of the Cookbook collection to identify cross-cutting content that should be abstracted back to Foundations, e.g., common data access patterns or analysis workflows. + +The computational and publishing infrastructure for Foundations is also continuously evolving. As of this writing, Foundations and all other Pythia content have just undergone a significant refresh and upgrade with the migration to JupyterBook 2, which is based on the MyST-MD publishing engine [@mystmd_2025]. Among the compelling new functionality unlocked by this transition is a rich content cross-referencing and embedding model that will enable more modular reuse and repackaging of Foundations content tailored to specific courses or audiences. + +# Acknowledgements +The authors gratefully acknowledge support from the broad open geoscience communities of Project Pythia and Pangeo for their feedback, suggestions, pull requests, and enthusiasm. Development and maintenance of Pythia Foundations were supported by the U.S. National Science Foundation (NSF) awards 2026899, 2026863, 2324302, 2324303 and 2324304. The Pythia BinderHub is deployed on Jetstream2 [@hancock_jetstream2_2021] at Indiana University through allocations EES230007 and SEE240014 from the Advanced Cyberinfrastructure Coordination Ecosystem: Services & Support (ACCESS) program [@boerner_access_2023], which is supported by NSF grants 2138259, 2138286, 2138307, 2137603, and 2138296. + +# References