diff --git a/.gitignore b/.gitignore index 0a19790..64ba84b 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ cython_debug/ # PyPI configuration file .pypirc +.Rproj.user diff --git a/README.md b/README.md index 2fc6584..2fce31f 100644 --- a/README.md +++ b/README.md @@ -1,73 +1,91 @@ # gen3-metadata User friendly tools for downloading and manipulating gen3 metadata - -## 1. Set up python venv -```bash -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` - -## 2. Create config file +## Python Installation ```bash -echo credentials_path=\"/path/to/credentials.json\" > .env +git clone https://github.com/AustralianBioCommons/gen3-metadata.git +bash build.sh ``` -## 3. Load library -```bash -pip install -e . -``` - - -## 4. Usage Example +## Usage Example +- Notebook can be found in the `example_notebook.ipynb` file +- Make sure to select .venv as the kernel in the notebook ```python -import os -from gen3_metadata.parser import Gen3MetadataParser - -# Set up credentials path -key_file = os.getenv('credentials_path') +from gen3_metadata.gen3_metadata_parser import Gen3MetadataParser -# Initialize the Gen3MetadataParser +# Initialise +key_file = "path/to/credentials.json" gen3metadata = Gen3MetadataParser(key_file) -# authenticate +# Authenticate gen3metadata.authenticate() -# Fetch data for different categories -gen3metadata.fetch_data("program1", "AusDiab_Simulated", "subject") -gen3metadata.fetch_data("program1", "AusDiab_Simulated", "demographic") -gen3metadata.fetch_data("program1", "AusDiab_Simulated", "medical_history") +# Fetching data and returning as dataframe +program_name = "program1" +project_code = "project1" +node_label="medical_history" +pd_data = gen3metadata.fetch_data_pd(program_name, project_code, node_label=node_label) +pd_data + +# Fetching data and returning as json +json_data = gen3metadata.fetch_data_json(program_name, project_code, node_label=node_label) +json_data +``` -# Convert fetched data to a 
pandas DataFrame -gen3metadata.data_to_pd() -# Print the keys of the data sets that have been fetched -print(gen3metadata.data_store.keys()) +## Running Tests -# Return a json of one of the datasets -gen3metadata.data_store["program1/AusDiab_Simulated/subject"] +The tests are written using the `pytest` framework. -# Return the pandas dataframe of one of the datasets -gen3metadata.data_store_pd["program1/AusDiab_Simulated/subject"] +```bash +pytest -vv tests/ ``` -The fetched data is stored in a dictionary within the `Gen3MetadataParser` instance. -Each category of data fetched is stored as a key-value pair in this dictionary, -where the key is the category name and the value is the corresponding data. -This allows for easy access and manipulation of the data after it has been fetched. +--- +# Installation of the R version of gen3-metadata +You can install the gen3metadata R tool from +[GitHub](https://github.com/) with: +``` r +if (!require("devtools")) install.packages("devtools") +devtools::install_github("AustralianBioCommons/gen3-metadata", subdir = "gen3metadata-R") +``` -## 5. Running Tests +The package depends on several other packages, which should hopefully be installed automatically. +In case this doesn't happen, run: +``` r +install.packages(c("httr", "jsonlite", "jose", "glue")) +``` -The tests are written using the `pytest` framework. +Then all you need to do is load the package. -```bash -pytest tests/ +``` r +library("gen3metadata") ``` +## Usage Example +This is a basic example to authenticate and load some data. 
+``` r +# Load the library +library("gen3metadata") + +# Set the path to the credentials file +key_file_path <- "path/to/credentials.json" + +# Create the Gen3 Metadata Parser object +gen3 <- Gen3MetadataParser(key_file_path) + +# Authenticate the object +gen3 <- authenticate(gen3) + +# Load some data +dat <- fetch_data(gen3, + program_name = "program1", + project_code = "AusDiab", + node_label = "subject") +``` diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..f0169bb --- /dev/null +++ b/build.sh @@ -0,0 +1,7 @@ + +#!/bin/bash +python -m venv .venv +source .venv/bin/activate +pip install --upgrade pip +pip install -r requirements.txt +pip install -e . \ No newline at end of file diff --git a/example_notebook.ipynb b/example_notebook.ipynb new file mode 100644 index 0000000..c13225f --- /dev/null +++ b/example_notebook.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example Workflow\n", + "- Make sure to run `bash build.sh` or follow the instructions in the README.md to build the package\n", + "- Make sure to select .venv as the kernel in the notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from gen3_metadata.gen3_metadata_parser import Gen3MetadataParser" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialise\n", + "key_file = \"path/to/credentials.json\"\n", + "gen3metadata = Gen3MetadataParser(key_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Authenticate\n", + "gen3metadata.authenticate()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fetching data and returning as dataframe\n", + "program_name= \"program1\"\n", + "project_code= \"AusDiab_Simulated\"\n", + "gen3metadata.fetch_data_pd(program_name, 
project_code, node_label= \"medical_history\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fetching data and returning as json\n", + "gen3metadata.fetch_data_json(program_name, project_code, node_label= \"medical_history\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/gen3metadata-R/DESCRIPTION b/gen3metadata-R/DESCRIPTION new file mode 100644 index 0000000..22f3554 --- /dev/null +++ b/gen3metadata-R/DESCRIPTION @@ -0,0 +1,24 @@ +Package: gen3metadata +Type: Package +Title: Gen3 metadata tools +Version: 0.1.0 +Author: Corey Giles [aut, cre] +Authors@R: c(person("Corey", "Giles", role = c("aut", "cre"), + email = "Corey.Giles@Baker.edu.au", + comment = c(ORCID = "0000-0002-6050-1259"))) +Maintainer: Corey Giles +Description: User friendly tools for downloading and manipulating gen3 metadata +License: GPL-3 +Encoding: UTF-8 +LazyData: true +Language: en-AU +RoxygenNote: 7.3.2 +Imports: + httr, + jsonlite, + jose, + glue +Suggests: + testthat (>= 3.0.0), + webmockr +Config/testthat/edition: 3 diff --git a/gen3metadata-R/NAMESPACE b/gen3metadata-R/NAMESPACE new file mode 100644 index 0000000..555b84b --- /dev/null +++ b/gen3metadata-R/NAMESPACE @@ -0,0 +1,16 @@ +# Generated by roxygen2: do not edit by hand + +S3method(authenticate,gen3_metadata) +S3method(fetch_data,gen3_metadata) +S3method(print,gen3_metadata) +export(Gen3MetadataParser) +export(authenticate) +export(fetch_data) +importFrom(glue,glue) +importFrom(httr,GET) +importFrom(httr,POST) +importFrom(httr,add_headers) +importFrom(httr,content) 
+importFrom(httr,http_error) +importFrom(jose,jwt_split) +importFrom(jsonlite,fromJSON) diff --git a/gen3metadata-R/R/gen3_metadata.R b/gen3metadata-R/R/gen3_metadata.R new file mode 100644 index 0000000..f46e044 --- /dev/null +++ b/gen3metadata-R/R/gen3_metadata.R @@ -0,0 +1,40 @@ +#' Create a Gen3 metadata parser object +#' +#' This function creates a new Gen3 metadata parser object by loading +#' credentials from a key file. Use this object to interact with the Gen3 API. +#' +#' @param key_file Character string path to the JSON key file containing Gen3 credentials +#' +#' @return A gen3_metadata object with credentials and base URL configured +#' +#' @export +Gen3MetadataParser <- function(key_file) { + + # Create the object to store data + obj <- list( + key_file = key_file, + base_url = "", + credentials = list( + api_key = "", + key_id = "" + ), + headers = NULL + ) + + # Load the key file + creds <- load_key_file(key_file) + + # Set the credentials in the object + obj$credentials$api_key <- creds$api_key + obj$credentials$key_id <- creds$key_id + + # Get the base URL from the API key + obj$base_url <- get_base_url(obj$credentials$api_key) + + # Set the class of the object + class(obj) <- "gen3_metadata" + + # Return the object + return(obj) + +} \ No newline at end of file diff --git a/gen3metadata-R/R/generics.R b/gen3metadata-R/R/generics.R new file mode 100644 index 0000000..8502803 --- /dev/null +++ b/gen3metadata-R/R/generics.R @@ -0,0 +1,31 @@ +#' Authenticate with Gen3 API +#' +#' Generic function to authenticate a gen3_metadata object with the Gen3 API +#' and obtain an access token for subsequent requests. 
+#' +#' @param gen3_metadata A gen3_metadata object +#' +#' @return The authenticated gen3_metadata object (invisibly) +#' +#' @export +authenticate <- function(gen3_metadata) { + UseMethod("authenticate") +} + +#' Fetch data from Gen3 API +#' +#' Generic function to fetch data from a specific node in the Gen3 submission API +#' for a given program and project. +#' +#' @param gen3_metadata An authenticated gen3_metadata object +#' @param program_name Character string name of the program +#' @param project_code Character string code of the project +#' @param node_label Character string label of the node to fetch data from +#' @param api_version Character string API version (default: "v0") +#' +#' @return Data frame containing the fetched data +#' +#' @export +fetch_data <- function(gen3_metadata, program_name, project_code, node_label, api_version) { + UseMethod("fetch_data") +} diff --git a/gen3metadata-R/R/get_base_url.R b/gen3metadata-R/R/get_base_url.R new file mode 100644 index 0000000..2ad0b34 --- /dev/null +++ b/gen3metadata-R/R/get_base_url.R @@ -0,0 +1,31 @@ +#' Extract base URL from Gen3 API key +#' +#' This function extracts the base URL from a Gen3 API key JWT token +#' by parsing the 'iss' (issuer) field from the payload. 
+#' +#' @param api_key Character string containing the Gen3 API key (JWT token) +#' +#' @return Character string containing the base URL +#' +#' @importFrom jose jwt_split +get_base_url <- function(api_key) { + + # Check if the API key is provided + if (is.null(api_key) || api_key == "") { + stop("API key must be provided.") + } + + # Extract the payload from the JWT + payload <- jose::jwt_split(api_key)$payload + + # Validate the payload + if (!"iss" %in% names(payload)) { + stop("The JWT payload must contain 'iss'.") + } + + # Extract the base URL from the payload + base_url <- sub("/user$", "", payload$iss) + + # Return the base url + return(base_url) +} \ No newline at end of file diff --git a/gen3metadata-R/R/load_key_file.R b/gen3metadata-R/R/load_key_file.R new file mode 100644 index 0000000..8b342fc --- /dev/null +++ b/gen3metadata-R/R/load_key_file.R @@ -0,0 +1,27 @@ +#' Load Gen3 credentials from key file +#' +#' This function reads a JSON key file containing Gen3 API credentials. 
+#' +#' @param key_file Character string path to the JSON key file +#' +#' @return List containing 'api_key' and 'key_id' credentials +#' +#' @importFrom jsonlite fromJSON +load_key_file <- function(key_file) { + + # Check if the key file exists + if (!file.exists(key_file)) { + stop("Key file does not exist: ", key_file) + } + + # Read the JSON file + creds <- jsonlite::fromJSON(key_file) + + # Validate the contents of the key file + if (!"api_key" %in% names(creds) || !"key_id" %in% names(creds)) { + stop("Key file must contain 'api_key' and 'key_id'.") + } + + # Return the credentials as a list + return(creds) +} \ No newline at end of file diff --git a/gen3metadata-R/R/methods-gen3_metadata.R b/gen3metadata-R/R/methods-gen3_metadata.R new file mode 100644 index 0000000..79c84d7 --- /dev/null +++ b/gen3metadata-R/R/methods-gen3_metadata.R @@ -0,0 +1,146 @@ +#' Authenticate gen3_metadata object with Gen3 API +#' +#' This method authenticates a gen3_metadata object by sending a POST request +#' to the Gen3 API with the provided credentials to obtain an access token. +#' +#' @param gen3_metadata A gen3_metadata object containing credentials +#' +#' @return The authenticated gen3_metadata object with token and headers set (invisibly) +#' +#' @method authenticate gen3_metadata +#' @rdname authenticate +#' @importFrom httr POST http_error content add_headers +#' @export +authenticate.gen3_metadata <- function(gen3_metadata) { + + # Check that the credentials are provided + if (is.null(gen3_metadata$credentials) || gen3_metadata$credentials$api_key == "") { + stop("Credentials must be provided to authenticate gen3_metadata.") + } + + # Send a POST request to the Gen3 API to get an access token + res <- httr::POST( + url = paste0(gen3_metadata$base_url, "/user/credentials/cdis/access_token"), + body = gen3_metadata$credentials, + encode = "json" + ) + + # Check for errors in the response + if (httr::http_error(res)) { + stop("Failed to authenticate with the Gen3 API. 
Please check your credentials.") + } + + # Get the content from the response + content <- httr::content(res, as = "parsed", type = "application/json") + + # Check if the access token is present in the response + if (is.null(content$access_token)) { + stop("Authentication failed: access token not found in the response.") + } + + # Extract the access token + gen3_metadata$token <- content$access_token + + # Set the headers for future requests + gen3_metadata$headers <- httr::add_headers( + Authorization = paste("Bearer", gen3_metadata$token) + ) + + # Return the updated gen3_metadata object + return(invisible(gen3_metadata)) +} + + +#' Fetch data from Gen3 submission API +#' +#' This method fetches data from a specific node in the Gen3 API +#' for a given program and project. The object must be authenticated first. +#' +#' @param gen3_metadata An authenticated gen3_metadata object +#' @param program_name Character string name of the program +#' @param project_code Character string code of the project +#' @param node_label Character string label of the node to fetch data from +#' @param api_version Character string API version (default: "v0") +#' +#' @return Data frame containing the fetched data from the specified node +#' +#' @method fetch_data gen3_metadata +#' @rdname fetch_data +#' @importFrom glue glue +#' @importFrom httr GET http_error content +#' @importFrom jsonlite fromJSON +#' @export +fetch_data.gen3_metadata <- function(gen3_metadata, + program_name, + project_code, + node_label, + api_version = "v0") { + + # Check that the gen3_metadata is authenticated + if (is.null(gen3_metadata$headers)) { + stop("Gen3 metadata object is not authenticated. 
Please authenticate first.") + } + + # Construct the URL for the API request + url <- glue::glue( + "{gen3_metadata$base_url}/api/{api_version}/submission", + "/{program_name}/{project_code}/export/" + ) + + # Make the GET request to the Gen3 API + res <- httr::GET( + url, + gen3_metadata$headers, + query = list(node_label = node_label, format = "json") + ) + + # Check for errors in the response + if (httr::http_error(res)) { + stop("Failed to fetch data from the Gen3 API. Please check your parameters and authentication.") + } + + # Extract the content from the response + content <- httr::content(res, as = "text", encoding = "UTF-8") + + # Parse the JSON content and return the data + data <- jsonlite::fromJSON(content, flatten = TRUE)$data + + # Return the fetched data + return(data) +} + + +#' Print gen3_metadata object summary +#' +#' This method prints a formatted summary of a gen3_metadata object. +#' +#' @param gen3_metadata A gen3_metadata object +#' +#' @return The gen3_metadata object (invisibly) +#' +#' @method print gen3_metadata +#' @rdname print +#' @export +print.gen3_metadata <- function(gen3_metadata) { + + # Print basic information about the gen3_metadata object + cat("Gen3 Metadata Parser\n") + cat("====================\n") + + # Display base URL + cat("Base URL:", gen3_metadata$base_url, "\n") + + # Display authentication status + if (!is.null(gen3_metadata$headers)) { + cat("Authentication: Authenticated\n") + } else { + cat("Authentication: Not authenticated\n") + } + + # End the summary with a blank line (the key file path is not printed) + cat("\n") + + # Return invisibly + return(invisible(gen3_metadata)) +} + diff --git a/gen3metadata-R/README.md b/gen3metadata-R/README.md new file mode 100644 index 0000000..a1063c2 --- /dev/null +++ b/gen3metadata-R/README.md @@ -0,0 +1,62 @@ +

+gen3metadata R tool +

+ +

+User friendly tools for downloading and manipulating gen3 metadata +

+ +
+ +R +Australian BioCommons +Baker Institute +
+ +

+Love our work? Visit our WebPortal. +

+ +
+ + +## Installation + +You can install the gen3metadata R tool from +[GitHub](https://github.com/) with: + +``` r +if (!require("devtools")) install.packages("devtools") +devtools::install_github("AustralianBioCommons/gen3-metadata", subdir = "gen3metadata-R") +``` + +The package depends on several other packages, which should hopefully be installed automatically. +In case this doesn't happen, run: +``` r +install.packages(c("httr", "jsonlite", "jose", "glue")) +``` + +Then all you need to do is load the package. + +``` r +library("gen3metadata") +``` + +## Example + +This is a basic example to authenticate and load some data. +You will need a credential file (stored in `key_file_path` in this example). + +``` r +# Create the Gen3 Metadata Parser object +gen3 <- Gen3MetadataParser(key_file_path) + +# Authenticate the object +gen3 <- authenticate(gen3) + +# Load some data +dat <- fetch_data(gen3, + program_name = "program1", + project_code = "AusDiab", + node_label = "subject") +``` diff --git a/gen3metadata-R/gen3-metadata-R.Rproj b/gen3metadata-R/gen3-metadata-R.Rproj new file mode 100644 index 0000000..9241cc8 --- /dev/null +++ b/gen3metadata-R/gen3-metadata-R.Rproj @@ -0,0 +1,16 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 4 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes diff --git a/gen3metadata-R/man/Gen3MetadataParser.Rd b/gen3metadata-R/man/Gen3MetadataParser.Rd new file mode 100644 index 0000000..0cd9f98 --- /dev/null +++ b/gen3metadata-R/man/Gen3MetadataParser.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gen3_metadata.R +\name{Gen3MetadataParser} +\alias{Gen3MetadataParser} +\title{Create a Gen3 metadata parser object} +\usage{ +Gen3MetadataParser(key_file) +} +\arguments{ +\item{key_file}{Character string path to the 
JSON key file containing Gen3 credentials} +} +\value{ +A gen3_metadata object with credentials and base URL configured +} +\description{ +This function creates a new Gen3 metadata parser object by loading +credentials from a key file. Use this object to interact with the Gen3 API. +} diff --git a/gen3metadata-R/man/authenticate.Rd b/gen3metadata-R/man/authenticate.Rd new file mode 100644 index 0000000..5c01fdd --- /dev/null +++ b/gen3metadata-R/man/authenticate.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/methods-gen3_metadata.R +\name{authenticate} +\alias{authenticate} +\alias{authenticate.gen3_metadata} +\title{Authenticate with Gen3 API} +\usage{ +authenticate(gen3_metadata) + +\method{authenticate}{gen3_metadata}(gen3_metadata) +} +\arguments{ +\item{gen3_metadata}{A gen3_metadata object containing credentials} +} +\value{ +The authenticated gen3_metadata object (invisibly) + +The authenticated gen3_metadata object with token and headers set (invisibly) +} +\description{ +Generic function to authenticate a gen3_metadata object with the Gen3 API +and obtain an access token for subsequent requests. + +This method authenticates a gen3_metadata object by sending a POST request +to the Gen3 API with the provided credentials to obtain an access token. 
+} diff --git a/gen3metadata-R/man/fetch_data.Rd b/gen3metadata-R/man/fetch_data.Rd new file mode 100644 index 0000000..d4e37a6 --- /dev/null +++ b/gen3metadata-R/man/fetch_data.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/methods-gen3_metadata.R +\name{fetch_data} +\alias{fetch_data} +\alias{fetch_data.gen3_metadata} +\title{Fetch data from Gen3 API} +\usage{ +fetch_data(gen3_metadata, program_name, project_code, node_label, api_version) + +\method{fetch_data}{gen3_metadata}( + gen3_metadata, + program_name, + project_code, + node_label, + api_version = "v0" +) +} +\arguments{ +\item{gen3_metadata}{An authenticated gen3_metadata object} + +\item{program_name}{Character string name of the program} + +\item{project_code}{Character string code of the project} + +\item{node_label}{Character string label of the node to fetch data from} + +\item{api_version}{Character string API version (default: "v0")} +} +\value{ +Data frame containing the fetched data + +Data frame containing the fetched data from the specified node +} +\description{ +Generic function to fetch data from a specific node in the Gen3 submission API +for a given program and project. + +This method fetches data from a specific node in the Gen3 API +for a given program and project. The object must be authenticated first. 
+} diff --git a/gen3metadata-R/man/get_base_url.Rd b/gen3metadata-R/man/get_base_url.Rd new file mode 100644 index 0000000..e8a1550 --- /dev/null +++ b/gen3metadata-R/man/get_base_url.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_base_url.R +\name{get_base_url} +\alias{get_base_url} +\title{Extract base URL from Gen3 API key} +\usage{ +get_base_url(api_key) +} +\arguments{ +\item{api_key}{Character string containing the Gen3 API key (JWT token)} +} +\value{ +Character string containing the base URL +} +\description{ +This function extracts the base URL from a Gen3 API key JWT token +by parsing the 'iss' (issuer) field from the payload. +} diff --git a/gen3metadata-R/man/load_key_file.Rd b/gen3metadata-R/man/load_key_file.Rd new file mode 100644 index 0000000..e6db863 --- /dev/null +++ b/gen3metadata-R/man/load_key_file.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_key_file.R +\name{load_key_file} +\alias{load_key_file} +\title{Load Gen3 credentials from key file} +\usage{ +load_key_file(key_file) +} +\arguments{ +\item{key_file}{Character string path to the JSON key file} +} +\value{ +List containing 'api_key' and 'key_id' credentials +} +\description{ +This function reads a JSON key file containing Gen3 API credentials. +} diff --git a/gen3metadata-R/man/print.Rd b/gen3metadata-R/man/print.Rd new file mode 100644 index 0000000..bb057c1 --- /dev/null +++ b/gen3metadata-R/man/print.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-gen3_metadata.R +\name{print.gen3_metadata} +\alias{print.gen3_metadata} +\title{Print gen3_metadata object summary} +\usage{ +\method{print}{gen3_metadata}(gen3_metadata) +} +\arguments{ +\item{gen3_metadata}{A gen3_metadata object} +} +\value{ +The gen3_metadata object (invisibly) +} +\description{ +This method prints a formatted summary of a gen3_metadata object. 
+} diff --git a/gen3metadata-R/tests/testthat.R b/gen3metadata-R/tests/testthat.R new file mode 100644 index 0000000..7613d39 --- /dev/null +++ b/gen3metadata-R/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(gen3metadata) + +test_check("gen3metadata") diff --git a/gen3metadata-R/tests/testthat/test-authenticate.R b/gen3metadata-R/tests/testthat/test-authenticate.R new file mode 100644 index 0000000..eaec34d --- /dev/null +++ b/gen3metadata-R/tests/testthat/test-authenticate.R @@ -0,0 +1,122 @@ +#' Unit tests for authenticate + +# Load required packages +library(testthat) +library(webmockr) +library(gen3metadata) + +# Enable webmockr to intercept HTTP requests +webmockr::enable() + +## Fixture to provide a fake API key. Note: these credentials have been inactivated. +## This is a valid JWT and UUID, but is not active. 
+fake_api_key <- paste0( + "{\"api_key\":\"eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eX", + "AiOiJKV1QifQ.eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczov", + "L2RhdGEudGVzdC5iaW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZ", + "GF0YS50ZXN0LmJpb2NvbW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLC", + "JleHAiOjE3NDQ4NDU0ODAsImp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2J", + "lNGVkMTIyOSIsImF6cCI6IiIsInNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9w", + "ZW5pZCIsImdvb2dsZV9zZXJ2aWNlX2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nb", + "GVfbGluayIsImFkbWluIiwidXNlciIsImdhNGdoX3Bhc3Nwb3J0X3YxIl19.SGPjs6ljC", + "JbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cnjduhWR", + "KnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0vQJ", + "zrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqp", + "MU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXL", + "U3vSdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA\",\"key_id\"", + ":\"b9042701-b08f-40da-89a3-753be4ed1229\"}" +) + + +test_that("authenticate method works correctly", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + gen3 <- Gen3MetadataParser(tmp_key_file) + + # Expect that headers and token are NULL initially + expect_null(gen3$headers) + expect_null(gen3$token) + + # Mock the POST request to the Gen3 API + mock_post <- stub_request("post", uri = "https://data.test.biocommons.org.au/user/credentials/cdis/access_token") + mock_post <- to_return(mock_post, + body = "{\"access_token\": \"fake_access_token\"}", + status = 200, + headers = list("Content-Type" = "application/json")) + + # Authenticate the Gen3 metadata object + gen3 <- authenticate(gen3) + + # Check that the token is set correctly + expect_equal(gen3$token, "fake_access_token") + + # Check that the headers are set correctly + expect_true(!is.null(gen3$headers)) + 
expect_equal(gen3$headers$headers, c("Authorization" = "Bearer fake_access_token")) + + # Clean up temporary file + unlink(tmp_key_file) + + # Clear the stub registry to remove the mock + webmockr::stub_registry_clear() +}) + + +test_that("authenticate method handles HTTP errors", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + gen3 <- Gen3MetadataParser(tmp_key_file) + + # Mock the POST request to return an error + mock_post <- stub_request("post", uri = "https://data.test.biocommons.org.au/user/credentials/cdis/access_token") + mock_post <- to_return(mock_post, + body = "{\"error\": \"invalid_grant\"}", + status = 400, + headers = list("Content-Type" = "application/json")) + + # Expect an error when trying to authenticate + expect_error(authenticate(gen3), "Failed to authenticate with the Gen3 API. Please check your credentials.") + + # Clean up temporary file + unlink(tmp_key_file) + + # Clear the stub registry to remove the mock + webmockr::stub_registry_clear() +}) + + +test_that("authenticate method handles missing access token", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + gen3 <- Gen3MetadataParser(tmp_key_file) + + # Mock the POST request to return an error + mock_post <- stub_request("post", uri = "https://data.test.biocommons.org.au/user/credentials/cdis/access_token") + mock_post <- to_return(mock_post, + body = "{}", + status = 200, + headers = list("Content-Type" = "application/json")) + + # Expect an error when trying to authenticate + expect_error(authenticate(gen3), "access token not found in the response.") + + # Clean up temporary file + unlink(tmp_key_file) + + # Clear the stub registry to remove the mock + webmockr::stub_registry_clear() + +}) + diff --git 
a/gen3metadata-R/tests/testthat/test-fetch_data.R b/gen3metadata-R/tests/testthat/test-fetch_data.R new file mode 100644 index 0000000..cfb505f --- /dev/null +++ b/gen3metadata-R/tests/testthat/test-fetch_data.R @@ -0,0 +1,115 @@ +#' Unit tests for fetch_data + +# Load required packages +library(testthat) +library(webmockr) +library(gen3metadata) + +# Enable webmockr to intercept HTTP requests +webmockr::enable() + +## Fixture to provide a fake API key. Note: these credentials have been inactivated. +## This is a valid JWT and UUID, but is not active. +fake_api_key <- paste0( + "{\"api_key\":\"eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eX", + "AiOiJKV1QifQ.eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczov", + "L2RhdGEudGVzdC5iaW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZ", + "GF0YS50ZXN0LmJpb2NvbW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLC", + "JleHAiOjE3NDQ4NDU0ODAsImp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2J", + "lNGVkMTIyOSIsImF6cCI6IiIsInNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9w", + "ZW5pZCIsImdvb2dsZV9zZXJ2aWNlX2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nb", + "GVfbGluayIsImFkbWluIiwidXNlciIsImdhNGdoX3Bhc3Nwb3J0X3YxIl19.SGPjs6ljC", + "JbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cnjduhWR", + "KnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0vQJ", + "zrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqp", + "MU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXL", + "U3vSdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA\",\"key_id\"", + ":\"b9042701-b08f-40da-89a3-753be4ed1229\"}" +) + + +test_that("fetch_data method works correctly", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + gen3 <- Gen3MetadataParser(tmp_key_file) + + # Mock the POST request to the Gen3 API + mock_post <- stub_request("post", uri = 
"https://data.test.biocommons.org.au/user/credentials/cdis/access_token") + mock_post <- to_return(mock_post, + body = "{\"access_token\": \"fake_access_token\"}", + status = 200, + headers = list("Content-Type" = "application/json")) + + # Authenticate the Gen3 metadata object + gen3 <- authenticate(gen3) + + # Mock the GET request to the Gen3 API + mock_get <- stub_request("get", uri = "https://data.test.biocommons.org.au/api/v0/submission/program1/AusDiab/export/?node_label=subject&format=json") + mock_get <- to_return(mock_get, + body = "{\"data\": [{\"id\": 1, \"name\": \"test\"}]}", + status = 200, + headers = list("Content-Type" = "application/json")) + + # Call fetch_data and check the result + result <- fetch_data(gen3, "program1", "AusDiab", "subject") + + # Check that the result is a data frame + expect_s3_class(result, "data.frame") + + # Check that the data frame has the expected columns + expect_true("id" %in% colnames(result)) + expect_true("name" %in% colnames(result)) + + # Check that the data frame contains the expected data + expect_equal(nrow(result), 1) + expect_equal(result$id, 1) + expect_equal(result$name, "test") + + # Clean up temporary file + unlink(tmp_key_file) + + # Clear the stub registry to remove the mock + webmockr::stub_registry_clear() +}) + + +test_that("fetch_data handles HTTP errors", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + gen3 <- Gen3MetadataParser(tmp_key_file) + + # Mock the POST request to the Gen3 API + mock_post <- stub_request("post", uri = "https://data.test.biocommons.org.au/user/credentials/cdis/access_token") + mock_post <- to_return(mock_post, + body = "{\"access_token\": \"fake_access_token\"}", + status = 200, + headers = list("Content-Type" = "application/json")) + + # Authenticate the Gen3 metadata object + gen3 <- authenticate(gen3) + + # Mock the GET request to the Gen3 API + 
mock_get <- stub_request("get", uri = "https://data.test.biocommons.org.au/api/v0/submission/program1/AusDiab/export/?node_label=subject&format=json") + mock_get <- to_return(mock_get, + body = "{\"data\": [{\"id\": 1, \"name\": \"test\"}]}", + status = 400, + headers = list("Content-Type" = "application/json")) + + # Call fetch_data and check the result + expect_error(fetch_data(gen3, "program1", "AusDiab", "subject"), "Failed to fetch data from the Gen3 API") + + # Clean up temporary file + unlink(tmp_key_file) + + # Clear the stub registry to remove the mock + webmockr::stub_registry_clear() +}) + diff --git a/gen3metadata-R/tests/testthat/test-gen3_metadata.R b/gen3metadata-R/tests/testthat/test-gen3_metadata.R new file mode 100644 index 0000000..3ab35f5 --- /dev/null +++ b/gen3metadata-R/tests/testthat/test-gen3_metadata.R @@ -0,0 +1,56 @@ +#' Unit tests for gen3metadata + +# Load required packages +library(testthat) +library(gen3metadata) + +## Fixture to provide a fake API key. Note: these credentials have been inactivated. +## This is a valid JWT and UUID, but is not active. 
+fake_api_key <- paste0( + "{\"api_key\":\"eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eX", + "AiOiJKV1QifQ.eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczov", + "L2RhdGEudGVzdC5iaW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZ", + "GF0YS50ZXN0LmJpb2NvbW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLC", + "JleHAiOjE3NDQ4NDU0ODAsImp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2J", + "lNGVkMTIyOSIsImF6cCI6IiIsInNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9w", + "ZW5pZCIsImdvb2dsZV9zZXJ2aWNlX2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nb", + "GVfbGluayIsImFkbWluIiwidXNlciIsImdhNGdoX3Bhc3Nwb3J0X3YxIl19.SGPjs6ljC", + "JbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cnjduhWR", + "KnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0vQJ", + "zrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqp", + "MU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXL", + "U3vSdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA\",\"key_id\"", + ":\"b9042701-b08f-40da-89a3-753be4ed1229\"}" +) + + +test_that("Gen3MetadataParser creates an object with correct class", { + + # Create a temporary key file + tmp_key_file <- tempfile(fileext = ".json") + writeLines(fake_api_key, tmp_key_file) + + # Create the Gen3 metadata parser object + obj <- Gen3MetadataParser(tmp_key_file) + + # Check that the object is of class 'gen3_metadata' + expect_s3_class(obj, "gen3_metadata") + + # Check that the key file path is set correctly + expect_equal(obj$key_file, tmp_key_file) + + # Check that the base URL is set correctly + expect_equal(obj$base_url, "https://data.test.biocommons.org.au") + + # Read the credentials from the key file + creds <- load_key_file(tmp_key_file) + + # Check that the credentials are set correctly + expect_equal(obj$credentials$api_key, creds$api_key) + expect_equal(obj$credentials$key_id, creds$key_id) + + # Clean up temporary file + unlink(tmp_key_file) +}) + + diff --git 
a/gen3metadata-R/tests/testthat/test-get_base_url.R b/gen3metadata-R/tests/testthat/test-get_base_url.R new file mode 100644 index 0000000..c637a89 --- /dev/null +++ b/gen3metadata-R/tests/testthat/test-get_base_url.R @@ -0,0 +1,43 @@ +#' Unit tests for get_base_url + +# Load required packages +library(testthat) +library(gen3metadata) + +## Fixture to provide a fake API key. Note: these credentials have been inactivated. +## This is a valid JWT and UUID, but is not active. +fake_api_key <- list( + "api_key" = paste0( + "eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eXAiOiJKV1QifQ.", + "eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczovL2RhdGEudGVzdC5i", + "aW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZGF0YS50ZXN0LmJpb2Nv", + "bW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLCJleHAiOjE3NDQ4NDU0ODAs", + "Imp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2JlNGVkMTIyOSIsImF6cCI6IiIs", + "InNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9wZW5pZCIsImdvb2dsZV9zZXJ2aWNl", + "X2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nbGVfbGluayIsImFkbWluIiwidXNlciIs", + "ImdhNGdoX3Bhc3Nwb3J0X3YxIl19.", + "SGPjs6ljCJbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cn", + "jduhWRKnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0", + "vQJzrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqp", + "MU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXLU3v", + "SdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA"), + "key_id" = "b9042701-b08f-40da-89a3-753be4ed1229" +) + + +test_that("get_base_url can get the data commons url from JWT token", { + + # Call get_base_url with the fake API key + base_url <- gen3metadata:::get_base_url(fake_api_key$api_key) + + # Check that the base URL is as expected + expect_equal(base_url, "https://data.test.biocommons.org.au") +}) + + +test_that("get_base_url errors when passing empty string", { + + # Test that get_base_url raises an error when the API key is an empty string + 
expect_error(gen3metadata:::get_base_url(""), "API key must be provided.") +}) + diff --git a/gen3metadata-R/tests/testthat/test-load_key_file.R b/gen3metadata-R/tests/testthat/test-load_key_file.R new file mode 100644 index 0000000..5440432 --- /dev/null +++ b/gen3metadata-R/tests/testthat/test-load_key_file.R @@ -0,0 +1,69 @@ +#' Unit tests for load_key_file + +# Load required packages +library(testthat) +library(gen3metadata) + + +test_that("load_key_file reads valid JSON file", { + + # Create a temporary file + tmp <- tempfile(fileext = ".json") + + # Write valid JSON to the temporary file + writeLines('{"api_key":"abc.def.ghi","key_id":"18b018"}', tmp) + + # Call load_key_file and check the result + creds <- load_key_file(tmp) + + # Check that the credentials are as expected + expect_equal(creds, + list(api_key = "abc.def.ghi", + key_id = "18b018")) + + # Clean up temporary file + unlink(tmp) +}) + + +test_that("load_key_file errors when file missing or no required fields", { + + # Test that load_key_file raises an error when the file does not exist + expect_error(load_key_file("no/such/file.json"), "does not exist") + + # Create a temporary file + tmp <- tempfile(fileext = ".json") + + # Write JSON without required fields + writeLines('{"api_key":"abc.def.ghi"}', tmp) + + # Test that load_key_file raises an error when the file is missing required fields + expect_error(load_key_file(tmp), "must contain 'api_key' and 'key_id'") + + # Clean up temporary file + unlink(tmp) +}) + + +test_that("load_key_file handles non-existent file", { + + # Test that load_key_file raises an error when the file does not exist + expect_error(load_key_file("non_existent_file.json"), "does not exist") +}) + + +test_that("load_key_file handles malformed JSON", { + + # Create a temporary file + tmp <- tempfile(fileext = ".json") + + # Write malformed JSON to the temporary file + writeLines('{"api_key":"abc.def.ghi", "key_id":18b018}', tmp) + + # Test that load_key_file raises an error 
for malformed JSON + expect_error(gen3metadata:::load_key_file(tmp), "invalid char in json") + + # Clean up temporary file + unlink(tmp) +}) + diff --git a/protyping.ipynb b/protyping.ipynb index 9725256..9ea4df2 100644 --- a/protyping.ipynb +++ b/protyping.ipynb @@ -19,14 +19,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", + "from dotenv import load_dotenv\n", "from gen3_metadata.gen3_metadata_parser import Gen3MetadataParser\n", + "load_dotenv()\n", + "key_file = os.getenv('credentials_path')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "\n", - "key_file = os.getenv('credentials_path')\n", "gen3metadata = Gen3MetadataParser(key_file)\n", "gen3metadata.authenticate()" ] diff --git a/requirements.txt b/requirements.txt index b084c45..522f559 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,6 @@ requests pandas setuptools pytest -PyJWT \ No newline at end of file +PyJWT +ipykernel +python-dotenv \ No newline at end of file diff --git a/src/gen3_metadata/gen3_metadata_parser.py b/src/gen3_metadata/gen3_metadata_parser.py index 4499b96..8c0ddcc 100644 --- a/src/gen3_metadata/gen3_metadata_parser.py +++ b/src/gen3_metadata/gen3_metadata_parser.py @@ -2,6 +2,7 @@ import requests import pandas as pd import jwt +import re class Gen3MetadataParser: @@ -20,6 +21,20 @@ def __init__(self, key_file_path): self.headers = {} self.data_store = {} self.data_store_pd = {} + + def _add_quotes_to_json(self, input_str): + try: + # Try parsing as-is + return json.loads(input_str) + except json.JSONDecodeError: + # Add quotes around keys + fixed = re.sub(r'([{,]\s*)(\w+)\s*:', r'\1"\2":', input_str) + # Add quotes around bare values (skip existing quoted values); the class includes "_", "+" and "=" so base64/base64url JWT api keys are matched in full + fixed = re.sub(r':\s*([A-Za-z0-9_.:@/+=-]+)(?=\s*[},])', r': "\1"', fixed) + try: + return json.loads(fixed) + except json.JSONDecodeError as e: + raise
ValueError(f"Could not fix JSON: {e}") def _load_api_key(self) -> dict: """ @@ -28,8 +43,27 @@ def _load_api_key(self) -> dict: Returns: dict: The API key loaded from the JSON file. """ - with open(self.key_file_path) as json_file: - return json.load(json_file) + try: + # Read the file as plain text + with open(self.key_file_path, "r") as f: + content = f.read() + # If the content does not contain any double or single quotes, try to fix it + if '"' not in content and "'" not in content: + return self._add_quotes_to_json(content) + + # Parse the text already read instead of opening the file a second time, + # so the quote check above and the JSON parse always see the same content. + return json.loads(content) + except FileNotFoundError as fnf_err: + print(f"File not found: {fnf_err}") + raise + except json.JSONDecodeError as json_err: + print(f"JSON decode error: {json_err}") + print("Please make sure the file contains valid JSON with quotes and proper formatting.") + raise + except Exception as err: + print(f"An unexpected error occurred while loading API key: {err}") + raise def _url_from_jwt(self, cred: dict) -> str: """ @@ -148,3 +182,37 @@ def data_to_pd(self) -> None: print(f"Converting {key} to pandas dataframe...") self.data_store_pd[key] = self.json_to_pd(value['data']) return + + def fetch_data_pd(self, program_name, project_code, node_label, api_version="v0"): + """ + Fetches data from the Gen3 API for a specific program, project, and node label, + and converts it to a pandas DataFrame. + + Args: + program_name (str): The name of the program. + project_code (str): The code of the project. + node_label (str): The label of the node. + api_version (str, optional): The version of the API to use. + Defaults to "v0". + + Returns: + pandas.DataFrame: The records under the response's 'data' key, converted with json_to_pd.
+ """ + data = self.fetch_data(program_name, project_code, node_label, api_version=api_version, return_data=True) + return self.json_to_pd(data['data']) + + def fetch_data_json(self, program_name, project_code, node_label, api_version="v0"): + """ + Fetches data from the Gen3 API for a specific program, project, and node label. + + Args: + program_name (str): The name of the program. + project_code (str): The code of the project. + node_label (str): The label of the node. + api_version (str, optional): The version of the API to use. + Defaults to "v0". + + Returns: + dict: The JSON response as returned by fetch_data(..., return_data=True). + """ + return self.fetch_data(program_name, project_code, node_label, api_version=api_version, return_data=True) diff --git a/tests/test_gen3_metadata_parser.py b/tests/test_gen3_metadata_parser.py index dbc553e..2220fbd 100644 --- a/tests/test_gen3_metadata_parser.py +++ b/tests/test_gen3_metadata_parser.py @@ -9,27 +9,89 @@ @pytest.fixture def fake_api_key(): - """Fixture to provide a fake API key. Note: these credentials have been inactivated""" + """Fixture to provide a fake API key. Note: these credentials have been inactivated.""" + # This is a valid JWT and UUID, but is not active.
return { - "api_key": "eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eXAiOiJKV1QifQ.eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczovL2RhdGEudGVzdC5iaW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZGF0YS50ZXN0LmJpb2NvbW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLCJleHAiOjE3NDQ4NDU0ODAsImp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2JlNGVkMTIyOSIsImF6cCI6IiIsInNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9wZW5pZCIsImdvb2dsZV9zZXJ2aWNlX2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nbGVfbGluayIsImFkbWluIiwidXNlciIsImdhNGdoX3Bhc3Nwb3J0X3YxIl19.SGPjs6ljCJbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cnjduhWRKnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0vQJzrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqpMU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXLU3vSdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA", + "api_key": ( + "eyJhbGciOiJSUzI1NiIsImtpZCI6ImZlbmNlX2tleV9rZXkiLCJ0eXAiOiJKV1QifQ." + "eyJwdXIiOiJhcGlfa2V5Iiwic3ViIjoiMjEiLCJpc3MiOiJodHRwczovL2RhdGEudGVzdC5i" + "aW9jb21tb25zLm9yZy5hdS91c2VyIiwiYXVkIjpbImh0dHBzOi8vZGF0YS50ZXN0LmJpb2Nv" + "bW1vbnMub3JnLmF1L3VzZXIiXSwiaWF0IjoxNzQyMjUzNDgwLCJleHAiOjE3NDQ4NDU0ODAs" + "Imp0aSI6ImI5MDQyNzAxLWIwOGYtNDBkYS04OWEzLTc1M2JlNGVkMTIyOSIsImF6cCI6IiIs" + "InNjb3BlIjpbImdvb2dsZV9jcmVkZW50aWFscyIsIm9wZW5pZCIsImdvb2dsZV9zZXJ2aWNl" + "X2FjY291bnQiLCJkYXRhIiwiZmVuY2UiLCJnb29nbGVfbGluayIsImFkbWluIiwidXNlciIs" + "ImdhNGdoX3Bhc3Nwb3J0X3YxIl19." 
+ "SGPjs6ljCJbwDu-6WAnI5dN8o5467_ktcnsxRFrX_aCQNrOwSPgTCDvWEzamRmB5Oa0yB6cn" + "jduhWRKnPWIZDal86H0etm77wilCteHF_zFl1IV6LW23AfOVOG3zB9KL6o-ZYqpSRyo0FDj0" + "vQJzrHXPjqvQ15S6Js2sIwIa3ONTeHbR6fRecfPaLK1uGIY5tJFeigXzrLzlifKCEnt_2gqp" + "MU2_b2QgW1315FixNIUOl8A7FZJ2-ddSMJPO0IYQ0QMSWV9-bbxie4Zjsaa1HtQYOhfXLU3v" + "SdUOBO0btSfd6-NnWfx_-lDo5V9lkSH_aecEyew0IHBx-e7rSR5cxA" + ), "key_id": "b9042701-b08f-40da-89a3-753be4ed1229" } @pytest.fixture def gen3_metadata_parser(): """Fixture to create a Gen3MetadataParser instance.""" - return Gen3MetadataParser( - key_file_path="fake_credentials.json" - ) + return Gen3MetadataParser(key_file_path="fake_credentials.json") +@pytest.fixture +def malformed_json_credentials(): + """Fixture for malformed JSON credentials (no quotes, not valid Python dict).""" + # This is a string, not a dict, to simulate malformed file content. + return '{api_key: abc.def.ghi, key_id: 18bdaa-b018}' + +def test_add_quotes_to_json_valid(gen3_metadata_parser): + """Test _add_quotes_to_json with valid JSON (should parse as-is).""" + valid_json = '{"api_key": "abc.def.ghi", "key_id": "18bdaa-b018"}' + result = gen3_metadata_parser._add_quotes_to_json(valid_json) + assert result == {"api_key": "abc.def.ghi", "key_id": "18bdaa-b018"} + +def test_add_quotes_to_json_malformed(gen3_metadata_parser): + """Test _add_quotes_to_json with malformed JSON (no quotes).""" + malformed = '{api_key: abc.def.ghi, key_id: 18bdaa-b018}' + result = gen3_metadata_parser._add_quotes_to_json(malformed) + assert result == {"api_key": "abc.def.ghi", "key_id": "18bdaa-b018"} + +def test_add_quotes_to_json_url_and_uuid(gen3_metadata_parser): + """Test _add_quotes_to_json with keys/values including url and uuid.""" + malformed = '{key1: value1, key2:123, url: https://example.com, uuid: 18bdaa-b018}' + result = gen3_metadata_parser._add_quotes_to_json(malformed) + assert result == { + "key1": "value1", + "key2": "123", + "url": "https://example.com", + "uuid": "18bdaa-b018" + } + +def 
test_add_quotes_to_json_invalid(gen3_metadata_parser): + """Test _add_quotes_to_json with unrecoverable malformed JSON.""" + bad = '{key1 value1, key2:}' + with pytest.raises(ValueError): + gen3_metadata_parser._add_quotes_to_json(bad) -def test_load_api_key(gen3_metadata_parser, fake_api_key): - """Test the _load_api_key method.""" - # Mock open() to simulate reading the fake API key from a file +def test_load_api_key_valid_json(gen3_metadata_parser, fake_api_key): + """Test the _load_api_key method with valid JSON file content.""" + # Simulate reading a valid JSON file with patch("builtins.open", mock_open(read_data=json.dumps(fake_api_key))): result = gen3_metadata_parser._load_api_key() assert result == fake_api_key +def test_load_api_key_malformed_json(gen3_metadata_parser, malformed_json_credentials): + """Test the _load_api_key method with malformed JSON (no quotes).""" + # Simulate reading a malformed JSON file (no quotes) + with patch("builtins.open", mock_open(read_data=malformed_json_credentials)): + result = gen3_metadata_parser._load_api_key() + assert result == {"api_key": "abc.def.ghi", "key_id": "18bdaa-b018"} + +def test_load_api_key_invalid_json(gen3_metadata_parser): + """Test the _load_api_key method with unrecoverable malformed JSON.""" + # Simulate reading a badly malformed JSON file + bad_content = '{key1 value1, key2:}' + with patch("builtins.open", mock_open(read_data=bad_content)): + with pytest.raises(ValueError): + gen3_metadata_parser._load_api_key() + def test_url_from_jwt(gen3_metadata_parser, fake_api_key): """Test if you can infer the data commons url from the JWT token""" url = gen3_metadata_parser._url_from_jwt(fake_api_key) @@ -150,3 +212,40 @@ def test_data_to_pd(gen3_metadata_parser, data_store): # Verify conversion assert test_key in gen3_metadata_parser.data_store_pd pd.testing.assert_frame_equal(gen3_metadata_parser.data_store_pd[test_key], expected_df) + + +@patch("requests.get") +def test_fetch_data_pd(mock_get, 
gen3_metadata_parser, fake_api_key): + """Test fetch_data_pd for successful API response.""" + fake_response = {"data": [{"id": 1, "name": "test"}]} + mock_get.return_value.status_code = 200 + mock_get.return_value.json.return_value = fake_response + + program_name = "test_program" + project_code = "test_project" + node_label = "subjects" + + with patch("builtins.open", mock_open(read_data=json.dumps(fake_api_key))): + result = gen3_metadata_parser.fetch_data_pd(program_name, project_code, node_label) + key = f"{program_name}/{project_code}/{node_label}" + assert key in gen3_metadata_parser.data_store + assert isinstance(result, pd.DataFrame) + assert result.equals(pd.DataFrame(fake_response['data'])) + + +@patch("requests.get") +def test_fetch_data_json(mock_get, gen3_metadata_parser, fake_api_key): + """Test fetch_data_json for successful API response.""" + fake_response = {"data": [{"id": 1, "name": "test"}]} + mock_get.return_value.status_code = 200 + mock_get.return_value.json.return_value = fake_response + + program_name = "test_program" + project_code = "test_project" + node_label = "subjects" + + with patch("builtins.open", mock_open(read_data=json.dumps(fake_api_key))): + result = gen3_metadata_parser.fetch_data_json(program_name, project_code, node_label) + key = f"{program_name}/{project_code}/{node_label}" + assert key in gen3_metadata_parser.data_store + assert result == fake_response