Skip to content

Commit d021685

Browse files
created the first version of the repo
1 parent b3b0538 commit d021685

File tree

6 files changed

+211
-0
lines changed

6 files changed

+211
-0
lines changed

README.md

+59
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,61 @@
11
# gen3-metadata
22
User friendly tools for downloading and manipulating gen3 metadata
3+
4+
5+
## 1. Set up python venv
6+
```bash
7+
python3 -m venv venv
8+
source venv/bin/activate
9+
pip install -r requirements.txt
10+
```
11+
12+
## 2. Create config file
13+
```bash
14+
echo credentials_path=\"/path/to/credentials.json\" > .env
15+
```
16+
17+
## 3. Install the library
18+
```bash
19+
pip install -e .
20+
```
21+
22+
23+
## 4. Usage Example
24+
25+
```python
26+
import os
27+
from gen3_metadata.parser import Gen3MetadataParser
28+
29+
# Set up API and credentials
30+
api = "https://data.test.biocommons.org.au"
31+
key_file = os.getenv('credentials_path')
32+
33+
# Initialize the Gen3MetadataParser
34+
gen3metadata = Gen3MetadataParser(api, key_file)
35+
36+
# Fetch data for different categories
37+
gen3metadata.fetch_data("program1", "AusDiab_Simulated", "subject")
38+
gen3metadata.fetch_data("program1", "AusDiab_Simulated", "demographic")
39+
gen3metadata.fetch_data("program1", "AusDiab_Simulated", "medical_history")
40+
41+
# Convert fetched data to a pandas DataFrame
42+
gen3metadata.data_to_pd()
43+
44+
# Print the keys of the data sets that have been fetched
45+
print(gen3metadata.data_store.keys())
46+
47+
# Return a json of one of the datasets
48+
gen3metadata.data_store["program1/AusDiab_Simulated/subject"]
49+
50+
# Return the pandas dataframe of one of the datasets
51+
gen3metadata.data_store_pd["program1/AusDiab_Simulated/subject"]
52+
```
53+
54+
The fetched data is stored in a dictionary (`data_store`) within the `Gen3MetadataParser` instance.
55+
Each category of data fetched is stored as a key-value pair in this dictionary,
56+
where the key is `program_name/project_code/node_label` (for example `program1/AusDiab_Simulated/subject`) and the value is the corresponding data.
57+
This allows for easy access and manipulation of the data after it has been fetched.
58+
59+
60+
61+

protyping.ipynb

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# testing and prototyping"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import gen3_metadata\n",
17+
"from gen3_metadata.parser import Gen3MetadataParser"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": 2,
23+
"metadata": {},
24+
"outputs": [
25+
{
26+
"name": "stdout",
27+
"output_type": "stream",
28+
"text": [
29+
"status code: 200\n",
30+
"Data for program1/AusDiab_Simulated/subject has been fetched and stored.\n",
31+
"status code: 200\n",
32+
"Data for program1/AusDiab_Simulated/demographic has been fetched and stored.\n",
33+
"status code: 200\n",
34+
"Data for program1/AusDiab_Simulated/medical_history has been fetched and stored.\n",
35+
"Converting program1/AusDiab_Simulated/subject to pandas dataframe...\n",
36+
"Converting program1/AusDiab_Simulated/demographic to pandas dataframe...\n",
37+
"Converting program1/AusDiab_Simulated/medical_history to pandas dataframe...\n"
38+
]
39+
}
40+
],
41+
"source": [
42+
"import os\n",
43+
"from gen3_metadata.parser import Gen3MetadataParser\n",
44+
"\n",
45+
"api = \"https://data.test.biocommons.org.au\"\n",
46+
"key_file = os.getenv('credentials_path')\n",
47+
"gen3metadata = Gen3MetadataParser(api, key_file)\n",
48+
"\n",
49+
"\n",
50+
"gen3metadata.fetch_data(\"program1\", \"AusDiab_Simulated\", \"subject\")\n",
51+
"gen3metadata.fetch_data(\"program1\", \"AusDiab_Simulated\", \"demographic\")\n",
52+
"gen3metadata.fetch_data(\"program1\", \"AusDiab_Simulated\", \"medical_history\")\n",
53+
"\n",
54+
"gen3metadata.data_to_pd()"
55+
]
56+
}
57+
],
58+
"metadata": {
59+
"kernelspec": {
60+
"display_name": ".venv",
61+
"language": "python",
62+
"name": "python3"
63+
},
64+
"language_info": {
65+
"codemirror_mode": {
66+
"name": "ipython",
67+
"version": 3
68+
},
69+
"file_extension": ".py",
70+
"mimetype": "text/x-python",
71+
"name": "python",
72+
"nbconvert_exporter": "python",
73+
"pygments_lexer": "ipython3",
74+
"version": "3.9.5"
75+
}
76+
},
77+
"nbformat": 4,
78+
"nbformat_minor": 2
79+
}

requirements.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
requests
2+
pandas
3+
setuptools

setup.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from pathlib import Path

from setuptools import setup, find_packages

# Resolve requirements.txt next to this file so the build does not depend on
# the directory the installer happens to be invoked from.
_REQUIREMENTS_FILE = Path(__file__).resolve().parent / "requirements.txt"


def _read_requirements(path):
    """Return the requirement specifiers listed in *path*.

    Blank lines and ``#`` comment lines are skipped so they are not passed to
    ``install_requires``. The file is closed as soon as it has been read.
    """
    with open(path, encoding="utf-8") as req_file:
        stripped = (line.strip() for line in req_file)
        return [line for line in stripped if line and not line.startswith("#")]


setup(
    name="gen3_metadata",  # Name of your package
    version="0.1",  # Version number
    packages=find_packages(where="src"),  # Automatically find packages in src/
    package_dir={"": "src"},  # Tell setuptools that packages are in src/
    install_requires=_read_requirements(_REQUIREMENTS_FILE),  # Add dependencies from requirements.txt
    description="A library for downloading and manipulating gen3 metadata",
    author="Joshua Harris",
    author_email="[email protected]",
    url="https://github.com/AustralianBioCommons/gen3-metadata",  # Optional: GitHub or project URL
)

src/gen3_metadata/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .parser import *

src/gen3_metadata/parser.py

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import json
2+
import requests
3+
import os
4+
5+
class Gen3MetadataParser:
    """Fetch node metadata from a Gen3 submission API and keep it in memory.

    On construction the credentials file is read and exchanged for an access
    token. Each successful ``fetch_data`` call stores the decoded JSON in
    ``data_store`` under the key ``"program_name/project_code/node_label"``;
    ``data_to_pd`` then fills ``data_store_pd`` with one pandas DataFrame per
    stored entry, using the same keys.
    """

    def __init__(self, api_url, key_file_path, timeout=None):
        """Authenticate against *api_url* using the key in *key_file_path*.

        Args:
            api_url: Base URL of the Gen3 instance, without a trailing slash.
            key_file_path: Path to the JSON credentials file.
            timeout: Optional timeout forwarded to every ``requests`` call.
                The default ``None`` means wait indefinitely.

        Raises:
            OSError: If the credentials file cannot be opened.
            requests.exceptions.HTTPError: If the token request fails.
        """
        self.api_url = api_url
        self.key_file_path = key_file_path
        # Must be set before _authenticate(), which already uses it.
        self.timeout = timeout
        self.headers = self._authenticate()
        # Raw JSON responses keyed by "program_name/project_code/node_label".
        self.data_store = {}
        # DataFrames produced by data_to_pd(), keyed like data_store.
        self.data_store_pd = {}

    def _load_api_key(self):
        """Return the parsed contents of the JSON credentials file."""
        with open(self.key_file_path, encoding="utf-8") as json_file:
            return json.load(json_file)

    def _authenticate(self):
        """Exchange the API key for an access token and return auth headers.

        Raises:
            requests.exceptions.HTTPError: If the token endpoint returns an
                error status.
        """
        key = self._load_api_key()
        response = requests.post(
            f"{self.api_url}/user/credentials/cdis/access_token",
            json=key,
            timeout=self.timeout,
        )
        response.raise_for_status()  # Ensure any HTTP errors are raised
        access_token = response.json()['access_token']
        return {'Authorization': f"bearer {access_token}"}

    def json_to_pdf(self, json_data):
        """Flatten *json_data* into a pandas DataFrame via ``json_normalize``.

        Despite the ``pdf`` suffix in the name, the result is a DataFrame.
        """
        # Imported lazily so pandas is only needed when a conversion happens.
        import pandas as pd
        return pd.json_normalize(json_data)

    def fetch_data(self, program_name, project_code, node_label, return_data=False):
        """Download one node's records and store them in ``data_store``.

        Args:
            program_name: Program segment of the submission URL.
            project_code: Project segment of the submission URL.
            node_label: Node to export; sent as the ``node_label`` query
                parameter.
            return_data: When true, return the decoded JSON instead of
                printing a confirmation message.

        Returns:
            The decoded JSON when *return_data* is true and the request
            succeeded; otherwise ``None``. Failures are printed, not raised.
        """
        key = f"{program_name}/{project_code}/{node_label}"
        url = f"{self.api_url}/api/v0/submission/{program_name}/{project_code}/export/"
        try:
            # Passing the query via params lets requests URL-encode the values.
            response = requests.get(
                url,
                params={"node_label": node_label, "format": "json"},
                headers=self.headers,
                timeout=self.timeout,
            )
            print(f"status code: {response.status_code}")
            response.raise_for_status()  # Ensure any HTTP errors are raised
            data = response.json()
        except requests.exceptions.HTTPError as http_err:
            # Read the status from the exception itself: the local `response`
            # is unbound if the error was raised before it was assigned.
            # Compare with None explicitly; an error Response is falsy.
            failed = http_err.response
            status_code = failed.status_code if failed is not None else "unknown"
            print(f"HTTP error occurred: {http_err} - Status Code: {status_code}")
            return None
        except Exception as err:
            # Best-effort API: report the problem and carry on.
            print(f"An error occurred: {err}")
            return None

        # Store the data in the dictionary with the created key
        self.data_store[key] = data

        if return_data:
            return data
        print(f"Data for {key} has been fetched and stored.")
        return None

    def data_to_pd(self):
        """Convert every entry of ``data_store`` into a DataFrame.

        Each stored response must contain a ``'data'`` key; its value is
        normalised and saved in ``data_store_pd`` under the same key.

        Raises:
            KeyError: If a stored response has no ``'data'`` entry.
        """
        for key, value in self.data_store.items():
            print(f"Converting {key} to pandas dataframe...")
            self.data_store_pd[key] = self.json_to_pdf(value['data'])

0 commit comments

Comments
 (0)