Skip to content
This repository was archived by the owner on Jul 20, 2025. It is now read-only.

Commit 24a2bf1

Browse files
committed
Add a detailed comparison with the AFLOW-ML model
1 parent 0bd2d10 commit 24a2bf1

File tree

2 files changed

+389
-0
lines changed

2 files changed

+389
-0
lines changed

cmp_ml_aflow_mpds.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env python2
2+
"""
3+
Given the folder with the POSCARs, this script employs two predictive ML models:
4+
* AFLOW-ML PLMF (http://aflowlib.org/aflow-ml)
5+
* MPDS ML (http://mpds.io/ml)
6+
validating them using the experimental data from the core MPDS database
7+
(a subscription is required), and outputs a CSV table for a detailed comparison
8+
"""
9+
from __future__ import division
10+
import os
11+
import sys
12+
import time
13+
14+
import httplib2
15+
import numpy as np
16+
from ase.units import _Nav, _k
17+
from mpds_client import MPDSDataRetrieval, APIError
18+
19+
from mpds_ml_labs.prediction import prop_models
20+
from mpds_ml_labs.struct_utils import detect_format, poscar_to_ase, refine, get_formula, sgn_to_crsystem
21+
from mpds_ml_labs.common import API_KEY, API_ENDPOINT, make_request
22+
from mpds_ml_labs.aflowml_client import AFLOWmlAPI
23+
24+
25+
# Name of the CSV table written at the end of the run
RESULT_FILE = 'aflow_mpds_comparison_070219.csv'

# Prediction endpoint of the MPDS ML service
# (alternative address kept from the original: http://127.0.0.1:5000/predict)
LABS_SERVER_ADDR = 'https://labs.mpds.io/predict'

# MPDS property identifier (key of prop_models and of the MPDS prediction
# output) -> name of the corresponding field in the AFLOW-ML PLMF output
MPDS_AFLOW_CORR = {
    'z': 'ml_ael_bulk_modulus_vrh',
    'd': 'ml_agl_debye',
    't': 'ml_agl_thermal_expansion_300K',
    'x': 'ml_agl_heat_capacity_Cp_300K',
    'w': 'ml_egap',
}
34+
35+
def kbcell_to_jkmol(value, n_at_cell):
    """
    Scale `value` by the ase.units constants `_k` and `_Nav`
    and divide the result by the number of atoms in the cell.

    NOTE(review): judging by the name, this converts a quantity given in
    k_B per cell into J/(K*mol) per atom -- confirm against the AFLOW-ML units.
    """
    # same left-to-right evaluation order as before, so results are bit-identical
    scaled = value * _k * _Nav
    return scaled / n_at_cell
37+
38+
def _keep_room_temperature(df):
    """
    Return `df` without the rows whose condition is a temperature given in K
    that lies outside of the 200-400 K window. Rows with any other condition
    are kept. A new frame is returned, the input is not modified.
    """
    off_range = df[
        (df['Cname'] == 'Temperature') & (df['Cunits'] == 'K') &
        ((df['Cvalue'] < 200) | (df['Cvalue'] > 400))
    ]
    return df.drop(off_range.index)


# Refuse to overwrite the results of a previous run. An explicit check is used
# instead of `assert`, because assertions are stripped under `python -O`.
if os.path.exists(RESULT_FILE):
    sys.exit("Result file %s already exists, refusing to overwrite it" % RESULT_FILE)

try:
    given = sys.argv[1]
except IndexError:
    sys.exit("Structure file or folder with files must be given!")

# Collect the files to process: either all regular files of a folder,
# or the single path given
tasks = []
if os.path.isdir(given):
    for filename in os.listdir(given):
        path = os.path.join(given, filename)
        if os.path.isfile(path):
            tasks.append(path)
else:
    tasks.append(given)

mpds_ml_remote = httplib2.Http()
mpds_api = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT, verbose=False)
aflowml = AFLOWmlAPI()
result_db = []

start_time = time.time()

for task in tasks:
    title = os.path.basename(task)
    # close the file handle right away instead of leaking it until GC
    with open(task) as f_structure:
        structure = f_structure.read()

    # silently skip everything which is not a usable ordered POSCAR
    if detect_format(structure) != 'poscar':
        continue
    ase_obj, error = poscar_to_ase(structure)
    if error:
        continue
    if 'disordered' in ase_obj.info:
        continue
    ase_obj, error = refine(ase_obj)
    if error:
        continue
    formula, n_atoms_cell = get_formula(ase_obj), len(ase_obj)

    print("*"*20 + ("%s %s, %s" % (title, formula, n_atoms_cell)) + "*"*20)

    # criteria shared by all the MPDS queries for this structure
    tpl_query = {
        'formulae': formula,
        'lattices': sgn_to_crsystem(ase_obj.info['spacegroup'].no)
    }

    # number of the MPDS entries of the class "conductor" at 200-400 K
    results_conductor = 0
    try:
        outdf = mpds_api.get_dataframe(dict(classes='conductor', **tpl_query), fields={'P': [
            'sample.measurement[0].condition[0].name',
            'sample.measurement[0].condition[0].scalar',
            'sample.measurement[0].condition[0].units'
        ], 'S': [ # NB mockup, temperature to be released for S-entries soon
            lambda: 'Temperature',
            lambda: 300,
            lambda: 'K'
        ]}, columns=['Cname', 'Cvalue', 'Cunits'])
        results_conductor = len(_keep_room_temperature(outdf))
    except APIError as e:
        # an empty result is expected, anything else is reported
        # in the same way as for the property queries below
        if e.code != 204: # NB standard code for the empty result
            print("While checking against the MPDS an error %s occured" % e.code)

    time.sleep(1)

    mpds_output = make_request(mpds_ml_remote, LABS_SERVER_ADDR, {'structure': structure})
    if 'error' in mpds_output:
        continue

    aflow_output = aflowml.get_prediction(structure, 'plmf')

    # look up the experimental ("factual") value of each compared property;
    # it is stored in prop_models[prop_id]['factual'] and is reset to None
    # whenever nothing suitable is found for the current structure
    for prop_id in MPDS_AFLOW_CORR:
        try:
            outdf = mpds_api.get_dataframe(dict(props=prop_models[prop_id]['name'], **tpl_query), fields={'P': [
                'sample.material.chemical_formula',
                'sample.material.phase_id',
                'sample.measurement[0].property.scalar',
                'sample.measurement[0].property.units',
                'sample.measurement[0].condition[0].units',
                'sample.measurement[0].condition[0].name',
                'sample.measurement[0].condition[0].scalar'
            ]}, columns=['Compound', 'Phase', 'Value', 'Units', 'Cunits', 'Cname', 'Cvalue'])
        except APIError as e:
            prop_models[prop_id]['factual'] = None
            if e.code != 204: # NB standard code for the empty result
                print("While checking against the MPDS an error %s occured" % e.code)
            continue

        # keep only the values in the expected units and inside of the model interval
        outdf = outdf[outdf['Units'] == prop_models[prop_id]['units']]
        outdf = outdf[
            (outdf['Value'] > prop_models[prop_id]['interval'][0]) &
            (outdf['Value'] < prop_models[prop_id]['interval'][1])
        ]
        if prop_id not in ['m', 'd']:
            outdf = _keep_room_temperature(outdf)
        if outdf.empty:
            prop_models[prop_id]['factual'] = None
            continue

        # NB to treat values out of JSON bounds given as str;
        # converted on the fly, so the filtered frame is never assigned to
        factual_values = outdf['Value'].astype('float64')
        prop_models[prop_id]['factual'] = np.median(factual_values)

    # units conversion
    mpds_output['prediction']['t']['value'] /= 100000
    aflow_output[MPDS_AFLOW_CORR['x']] = kbcell_to_jkmol(aflow_output[MPDS_AFLOW_CORR['x']], n_atoms_cell)

    # remark on conductivity
    results_insulator = prop_models['w']['factual'] and np.isfinite(prop_models['w']['factual'])
    if results_insulator and results_conductor:
        remark = 'Semiconductor'
    elif results_insulator:
        remark = 'Insulator'
    elif results_conductor:
        remark = 'Conductor'
    else:
        remark = 'Unknown'

    # one CSV row: per property its name, factual value, AFLOW-ML prediction,
    # MPDS prediction and two empty columns
    row = [title, formula, n_atoms_cell]
    for prop_id in ('z', 'd', 't', 'x', 'w'):
        row.extend([
            prop_models[prop_id]['name'],
            prop_models[prop_id]['factual'],
            aflow_output[MPDS_AFLOW_CORR[prop_id]],
            mpds_output['prediction'][prop_id]['value'],
            '', ''
        ])
    row.append(remark)
    result_db.append(row)

print("Done in %1.2f sc" % (time.time() - start_time))

# the context manager closes the file even if writing a row fails
with open(RESULT_FILE, "w") as f_result:
    for row in result_db:
        f_result.write(",".join([str(item) for item in row]) + "\n")

mpds_ml_labs/aflowml_client.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
"""
2+
Taken from the AFLOW webpage
3+
http://aflow.org/src/aflow-ml
4+
due to absence of the PyPI package
5+
"""
6+
import json
7+
import sys
8+
from time import sleep
9+
10+
# Import proper urllib versions depending on Python version
11+
if sys.version_info >= (3,0):
12+
from urllib.parse import urlencode
13+
from urllib.request import urlopen
14+
from urllib.request import Request
15+
from urllib.error import HTTPError
16+
else:
17+
from urllib2 import Request
18+
from urllib2 import urlopen
19+
from urllib import urlencode
20+
from urllib2 import HTTPError
21+
22+
23+
class AFLOWmlAPIError(Exception):
    """
    Error reported by the AFLOW-ML client.

    Carries a human-readable `error_message` and, optionally,
    a `status_code` which is prepended to the message when present.
    """

    def __init__(self, error_message, status_code=None):
        self.error_message = error_message
        self.status_code = status_code

    def __str__(self):
        # a missing (or zero) status code yields the bare message
        if not self.status_code:
            return self.error_message
        return '(%s) %s' % (self.status_code, self.error_message)
33+
34+
def urlencoder(query):
    """
    URL-encode the `query` mapping for use as a request body.

    Under Python 3 the encoded string is additionally converted
    to UTF-8 bytes; under Python 2 it is returned as is.
    """
    encoded = urlencode(query)
    if sys.version_info < (3, 0):
        return encoded
    return encoded.encode('utf-8')
39+
40+
def json_loader(content):
    """
    Parse the raw response body `content` as JSON.

    Under Python 3 the body is first decoded from UTF-8 bytes;
    under Python 2 it is handed to the parser unchanged.
    """
    if sys.version_info < (3, 0):
        text = content
    else:
        text = content.decode('utf-8')
    return json.loads(text)
45+
46+
47+
class AFLOWmlAPI:
    """
    Client for the AFLOW-ML REST API.

    A prediction is obtained in two steps: `submit_job` posts the input
    and returns a task id, `poll_job` queries the result endpoint until
    the task has finished. `get_prediction` chains the two.
    """

    def __init__(self):
        self._base_url = 'http://aflow.org/API/aflow-ml/v1.1'
        self.res_data = {}
        # name of the model of the last submitted job, None before any submission
        self.model = None
        self.supported_models = [
            'plmf',
            'mfd',
            'asc'
        ]
        # names of the result fields which may be requested per model
        self.plmf_fields = [
            'ml_egap_type',
            'ml_egap',
            'ml_energy_per_atom',
            'ml_ael_bulk_modulus_vrh',
            'ml_ael_shear_modulus_vrh',
            'ml_agl_debye',
            'ml_agl_heat_capacity_Cp_300K',
            'ml_agl_heat_capacity_Cp_300K_per_atom',
            'ml_agl_heat_capacity_Cv_300K',
            'ml_agl_heat_capacity_Cv_300K_per_atom',
            'ml_agl_thermal_conductivity_300K',
            'ml_agl_thermal_expansion_300K'
        ]
        self.mfd_fields = [
            'ml_Cv',
            'ml_Fvib',
            'ml_Svib'
        ]

        self.asc_fields = [
            'ml_Tc_5K',
            'ml_Tc_10K'
        ]

    def _model_fields(self):
        '''
        Returns the list of the known result fields for the current model,
        or None if the current model is not one of plmf, mfd, asc.
        '''
        return {
            'plmf': self.plmf_fields,
            'mfd': self.mfd_fields,
            'asc': self.asc_fields
        }.get(self.model)

    def _read_json(self, req):
        '''
        Performs the request and returns the decoded JSON body.
        The response is always closed, also when reading fails.

        Throws AFLOWmlAPIError if the body is not valid JSON;
        an HTTPError is passed on to the caller untouched.
        '''
        response = urlopen(req)
        try:
            raw = response.read()
        finally:
            response.close()

        try:
            return json_loader(raw)
        except ValueError:
            raise AFLOWmlAPIError(
                'Unable to parse response, invalid JSON'
            )

    def submit_job(self, post_data, model):
        '''
        Post the contents of post_data to the API endpoint
        <model>/prediction.

        Returns the task id used to poll the job.

        Throws AFLOWmlAPIError if invalid model, HTTPError or invalid response.
        '''
        if model not in self.supported_models:
            raise AFLOWmlAPIError(
                'The model you specified is not valid. Please select from' +
                ' the following: \n' + '\n'.join(
                    ['    ' + s for s in self.supported_models]
                )
            )
        self.model = model

        # the asc model takes a composition, the other models a structure file
        if model != 'asc':
            encoded_data = urlencoder({
                'file': post_data,
            })
        else:
            encoded_data = urlencoder({
                'composition': post_data
            })

        url = self._base_url + '/' + self.model + '/prediction'
        try:
            res_json = self._read_json(Request(url, encoded_data))
        except HTTPError as e:
            raise AFLOWmlAPIError(
                'Failed to submit job: {}'.format(e.code)
            )

        self.res_data = {}
        try:
            return res_json['id']
        except (KeyError, TypeError):
            # valid JSON, but not the expected object with the task id
            raise AFLOWmlAPIError(
                'Unable to parse response, no job id returned'
            )

    def poll_job(self, job_id, fields=None):
        '''
        From the job id, polls the API endpoint /prediction/result/<job_id> to
        check the status of the job. Polls until status = SUCCESS.

        If fields is empty or not given, all the fields known for the
        current model are returned.

        Returns prediction object as a dictionary.

        Throws AFLOWmlAPIError if unable to poll job, status = FAILURE,
        HTTPError or invalid response.
        '''
        # checked first, so that a missing model is not misreported
        # as an invalid field selection
        if self.model is None:
            raise AFLOWmlAPIError(
                'The ML model has not been specified. Please make sure' +
                ' to call the submit_job method before polling.'
            )

        known_fields = self._model_fields()
        if fields:
            if known_fields is None or not set(fields).issubset(set(known_fields)):
                raise AFLOWmlAPIError(
                    'invalid fields specified, must be from the following \n' +
                    '    plmf: ' + ', '.join(self.plmf_fields) + '\n' +
                    '    mfd: ' + ', '.join(self.mfd_fields) + '\n' +
                    '    asc: ' + ', '.join(self.asc_fields) + '\n'
                )
        else:
            fields = known_fields if known_fields is not None else []

        url = self._base_url + '/prediction/result/' + job_id
        # seconds to wait before the next attempt, per unfinished status
        retry_delays = {'PENDING': 3, 'STARTED': 10}

        # a loop instead of recursion: a long-running job must not be able
        # to exhaust the interpreter recursion limit
        while True:
            try:
                res_json = self._read_json(Request(url))
            except HTTPError as e:
                raise AFLOWmlAPIError(
                    'Failed to poll job: {}'.format(job_id),
                    status_code=e.code
                )

            status = res_json.get('status') if isinstance(res_json, dict) else None

            if status == 'SUCCESS':
                missing = [key for key in fields if key not in res_json]
                if missing:
                    raise AFLOWmlAPIError(
                        'Unable to parse response, missing fields: ' +
                        ', '.join(missing)
                    )
                self.res_data = {key: res_json[key] for key in fields}
                return self.res_data

            if status in retry_delays:
                sleep(retry_delays[status])
                continue

            if status == 'FAILURE':
                raise AFLOWmlAPIError(
                    'The job has failed, please make sure you have a ' +
                    'valid POSCAR, composition or job id'
                )

            # absent or unknown status
            raise AFLOWmlAPIError(
                'Failed to poll job: {}'.format(job_id)
            )

    def get_prediction(self, post_data, model, fields=None):
        '''
        Calls submit_job and poll_job methods to get a prediction.

        Takes the contents of post_data and the model as arguments;
        fields optionally restricts the returned result fields.

        Returns the prediction results as a dictionary.
        '''
        job_id = self.submit_job(post_data, model)
        return self.poll_job(job_id, fields=fields)

0 commit comments

Comments
 (0)