|
| 1 | +#!/usr/bin/env python2 |
| 2 | +""" |
| 3 | +Given the folder with the POSCARs, this script employs two predictive ML models: |
| 4 | +* AFLOW-ML PLMF (http://aflowlib.org/aflow-ml) |
| 5 | +* MPDS ML (http://mpds.io/ml) |
| 6 | +validating them using the experimental data from the core MPDS database |
| 7 | +(a subscription is required), and outputs a CSV table for a detailed comparison |
| 8 | +""" |
| 9 | +from __future__ import division |
| 10 | +import os |
| 11 | +import sys |
| 12 | +import time |
| 13 | + |
| 14 | +import httplib2 |
| 15 | +import numpy as np |
| 16 | +from ase.units import _Nav, _k |
| 17 | +from mpds_client import MPDSDataRetrieval, APIError |
| 18 | + |
| 19 | +from mpds_ml_labs.prediction import prop_models |
| 20 | +from mpds_ml_labs.struct_utils import detect_format, poscar_to_ase, refine, get_formula, sgn_to_crsystem |
| 21 | +from mpds_ml_labs.common import API_KEY, API_ENDPOINT, make_request |
| 22 | +from mpds_ml_labs.aflowml_client import AFLOWmlAPI |
| 23 | + |
| 24 | + |
# Name of the CSV file the comparison table is written to at the end of the
# run; the script refuses to start if this file already exists.
RESULT_FILE = 'aflow_mpds_comparison_070219.csv'
# URL of the MPDS ML prediction service the structures are posted to.
LABS_SERVER_ADDR = 'https://labs.mpds.io/predict' # http://127.0.0.1:5000/predict
# Correspondence between the MPDS property identifiers (used as keys of
# `prop_models` and of the MPDS ML prediction output) and the keys under which
# the same properties are looked up in the AFLOW-ML output.
MPDS_AFLOW_CORR = {
    'z': 'ml_ael_bulk_modulus_vrh',
    'd': 'ml_agl_debye',
    't': 'ml_agl_thermal_expansion_300K',
    'x': 'ml_agl_heat_capacity_Cp_300K',
    'w': 'ml_egap'
}
| 34 | + |
def kbcell_to_jkmol(value, n_at_cell):
    """
    Rescale a per-cell quantity to a molar, per-atom one.

    The value is multiplied by the product of the `_k` and `_Nav` constants
    from `ase.units` (presumably the Boltzmann constant in J/K and the
    Avogadro number, i.e. their product is the molar gas constant) and then
    divided by the number of atoms in the cell. In this script it is applied
    to the AFLOW-ML heat capacity prediction.

    :param value: quantity to convert (judging by the function name,
        expressed in units of k_B per cell -- TODO confirm)
    :param n_at_cell: number of atoms in the cell; must be non-zero
    :return: the converted value (judging by the function name, in J/(K*mol))
    """
    per_mole_of_cells = value * _k * _Nav
    return per_mole_of_cells / n_at_cell
| 37 | + |
# Refuse to clobber the results of a previous run. An explicit exit is used
# instead of `assert`, because assertions are stripped when Python runs with
# -O, in which case the result file would be silently overwritten at the end.
if os.path.exists(RESULT_FILE):
    sys.exit("Result file %s already exists, refusing to overwrite it!" % RESULT_FILE)

# The only command-line argument is a structure file or a folder of such files
try:
    given = sys.argv[1]
except IndexError:
    sys.exit("Structure file or folder with files must be given!")

# Build the list of files to process: every regular file directly inside the
# given folder (sub-folders are not descended into), or the single given file
if os.path.isdir(given):
    # NB os.listdir() returns entries in arbitrary order, so they are sorted
    # to make the order of the rows in the resulting table reproducible
    candidates = [os.path.join(given, filename) for filename in sorted(os.listdir(given))]
    tasks = [path for path in candidates if os.path.isfile(path)]
elif os.path.isfile(given):
    tasks = [given]
else:
    # fail early with a clear message rather than later on open()
    sys.exit("No such file or folder: %s" % given)
| 53 | + |
def _drop_off_ambient(df):
    """
    Return a copy of the dataframe without the rows whose condition is
    a temperature given in K that lies below 200 or above 400.

    Rows with any other condition name or units are kept untouched.
    The columns `Cname`, `Cunits` and `Cvalue` must be present.
    """
    off_ambient = df[
        (df['Cname'] == 'Temperature') & (df['Cunits'] == 'K') &
        ((df['Cvalue'] < 200) | (df['Cvalue'] > 400))
    ]
    # NB rows are removed by index label, a new frame is returned so that
    # filtered slices of another frame are never modified in place
    return df.drop(off_ambient.index)


mpds_ml_remote = httplib2.Http()
mpds_api = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT, verbose=False)
aflowml = AFLOWmlAPI()
result_db = [] # rows of the resulting table, one per successfully processed structure

start_time = time.time()

for task in tasks:
    title = task.split(os.sep)[-1]

    # NB the context manager closes the file handle right after reading
    with open(task) as f_structure:
        structure = f_structure.read()

    # Only ordered POSCAR structures which can be parsed and refined are considered
    if detect_format(structure) != 'poscar':
        continue
    ase_obj, error = poscar_to_ase(structure)
    if error:
        continue
    if 'disordered' in ase_obj.info:
        continue
    ase_obj, error = refine(ase_obj)
    if error:
        continue
    formula, n_atoms_cell = get_formula(ase_obj), len(ase_obj)

    print("*"*20 + ("%s %s, %s" % (title, formula, n_atoms_cell)) + "*"*20)

    # Part of the MPDS query shared by all the requests for this structure
    tpl_query = {
        'formulae': formula,
        'lattices': sgn_to_crsystem(ase_obj.info['spacegroup'].no)
    }

    # Count the MPDS entries of the class "conductor" for this compound
    results_conductor = 0
    try:
        outdf = mpds_api.get_dataframe(dict(classes='conductor', **tpl_query), fields={'P': [
            'sample.measurement[0].condition[0].name',
            'sample.measurement[0].condition[0].scalar',
            'sample.measurement[0].condition[0].units'
        ], 'S': [ # NB mockup, temperature to be released for S-entries soon
            lambda: 'Temperature',
            lambda: 300,
            lambda: 'K'
        ]}, columns=['Cname', 'Cvalue', 'Cunits'])
        results_conductor = len(_drop_off_ambient(outdf))
    except APIError as e:
        # best effort: no data simply leaves the counter at zero,
        # but the unexpected errors are reported as in the query below
        if e.code != 204: # NB standard code for the empty result
            print("While checking against the MPDS an error %s occured" % e.code)

    time.sleep(1)

    mpds_output = make_request(mpds_ml_remote, LABS_SERVER_ADDR, {'structure': structure})
    if 'error' in mpds_output:
        continue

    aflow_output = aflowml.get_prediction(structure, 'plmf')

    # Collect the experimental (factual) value of each compared property;
    # NB it is stored in the imported `prop_models` under the key `factual`
    for prop_id in MPDS_AFLOW_CORR:
        try:
            outdf = mpds_api.get_dataframe(dict(props=prop_models[prop_id]['name'], **tpl_query), fields={'P': [
                'sample.material.chemical_formula',
                'sample.material.phase_id',
                'sample.measurement[0].property.scalar',
                'sample.measurement[0].property.units',
                'sample.measurement[0].condition[0].units',
                'sample.measurement[0].condition[0].name',
                'sample.measurement[0].condition[0].scalar'
            ]}, columns=['Compound', 'Phase', 'Value', 'Units', 'Cunits', 'Cname', 'Cvalue'])
        except APIError as e:
            prop_models[prop_id]['factual'] = None
            if e.code != 204: # NB standard code for the empty result
                print("While checking against the MPDS an error %s occured" % e.code)
            continue

        # Keep only the values in the expected units and within the open interval of the model
        outdf = outdf[outdf['Units'] == prop_models[prop_id]['units']]
        outdf = outdf[
            (outdf['Value'] > prop_models[prop_id]['interval'][0]) &
            (outdf['Value'] < prop_models[prop_id]['interval'][1])
        ]
        # NB for the properties "m" and "d" the temperature filter is not applied
        if prop_id not in ['m', 'd']:
            outdf = _drop_off_ambient(outdf)
        if outdf.empty:
            prop_models[prop_id]['factual'] = None
            continue
        # NB to treat values out of JSON bounds given as str
        prop_models[prop_id]['factual'] = np.median(outdf['Value'].astype('float64'))

    # units conversion
    # NOTE(review): the factor 100000 and the heat capacity conversion presumably
    # bring both predictions to the same units -- confirm against both services
    mpds_output['prediction']['t']['value'] /= 100000
    aflow_output[MPDS_AFLOW_CORR['x']] = kbcell_to_jkmol(aflow_output[MPDS_AFLOW_CORR['x']], n_atoms_cell)

    # remark on conductivity: based on whether a finite experimental value
    # for the property "w" and / or any "conductor" entries were found
    results_insulator = prop_models['w']['factual'] and np.isfinite(prop_models['w']['factual'])
    if results_insulator and results_conductor:
        remark = 'Semiconductor'
    elif results_insulator:
        remark = 'Insulator'
    elif results_conductor:
        remark = 'Conductor'
    else:
        remark = 'Unknown'

    # One row per structure: for every property its name, factual value,
    # AFLOW-ML prediction, MPDS ML prediction and two empty columns
    # (presumably reserved to be filled in later -- TODO confirm)
    row = [title, formula, n_atoms_cell]
    for prop_id in ('z', 'd', 't', 'x', 'w'):
        row.extend([
            prop_models[prop_id]['name'],
            prop_models[prop_id]['factual'],
            aflow_output[MPDS_AFLOW_CORR[prop_id]],
            mpds_output['prediction'][prop_id]['value'],
            '', ''
        ])
    row.append(remark)
    result_db.append(row)
| 167 | + |
print("Done in %1.2f sc" % (time.time() - start_time))

# Dump the collected rows as comma-separated lines (values are written as is,
# without any quoting). NB the context manager guarantees that the file is
# flushed and closed even if the serialization of a row fails.
with open(RESULT_FILE, "w") as f_result:
    for row in result_db:
        f_result.write(",".join([str(item) for item in row]) + "\n")
0 commit comments