# Source code for jarvis.core.specie

"""Modules related to chemistry of periodic-table elements."""

import os
import json
import numpy as np
import functools
from jarvis.core.utils import digitize_array
from collections import defaultdict
from collections.abc import Iterable
from jarvis.db.jsonutils import loadjson

# Element descriptor table used by ``Specie`` when ``source="cfid"``.
# A context manager guarantees the handle is closed even if parsing fails.
el_chem_json_file = os.path.join(os.path.dirname(__file__), "Elements.json")
with open(el_chem_json_file, "r") as el_chem_json:
    chem_data = json.load(el_chem_json)

# Element descriptor table used by ``Specie`` when ``source="magpie"``.
chem_data_magpie = loadjson(
    os.path.join(os.path.dirname(__file__), "magpie.json")
)

# Per-element charge descriptors read by ``Specie.get_chgdescrp_arr``.
el_chrg_json_file = os.path.join(
    os.path.dirname(__file__), "element_charge.json"
)
with open(el_chrg_json_file, "r") as el_chrg_json:
    chrg_data = json.load(el_chrg_json)

# Path only; the file is opened by ``get_node_attributes`` when the
# "cgcnn" feature set is requested.
cgcnn_feature_json = os.path.join(os.path.dirname(__file__), "atom_init.json")


def get_descrp_arr_name(elm="Al"):
    """
    Get the names of the chemical descriptors stored for an element.

    The names are returned in the order in which they appear in the
    ``chem_data`` entry for ``elm``.

    Can be used in JARVIS-ML.

    Args:
        elm: element name

    Returns:
        arr: list of descriptor names; an empty list if ``elm`` has no
        entry in ``chem_data``.
    """
    try:
        # Iterating a mapping yields its keys, i.e. the descriptor names.
        return list(chem_data[elm])
    except (LookupError, TypeError):
        # Unknown (or unhashable) element: stay best-effort and return
        # an empty list instead of raising.
        return []
class Specie:
    """
    Specie object for chemistry information.

    Used in defining chemistry of a material.

    >>> el = Specie('Al')
    >>> el.Z
    13
    >>> round(el.atomic_mass,2)
    26.98
    >>> el.symbol
    'Al'
    >>> round(el.get_chgdescrp_arr[1],2)
    12.17
    >>> round(el.get_descrp_arr[1],2)
    2792.11
    >>> el = Specie('asdfg')
    >>> el.element_property("asdfg")
    nan
    """

    def __init__(self, symbol="", source="cfid"):
        """
        Initialize with periodic table element.

        Args:
            symbol: element symbol, e.g. ``"Al"``.

            source: descriptor table to read from, ``"cfid"`` or
            ``"magpie"``.

        Raises:
            ValueError: if ``source`` is not one of the options above.
        """
        self.symbol = symbol
        if source == "cfid":
            # Cite reference:
            # https://doi.org/10.1103/PhysRevMaterials.2.083801
            self._data = chem_data
        elif source == "magpie":
            # Cite reference:
            # https://doi.org/10.1038/npjcompumats.2016.28
            self._data = chem_data_magpie
        else:
            raise ValueError("Option not available.", source)

    @property
    def Z(self):
        """Get atomic number."""
        return self.element_property("Z")

    @property
    def atomic_mass(self):
        """Get atomic mass."""
        return self.element_property("atom_mass")

    @property
    def get_chgdescrp_arr(self):
        """
        Get charge descriptors for this element.

        Reads ``chrg_data[self.symbol][0][1]``; the original
        documentation states that this gives 378 array data.

        Returns:
            arr: array value

        Raises:
            KeyError: if the symbol has no entry in ``chrg_data``.
        """
        return chrg_data[self.symbol][0][1]

    @property
    def get_descrp_arr(self):
        """
        Get chemical descriptors for this element.

        All values stored for the symbol in the selected data source are
        collected in storage order and converted to float. The original
        documentation states that this gives 438 array data.

        Returns:
            arr: numpy float array

        Raises:
            KeyError: if the symbol has no entry in the data source.
        """
        values = list(self._data[self.symbol].values())
        return np.array(values).astype(float)

    @property
    def atomic_rad(self):
        """Get atomic radii."""
        return self.element_property("atom_rad")

    @property
    def X(self):
        """Get electronegativity."""
        return self.element_property("X")

    def element_property(self, key=""):
        """
        Get element property from the list of keys.

        The 84 supported keys are the entries of the module-level
        ``keys`` list, for example ``"Z"``, ``"X"``, ``"atom_mass"``,
        ``"atom_rad"``, ``"row"``, ``"coulmn"``, ``"mp"``, ``"bp"``,
        ``"first_ion_en"``, ``"elec_aff"`` and ``"C-0"`` ... ``"C-35"``.

        Args:
            key: name of the property to look up.

        Returns:
            The stored value, or ``nan`` when either the symbol or the
            key is not present in the data source.
        """
        try:
            return self._data[self.symbol][key]
        except (LookupError, TypeError):
            # Unknown symbol/key (or an unhashable one): report "no data"
            # as NaN rather than raising, as the class doctest expects.
            return np.nan
# Property names returned, in this order, by
# ``get_node_attributes(..., atom_features="basic")``.
BASIC_FEATURES = [
    # position in the periodic table
    "Z", "coulmn", "row",
    # electronegativity and atomic radius
    "X", "atom_rad",
    # valence-shell occupations
    "nsvalence", "npvalence", "ndvalence", "nfvalence",
    # ionization energy and electron affinity
    "first_ion_en", "elec_aff",
]
@functools.lru_cache(maxsize=None)
def _cached_node_attributes(species, atom_features):
    """Compute node features; both arguments must be hashable."""
    feature_sets = ("atomic_number", "basic", "cfid", "cgcnn")

    if isinstance(atom_features, str):
        if atom_features not in feature_sets:
            raise NotImplementedError(
                f"atom features must be one of {feature_sets}"
            )
    elif isinstance(atom_features, Iterable):
        # allow custom list of features
        for prop in atom_features:
            if prop not in keys:
                raise NotImplementedError(
                    f"{prop} not supported in custom atom feature list"
                )
        specie = Specie(species)
        return [specie.element_property(prop) for prop in atom_features]

    specie = Specie(species)
    if atom_features == "cfid":
        return specie.get_descrp_arr
    if atom_features == "atomic_number":
        return [specie.element_property("Z")]
    if atom_features == "basic":
        return [specie.element_property(prop) for prop in BASIC_FEATURES]
    if atom_features == "cgcnn":
        # load from json, key by atomic number
        key = str(specie.element_property("Z"))
        with open(cgcnn_feature_json, "r") as f:
            # For alternative features use
            # get_digitized_feats_hot_encoded()
            table = json.load(f)
        try:
            return table[key]
        except KeyError:
            # NOTE(review): the message mentions 103 but the fallback
            # entry actually returned is "100" - confirm which is meant.
            print(f"warning: could not load CGCNN features for {key}")
            print("Setting it to max atomic number available here, 103")
            # TODO Check for the error in oqmd_3d_no_cfid dataset
            # return i['Lr']
            return table["100"]


def get_node_attributes(species, atom_features="atomic_number"):
    """
    Get specific node features for an element.

    Results are memoized per ``(species, atom_features)`` pair, so the
    same object is handed back on repeated calls; callers should treat
    it as read-only.

    Args:
        species: element symbol.

        atom_features: one of ``"atomic_number"``, ``"basic"``,
        ``"cfid"``, ``"cgcnn"``, or an iterable of property names taken
        from ``keys``. Lists and other unhashable or one-shot iterables
        are converted to a tuple before the cached lookup.

    Returns:
        The feature values for the element: a numpy array for
        ``"cfid"``, the entry loaded from the CGCNN JSON file for
        ``"cgcnn"``, otherwise a list.

    Raises:
        NotImplementedError: for an unknown feature-set name or an
        unsupported property in a custom feature list.
    """
    is_text = isinstance(atom_features, str)
    if not is_text and isinstance(atom_features, Iterable):
        # ``lru_cache`` hashes its arguments; a list would raise
        # TypeError and a generator would be consumed by validation.
        atom_features = tuple(atom_features)
    return _cached_node_attributes(species, atom_features)


# Keep the cache-management helpers that callers of the previously
# directly-decorated function could rely on.
get_node_attributes.cache_info = _cached_node_attributes.cache_info
get_node_attributes.cache_clear = _cached_node_attributes.cache_clear
# Names of the 84 per-element properties; also the default
# ``feature_names`` of the one-hot encoding helpers in this module.
# The order is significant and must not be changed.
keys = [
    "is_halogen", "row", "GV", "nfunfill",
    "C-9", "C-8", "C-7", "C-6", "C-5",
    "C-4", "C-3", "C-2", "C-1", "C-0",
    "me1", "me3", "me2",
    "max_oxid_s", "npvalence", "mp", "first_ion_en", "ndunfill",
    "op_eg", "jv_enp", "nfvalence", "polzbl", "oq_bg",
    "atom_rad", "atom_mass", "is_alkali",
    "C-13", "C-12", "C-11", "C-10",
    "C-17", "C-16", "C-15", "C-14",
    "C-19", "C-18",
    "voro_coord", "is_noble_gas",
    "e1", "e3", "e2",
    "is_lanthanoid", "ndvalence", "KV", "min_oxid_s", "nsunfill",
    "C-26", "X", "is_actinoid",
    "C-28", "C-29", "C-27", "C-24", "C-25",
    "C-22", "C-23", "C-20", "C-21",
    "avg_ion_rad", "nsvalence", "is_metalloid", "elec_aff",
    "coulmn", "mol_vol", "bp",
    "C-31", "C-30", "C-33", "C-32", "C-35", "C-34",
    "is_transition_metal", "block", "therm_cond", "Z",
    "is_alkaline", "npunfill", "oq_enp", "mop_eg", "hfus",
]
def get_specie_data():
    """
    Get the json and key data from Specie.

    Returns:
        tuple: ``(keys, chem_data, chrg_data)`` - the property-name
        list, the element descriptor table and the charge descriptor
        table. The module-level objects themselves are returned, not
        copies.
    """
    specie_data = (keys, chem_data, chrg_data)
    return specie_data
def get_digitized_feats_hot_encoded(
    feature_names=keys, filename="feats_encoded.json"
):
    """
    Get OneHotEncoded features with digitized features.

    Every selected property is digitized across all elements with
    ``digitize_array`` and then one-hot encoded; the encodings of all
    properties are concatenated into one vector per element.

    Args:
        feature_names: property names to include.

        filename: JSON file the result is written to; pass ``None`` to
        skip writing.

    Returns:
        dict mapping atomic number (int) to a list of 0/1 ints.
    """
    from sklearn.preprocessing import OneHotEncoder
    import pandas as pd

    try:
        # scikit-learn >= 1.2 spells the dense-output switch this way.
        encoder = OneHotEncoder(categories="auto", sparse_output=False)
    except TypeError:
        # Older scikit-learn only knows the original ``sparse`` keyword.
        encoder = OneHotEncoder(categories="auto", sparse=False)

    dat = {}
    for symbol, props in chem_data.items():
        dat[Specie(symbol).Z] = {
            name: value
            for name, value in props.items()
            if name in feature_names
        }

    # Rows are elements (atomic numbers), columns are properties.
    df = pd.DataFrame(dat)
    # -9999.0 is presumably a missing-value sentinel - TODO confirm.
    df = df.T.replace(-9999.0, 0).replace(-0.0, 0).astype("float")
    for column in df.columns:
        df[column] = digitize_array(df[column])
    # Back to one row per property, one column per element.
    df = df.T

    vals = []
    for row in df.values:
        output = encoder.fit_transform(
            np.array(row, dtype="float").reshape(-1, 1)
        )
        # One list entry per category, each spanning all elements.
        vals.extend(output.T)
    # Transpose so that each row is the full encoding of one element.
    vals = np.array(vals, dtype="float").T

    cols = df.columns.tolist()
    new_dat = {
        int(number): [int(m) for m in encoding]
        for number, encoding in zip(cols, vals)
    }
    if filename is not None:
        from jarvis.db.jsonutils import dumpjson

        dumpjson(data=new_dat, filename=filename)
    return new_dat
def get_feats_hot_encoded(feature_names=keys, filename="feats_encoded.json"):
    """
    Get OneHotEncoded features.

    Deprecated, kept for reference only.

    Args:
        feature_names: property names to include.

        filename: JSON file the result is written to; pass ``None`` to
        skip writing.

    Returns:
        dict mapping each element's ``Z`` value to the concatenated
        one-hot encoding of its selected properties.
    """
    from sklearn.preprocessing import OneHotEncoder
    import pandas as pd

    try:
        # scikit-learn >= 1.2 spells the dense-output switch this way.
        encoder = OneHotEncoder(categories="auto", sparse_output=False)
    except TypeError:
        # Older scikit-learn only knows the original ``sparse`` keyword.
        encoder = OneHotEncoder(categories="auto", sparse=False)

    dat = {}
    for symbol, props in chem_data.items():
        dat[Specie(symbol).Z] = [
            value for name, value in props.items() if name in feature_names
        ]

    # One column per element, one row per selected property.
    df = pd.DataFrame(dat)
    vals = []
    for row in df.values:
        output = encoder.fit_transform(
            np.array(row, dtype="float").reshape(-1, 1)
        )
        # One list entry per category, each spanning all elements.
        vals.extend(output.T)
    # Transpose so that each row is the full encoding of one element.
    vals = np.array(vals, dtype="float").T

    cols = df.columns.tolist()
    new_dat = {number: list(encoding) for number, encoding in zip(cols, vals)}
    if filename is not None:
        from jarvis.db.jsonutils import dumpjson

        dumpjson(data=new_dat, filename=filename)
    return new_dat
# ``x``, ``y`` and ``z`` are the property-name list, the element
# descriptor table and the charge descriptor table; the short names are
# kept because they are part of the module namespace.
x, y, z = get_specie_data()

# Reverse lookup: the "Z" value of each element entry -> element symbol.
info_z = {props["Z"]: symbol for symbol, props in y.items()}
def atomic_numbers_to_symbols(numbers=(1, 2, 3, 4)):
    """
    Convert atomic number array to atomic symbols.

    Args:
        numbers: iterable of atomic numbers.

    Returns:
        list of element symbols, in the same order as ``numbers``.

    Raises:
        KeyError: if a number is not present in ``info_z``.
    """
    return [info_z[number] for number in numbers]
# get_digitized_feats_hot_encoded() """ if __name__ == "__main__": el = Specie("Al") #print(el.get_chgdescrp_arr) print(len(el.get_descrp_arr)) #print (get_descrp_arr_name()) """