# Source code for natorgms.brutto

#    Copyright 2022 Volikov Alexander <ab.volikov@gmail.com>
#
#    This file is part of natorgms. 
#
#    natorgms is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    natorgms is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with natorgms.  If not, see <http://www.gnu.org/licenses/>.

from typing import Sequence, Optional
from functools import wraps
import numpy as np
import pandas as pd
import copy
from functools import lru_cache
from frozendict import frozendict

def _freeze(func):
    """
    freeze dict in func
    """
    @wraps(func)
    def wrapped(*args, **kwargs):
        args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args])
        kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
        return func(*args, **kwargs)
    return wrapped

@_freeze
@lru_cache(maxsize=None)
def _brutto_gen_cached(elems: dict, rules: bool) -> pd.DataFrame:
    """
    Build and memoize the brutto formula table for ``brutto_gen``.

    ``_freeze`` converts ``elems`` to a frozendict so ``lru_cache`` can use
    it as a key. The frame returned here is the cached object itself, so
    ``brutto_gen`` copies it before handing it to callers.
    """
    columns = list(elems)
    # resolve masses first: an unknown element fails before the grid is built
    masses = get_elements_masses(columns)

    # one axis of candidate counts per element; the upper bound is exclusive
    axes = [np.array(range(elems[el][0], elems[el][1])) for el in columns]

    # every combination of element counts, one row per candidate formula
    grid = np.array(np.meshgrid(*axes)).T.reshape(-1, len(axes))
    gdf = pd.DataFrame(grid, columns=columns)

    if rules:
        # the rules are evaluated on element totals (isotopes summed into the
        # main element); a copy keeps the isotope columns of gdf intact
        merged = _merge_isotopes(gdf.copy())
        if 'C' not in merged or 'H' not in merged or 'O' not in merged:
            raise ValueError('For applying rules in brutto must be CHO elements or their isotopes')
        if 'N' not in merged:
            merged['N'] = 0

        h_to_c = merged['H'] / merged['C']
        o_to_c = merged['O'] / merged['C']
        dbe_o = 1.0 + merged['C'] - 0.5 * merged['H'] + 0.5 * merged['N'] - merged['O']
        parity = (merged['H'] + merged['N']) % 2

        # a single mask built on the same index as gdf, so no filter depends
        # on aligning against an already-filtered frame
        keep = (
            (h_to_c < 2.2) & (h_to_c > 0.25)
            & (o_to_c < 1)
            & (dbe_o <= 10)
            & (parity == 0)
        )
        gdf = gdf.loc[keep].copy()

    # calculate mass; gdf holds only element columns at this point
    gdf['mass'] = np.round(gdf.multiply(masses).sum(axis=1), 6)
    return gdf.sort_values("mass").reset_index(drop=True)


def brutto_gen(elems: Optional[dict] = None, rules: bool = True) -> pd.DataFrame:
    """
    Generate brutto formula dataframe

    Parameters
    ----------
    elems: dict
        Dictionary with elements and their ranges for generating the
        brutto table. The upper bound of a range is exclusive.
        Isotopes are written as 'Element_massnumber'.
        Example: {'C':(1,60),'C_13':(0,3)} - content of carbon
        (main isotope) from 1 to 59, content of isotope 13 carbon
        from 0 to 2.
        By default it is {'C':(4, 51),'H':(4, 101),'O':(0,26), 'N':(0,4), 'S':(0,3)}
    rules: bool
        Rules: 0.25<H/C<2.2, O/C < 1, nitrogen parity ((H+N) even), DBE-O <= 10.
        By default it is on, but for tmds should be off

    Returns
    -------
    pandas Dataframe
        Dataframe with one column per element and a 'mass' column,
        sorted by mass. Results are cached per (elems, rules); each call
        returns an independent copy, so the caller may modify it freely.

    Raises
    ------
    ValueError
        If rules is on and C, H or O (or their isotopes) are missing in elems
    """
    if elems is None:
        elems = {'C': (4, 51), 'H': (4, 101), 'O': (0, 26), 'N': (0, 4), 'S': (0, 3)}
    # copy, so that a caller editing the result cannot corrupt the cached table
    return _brutto_gen_cached(elems, rules).copy()
def _merge_isotopes(gdf: pd.DataFrame) -> pd.DataFrame: """ All isotopes will be merged and title as main. Return ------ pandas Dataframe """ for el in gdf: res = el.split('_') if len(res) == 2: if res[0] not in gdf: gdf[res[0]] = 0 gdf[res[0]] = gdf[res[0]] + gdf[el] gdf = gdf.drop(columns=[el]) return gdf
def get_elements_masses(elems: Sequence[str]) -> np.ndarray:
    """
    Get elements masses from list

    Parameters
    ----------
    elems: Sequence[str]
        List of elements. A plain symbol ('C') gives the mass of the most
        abundant isotope; a name with '_' ('C_13') is looked up in the
        'element_isotop' column of the elements table.
        Example: ['C', 'H', 'N', 'C_13', 'O']

    Return
    ------
    numpy array
        masses in the same order as elems

    Raises
    ------
    KeyError
        If an element or isotope is not present in the elements table
    """
    table = elements_table()
    masses = []
    for el in elems:
        if '_' in el:
            found = table.loc[table['element_isotop'] == el]
        else:
            # main isotope = the row with the highest abundance
            found = table.loc[table['element'] == el].sort_values(by='abundance', ascending=False)
        if found.empty:
            # same exception type as the failed lookup used to produce,
            # but the message names the offending entry
            raise KeyError(f"Unknown element or isotope: {el!r}")
        masses.append(found['mass'].iloc[0])
    return np.array(masses)
def gen_from_brutto(table: pd.DataFrame) -> pd.DataFrame:
    """
    Generate mass from brutto table

    Parameters
    ----------
    table: pandas Dataframe
        table with element content. Every column must be an element or
        isotope name known to get_elements_masses. The column
        "calc_mass" is added to this table in place.

    Return
    ------
    pandas DataFrame
        The same table object with elements and the "calc_mass" column
        (rounded to 6 decimals; rows with zero mass get NaN)
    """
    masses = get_elements_masses(table.columns)
    table["calc_mass"] = np.round(table.multiply(masses).sum(axis=1), 6)
    # np.nan, not np.NaN: the capitalised alias no longer exists in NumPy 2.0
    table.loc[table["calc_mass"] == 0, "calc_mass"] = np.nan
    return table
# (element, exact mass, abundance in percent) for every isotope in the table.
_ISOTOPE_DATA = (
    ("Al", 26.981538, 100.0),
    ("Sb", 120.903818, 57.21), ("Sb", 122.904216, 42.79),
    ("Ar", 35.967546, 0.3365), ("Ar", 37.962732, 0.0632), ("Ar", 39.962383, 99.6003),
    ("As", 74.921596, 100.0),
    ("Ba", 129.90631, 0.106), ("Ba", 131.905056, 0.101), ("Ba", 133.904503, 2.417),
    ("Ba", 134.905683, 6.592), ("Ba", 135.90457, 7.854), ("Ba", 136.905821, 11.232),
    ("Ba", 137.905241, 71.698),
    ("Be", 9.012182, 100.0),
    ("Bi", 208.980383, 100.0),
    ("B", 10.012937, 19.9), ("B", 11.009305, 80.1),
    ("Br", 78.918338, 50.69), ("Br", 80.916291, 49.31),
    ("Cd", 105.906458, 1.25), ("Cd", 107.904183, 0.89), ("Cd", 109.903006, 12.49),
    ("Cd", 110.904182, 12.8), ("Cd", 111.902757, 24.13), ("Cd", 112.904401, 12.22),
    ("Cd", 113.903358, 28.73), ("Cd", 115.904755, 7.49),
    ("Ca", 39.962591, 96.941), ("Ca", 41.958618, 0.647), ("Ca", 42.958767, 0.135),
    ("Ca", 43.955481, 2.086), ("Ca", 45.953693, 0.004), ("Ca", 47.952534, 0.187),
    ("C", 12.0, 98.93), ("C", 13.003355, 1.07),
    ("Ce", 135.907144, 0.185), ("Ce", 137.905986, 0.251), ("Ce", 139.905434, 88.45),
    ("Ce", 141.90924, 11.114),
    ("Cs", 132.905447, 100.0),
    ("Cl", 34.968853, 75.78), ("Cl", 36.965903, 24.22),
    ("Cr", 49.94605, 4.345), ("Cr", 51.940512, 83.789), ("Cr", 52.940654, 9.501),
    ("Cr", 53.938885, 2.365),
    ("Co", 58.9332, 100.0),
    ("Cu", 62.929601, 69.17), ("Cu", 64.927794, 30.83),
    ("Dy", 155.924278, 0.06), ("Dy", 157.924405, 0.1), ("Dy", 159.925194, 2.34),
    ("Dy", 160.92693, 18.91), ("Dy", 161.926795, 25.51), ("Dy", 162.928728, 24.9),
    ("Dy", 163.929171, 28.18),
    ("Er", 161.928775, 0.14), ("Er", 163.929197, 1.61), ("Er", 165.93029, 33.61),
    ("Er", 166.932045, 22.93), ("Er", 167.932368, 26.78), ("Er", 169.93546, 14.93),
    ("Eu", 150.919846, 47.81), ("Eu", 152.921226, 52.19),
    ("F", 18.998403, 100.0),
    ("Ga", 68.925581, 60.108), ("Ga", 70.924705, 39.892),
    ("Gd", 151.919788, 0.2), ("Gd", 153.920862, 2.18),
    # NOTE(review): 154.822619 looks like a typo for ~154.9226 - confirm
    # against the reference the table was built from before changing it
    ("Gd", 154.822619, 14.8),
    ("Gd", 155.92212, 20.47), ("Gd", 156.923957, 15.65), ("Gd", 157.924101, 24.84),
    ("Gd", 159.927051, 21.86),
    ("Ge", 69.92425, 20.84), ("Ge", 71.922076, 27.54), ("Ge", 72.923459, 7.73),
    ("Ge", 73.921178, 36.5), ("Ge", 75.921403, 7.61),
    ("Au", 196.966552, 100.0),
    ("Hf", 173.94004, 0.16), ("Hf", 175.941402, 5.26), ("Hf", 176.94322, 18.6),
    ("Hf", 177.943698, 27.28), ("Hf", 178.945815, 13.62), ("Hf", 179.946549, 35.08),
    ("He", 3.016029, 0.000137), ("He", 4.002603, 99.999863),
    ("Ho", 164.930319, 100.0),
    ("H", 1.007825, 99.9885), ("H", 2.014102, 0.115),
    ("In", 112.904061, 4.29), ("In", 114.903878, 95.71),
    ("I", 126.904468, 100.0),
    ("Ir", 190.960591, 37.3), ("Ir", 192.962924, 62.7),
    ("Fe", 53.939615, 5.845), ("Fe", 55.934942, 91.754), ("Fe", 56.935399, 2.119),
    ("Fe", 57.93328, 0.282),
    ("Kr", 77.920386, 0.35), ("Kr", 79.916378, 2.28), ("Kr", 81.913485, 11.58),
    ("Kr", 82.914136, 11.49), ("Kr", 83.911507, 57.0), ("Kr", 85.91061, 17.3),
    ("La", 137.907107, 0.09), ("La", 138.906348, 99.91),
    ("Pb", 203.973029, 1.4), ("Pb", 205.974449, 24.1), ("Pb", 206.975881, 22.1),
    ("Pb", 207.976636, 52.4),
    ("Li", 6.015122, 7.59), ("Li", 7.016004, 92.41),
    ("Lu", 174.940768, 97.41), ("Lu", 175.942682, 2.59),
    ("Mg", 23.985042, 78.99), ("Mg", 24.985837, 10.0), ("Mg", 25.982593, 11.01),
    ("Mn", 54.93805, 100.0),
    ("Hg", 195.965815, 0.15), ("Hg", 197.966752, 9.97), ("Hg", 198.968262, 16.87),
    ("Hg", 199.968309, 23.1), ("Hg", 200.970285, 13.18), ("Hg", 201.970626, 29.86),
    ("Hg", 203.973476, 6.87),
    ("Mo", 91.90681, 14.84), ("Mo", 93.905088, 9.25), ("Mo", 94.905841, 15.92),
    ("Mo", 95.904679, 16.68), ("Mo", 96.906021, 9.55), ("Mo", 97.905408, 24.13),
    ("Mo", 99.907477, 9.63),
    ("Nd", 141.907719, 27.2), ("Nd", 142.90981, 12.2), ("Nd", 143.910083, 23.8),
    ("Nd", 144.912569, 8.3), ("Nd", 145.913112, 17.2), ("Nd", 147.916889, 5.7),
    ("Nd", 149.920887, 5.6),
    ("Ne", 19.99244, 90.48), ("Ne", 20.993847, 0.27), ("Ne", 21.991386, 9.25),
    ("Ni", 57.935348, 68.0769), ("Ni", 59.930791, 26.2231), ("Ni", 60.93106, 1.1399),
    ("Ni", 61.928349, 3.6345), ("Ni", 63.92797, 0.9256),
    ("Nb", 92.906378, 100.0),
    ("N", 14.003074, 99.632), ("N", 15.000109, 0.368),
    ("Os", 183.952491, 0.02), ("Os", 185.953838, 1.59), ("Os", 186.955748, 1.96),
    ("Os", 187.955836, 13.24), ("Os", 188.958145, 16.15), ("Os", 189.958445, 26.26),
    ("Os", 191.961479, 40.78),
    ("O", 15.994915, 99.757), ("O", 16.999132, 0.038), ("O", 17.99916, 0.205),
    ("Pd", 101.905608, 1.02), ("Pd", 103.904035, 11.14), ("Pd", 104.905084, 22.33),
    ("Pd", 105.903483, 27.33), ("Pd", 107.903894, 26.46), ("Pd", 109.905152, 11.72),
    ("P", 30.973762, 100.0),
    ("Pt", 189.95993, 0.014), ("Pt", 191.961035, 0.782), ("Pt", 193.962664, 32.967),
    ("Pt", 194.964774, 33.832), ("Pt", 195.964935, 25.242), ("Pt", 197.967876, 7.163),
    ("K", 38.963707, 93.2581), ("K", 39.963999, 0.0117), ("K", 40.961826, 6.7302),
    ("Pr", 140.907648, 100.0),
    ("Re", 184.952956, 37.4), ("Re", 186.955751, 62.6),
    ("Rh", 102.905504, 100.0),
    ("Rb", 84.911789, 72.17), ("Rb", 86.909183, 27.83),
    ("Ru", 95.907598, 5.54), ("Ru", 97.905287, 1.87), ("Ru", 98.905939, 12.76),
    ("Ru", 99.90422, 12.6), ("Ru", 100.905582, 17.06), ("Ru", 101.90435, 31.55),
    ("Ru", 103.90543, 18.62),
    ("Sm", 143.911995, 3.07), ("Sm", 146.914893, 14.99), ("Sm", 147.914818, 11.24),
    ("Sm", 148.91718, 13.82), ("Sm", 149.917271, 7.38), ("Sm", 151.919728, 26.75),
    ("Sm", 153.922205, 22.75),
    ("Sc", 44.95591, 100.0),
    ("Se", 73.922477, 0.89), ("Se", 75.919214, 9.37), ("Se", 76.919915, 7.63),
    ("Se", 77.91731, 23.77), ("Se", 79.916522, 49.61), ("Se", 81.9167, 8.73),
    ("Si", 27.976927, 92.2297), ("Si", 28.976495, 4.6832), ("Si", 29.97377, 3.0872),
    ("Ag", 106.905093, 51.839), ("Ag", 108.904756, 48.161),
    ("Na", 22.98977, 100.0),
    ("Sr", 83.913425, 0.56), ("Sr", 85.909262, 9.86), ("Sr", 86.908879, 7.0),
    ("Sr", 87.905614, 82.58),
    ("S", 31.972071, 94.93), ("S", 32.971458, 0.76), ("S", 33.967867, 4.29),
    ("S", 35.967081, 0.02),
    ("Ta", 179.947466, 0.012), ("Ta", 180.947996, 99.988),
    ("Te", 119.90402, 0.09), ("Te", 121.903047, 2.55), ("Te", 122.904273, 0.89),
    ("Te", 123.902819, 4.74), ("Te", 124.904425, 7.07), ("Te", 125.903306, 18.84),
    ("Te", 127.904461, 31.74), ("Te", 129.906223, 34.08),
    ("Tb", 158.925343, 100.0),
    ("Tl", 202.972329, 29.524), ("Tl", 204.974412, 70.476),
    ("Th", 232.03805, 100.0),
    ("Tm", 168.934211, 100.0),
    ("Sn", 111.904821, 0.97), ("Sn", 113.902782, 0.66), ("Sn", 114.903346, 0.34),
    ("Sn", 115.901744, 14.54), ("Sn", 116.902954, 7.68), ("Sn", 117.901606, 24.22),
    ("Sn", 118.903309, 8.59), ("Sn", 119.902197, 32.58), ("Sn", 121.90344, 4.63),
    ("Sn", 123.905275, 5.79),
    ("Ti", 45.952629, 8.25), ("Ti", 46.951764, 7.44), ("Ti", 47.947871, 73.72),
    ("W", 179.946706, 0.12), ("W", 181.948206, 26.5), ("W", 182.950224, 14.31),
    ("W", 183.950933, 30.64), ("W", 185.954362, 28.43),
    ("U", 234.040946, 0.0055), ("U", 235.043923, 0.72), ("U", 238.050783, 99.2745),
    ("V", 49.947163, 0.25), ("V", 50.943964, 99.75),
    ("Xe", 123.905896, 0.09), ("Xe", 125.904269, 0.09), ("Xe", 127.90353, 1.92),
    ("Xe", 128.904779, 26.44), ("Xe", 129.903508, 10.44), ("Xe", 130.905082, 21.18),
    ("Xe", 131.904154, 26.89), ("Xe", 133.905395, 10.44), ("Xe", 135.90722, 8.87),
    ("Yb", 167.933894, 0.13), ("Yb", 169.934759, 3.04), ("Yb", 170.936322, 14.28),
    ("Yb", 171.936378, 21.83), ("Yb", 172.938207, 16.13), ("Yb", 173.938858, 31.83),
    ("Yb", 175.942568, 12.76),
    ("Y", 88.905848, 100.0),
    ("Zn", 63.929147, 48.63), ("Zn", 65.926037, 27.9), ("Zn", 66.927131, 4.1),
    ("Zn", 67.924848, 18.75), ("Zn", 69.925325, 0.62),
    ("Zr", 89.904704, 51.45), ("Zr", 90.905645, 11.22), ("Zr", 91.90504, 17.15),
    ("Zr", 93.906316, 17.38), ("Zr", 95.908276, 2.8),
)


def elements_table() -> pd.DataFrame:
    """
    Table with exact mass of element and their isotop abundance

    The 'isotop' column is the mass number, obtained by rounding the exact
    mass to the nearest integer, and 'element_isotop' is
    '<element>_<mass number>' (for example 'C_13', 'O_18'). Earlier the
    mass was truncated instead of rounded, which labelled most isotopes
    one unit too low (16O as 'O_15', 18O as 'O_17').

    Return
    ------
    Pandas DataFrame
        Dataframe with columns 'element', 'mass', 'abundance', 'isotop',
        'element_isotop'. A new frame is built on every call.
    """
    rows = []
    for element, mass, abundance in _ISOTOPE_DATA:
        # every mass in the table lies within ~0.2 of an integer, so
        # rounding recovers the mass number; truncation does not
        mass_number = round(mass)
        rows.append((element, mass, abundance, mass_number, f"{element}_{mass_number}"))
    return pd.DataFrame(
        rows,
        columns=['element', 'mass', 'abundance', 'isotop', 'element_isotop'],
    )
if __name__ == '__main__':
    # Script entry: build the brutto table with default settings and print it
    default_table = brutto_gen()
    print(default_table)