python source code of pre

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os, json, re, sys
from glob import glob
from datetime import datetime
from itertools import groupby
import pandas as pd
from mpcontribs.io.core.utils import get_composition_from_string
from mpcontribs.io.core.recdict import RecursiveDict
from mpcontribs.io.core.utils import clean_value, read_csv, nest_dict
from mpcontribs.io.core.components import Table
from mpcontribs.users.utils import duplicate_check
from mpcontribs.users.redox_thermo_csp.utils import redenth_act, get_debye_temp


def get_fit_pars(sample_number):
    import solar_perovskite
    from solar_perovskite.modelling.isographs import Experimental
    from solar_perovskite.init.import_data import Importdata

    max_dgts = 6
    d = RecursiveDict()
    exp = Experimental(sample_number)
    fitparam = exp.get_fit_parameters()
    # fitparam = compstr, delta_0, tolfac, mol_mass, fit_param_enth,
    #            fit_type_entr, fit_param_entr, delta_min, delta_max
    fit_par_ent = [fitparam[6][0], fitparam[6][1], fitparam[1]]
    d["fit_par_ent"] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts)) for k, v in zip("abc", fit_par_ent)
    )
    d["fit_param_enth"] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts)) for k, v in zip("abcd", fitparam[4])
    )
    d["fit_type_entr"] = clean_value(fitparam[5], max_dgts=max_dgts)
    d["delta_0"] = clean_value(fitparam[1], max_dgts=max_dgts)
    d["delta_min"] = clean_value(fitparam[7], max_dgts=max_dgts)
    d["delta_max"] = clean_value(fitparam[8], max_dgts=max_dgts)
    fit_param_fe = pd.np.loadtxt(
        os.path.abspath(
            os.path.join(
                os.path.dirname(solar_perovskite.__file__),
                "datafiles",
                "entropy_fitparam_SrFeOx",
            )
        )
    )
    d["fit_param_fe"] = RecursiveDict(
        (k, clean_value(v, max_dgts=max_dgts)) for k, v in zip("abcd", fit_param_fe)
    )
    imp = Importdata()
    act_mat = imp.find_active(sample_no=sample_number)
    d["act_mat"] = clean_value(act_mat[1], max_dgts=max_dgts)
    fpath = os.path.join(
        os.path.dirname(solar_perovskite.__file__),
        "rawdata",
        "JV_P_{}_H_S_error_advanced.csv".format(sample_number),
    )
    temps = read_csv(open(fpath, "r").read(), usecols=["T"])
    d["t_avg"] = clean_value(pd.to_numeric(temps["T"]).mean(), max_dgts=max_dgts)
    return d


def get_table(results, letter):
    y = "Δ{}".format(letter)
    df = Table(
        RecursiveDict([("δ", results[0]), (y, results[1]), (y + "ₑᵣᵣ", results[2])])
    )
    x0, x1 = map(float, df["δ"].iloc[[0, -1]])
    pad = 0.15 * (x1 - x0)
    mask = (results[3] > x0 - pad) & (results[3] < x1 + pad)
    x, fit = results[3][mask], results[4][mask]
    df.set_index("δ", inplace=True)
    df2 = pd.DataFrame(RecursiveDict([("δ", x), (y + " Fit", fit)]))
    df2.set_index("δ", inplace=True)
    cols = ["δ", y, y + "ₑᵣᵣ", y + " Fit"]
    return (
        pd.concat([df, df2], sort=True)
        .sort_index()
        .reset_index()
        .rename(columns={"index": "δ"})
        .fillna("")[cols]
    )


def add_comp_one(compstr):
    """
    Adds stoichiometries of 1 to compstr that don't have them
    :param compstr:  composition as a string
    :return:         compositon with stoichiometries of 1 added
    """
    sample = pd.np.array(re.sub(r"([A-Z])", r" \1", compstr).split()).astype(str)
    sample = ["".join(g) for _, g in groupby(sample, str.isalpha)]
    samp_new = ""
    for k in range(len(sample)):
        spl_samp = re.sub(r"([A-Z])", r" \1", sample[k]).split()
        for l in range(len(spl_samp)):
            if spl_samp[l][-1].isalpha() and spl_samp[l][-1] != "x":
                spl_samp[l] = spl_samp[l] + "1"
            samp_new += spl_samp[l]
    return samp_new


t_ox_airsep = [350, 400, 450, 500, 600, 700, 800]
t_red_airsep = [600, 700, 800, 900, 1000, 1100, 1200, 1400]
p_ox_airsep = [1e-20, 1e-15, 1e-12, 1e-10, 1e-8, 1e-6, 1e-5, 1e-4, 1e-3]
p_red_airsep = [1e-8, 1e-6, 1e-5, 1e-4, 1e-3, 0.21, 1]
processes = ["AS", "WS", "CS"]
enth_steps = 20
t_ox_ws_cs = [600, 700, 800, 900, 1000, 1050, 1100, 1150]
t_red_ws_cs = [1100, 1200, 1250, 1300, 1350, 1400, 1450, 1500]
p_ox_ws_cs = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
p_red_ws_cs = [1e-6, 1e-5, 1e-3, 0.21, 1]


@duplicate_check
def run(mpfile, **kwargs):
    # TODO clone solar_perovskite if needed, abort if insufficient permissions
    try:
        import solar_perovskite
        from solar_perovskite.core import GetExpThermo
        from solar_perovskite.init.find_structures import FindStructures
        from solar_perovskite.init.import_data import Importdata
        from solar_perovskite.modelling.from_theo import EnthTheo
    except ImportError:
        print("could not import solar_perovskite, clone github repo")
        sys.exit(1)

    input_files = mpfile.hdata.general["input_files"]
    input_dir = os.path.dirname(solar_perovskite.__file__)
    input_file = os.path.join(input_dir, input_files["exp"])
    exp_table = read_csv(open(input_file, "r").read().replace(";", ","))
    print("exp data loaded.")
    with open(os.path.join(input_dir, input_files["theo"]), "r") as f:
        theo_data = json.loads(f.read()).pop("collection")
    print("theo data loaded.")
    with open(input_files["energy"], "r") as f:
        data = json.load(f).pop("collection")
    print("energy data loaded.")
    l = [
        dict(sdoc, parameters=doc["_id"])
        for doc in data
        for sdoc in doc["energy_analysis"]
    ]
    frame = pd.DataFrame(l)
    parameters = frame["parameters"]
    frame.drop(labels=["parameters"], axis=1, inplace=True)
    frame.insert(0, "parameters", parameters)
    print("energy dataframe:", frame.shape)

    mpfile_singles = [m for m in mpfile.split()]
    for mpfile_single in mpfile_singles:
        identifier = mpfile_single.ids[0]
        # if identifier in run.existing_identifiers:
        #    print (not updating', identifier)
        #    continue
        if identifier != "mp-1076585":
            continue
        hdata = mpfile_single.hdata[identifier]
        print(identifier)

        print("add hdata ...")
        d = RecursiveDict()
        d["data"] = RecursiveDict()
        compstr = hdata["pars"]["theo_compstr"]
        row = exp_table.loc[exp_table["theo_compstr"] == compstr]
        if not row.empty:
            sample_number = int(row.iloc[0]["sample_number"])
            d["pars"] = get_fit_pars(sample_number)
            d["data"]["availability"] = "Exp+Theo"
        else:
            d["pars"] = RecursiveDict()
            d["data"]["availability"] = "Theo"
        # print('dh_min, dh_max ...')
        # _, dh_min, dh_max, _ = redenth_act(compstr)
        # d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4)
        # d['pars']['dh_max'] = clean_value(dh_max, max_dgts=4)
        # d['pars']['elastic'] = RecursiveDict()
        # print('debye temps ...')
        # d['pars']['elastic']['debye_temp'] = RecursiveDict()
        # try:
        #    t_d_perov = get_debye_temp(identifier)
        #    t_d_brownm = get_debye_temp(hdata['data']['reduced_phase']['closest-MP'])
        #    tensors_available = 'True'
        # except TypeError:
        #    t_d_perov = get_debye_temp("mp-510624")
        #    t_d_brownm = get_debye_temp("mp-561589")
        #    tensors_available = 'False'
        # d['pars']['elastic']['debye_temp']['perovskite'] = clean_value(t_d_perov, max_dgts=6)
        # d['pars']['elastic']['debye_temp']['brownmillerite'] = clean_value(t_d_brownm, max_dgts=6)
        # d['pars']['elastic']['tensors_available'] = tensors_available
        d["pars"]["last_updated"] = str(datetime.now())
        mpfile_single.add_hierarchical_data(d, identifier=identifier)

        # for process in processes:
        #    if process != "AS":
        #        t_ox_l = t_ox_ws_cs
        #        t_red_l = t_red_ws_cs
        #        p_ox_l = p_ox_ws_cs
        #        p_red_l = p_red_ws_cs
        #        data_source = ["Theo"]
        #    else:
        #        t_ox_l = t_ox_airsep
        #        t_red_l = t_red_airsep
        #        p_ox_l = p_ox_airsep
        #        p_red_l = p_red_airsep
        #        data_source = ["Theo", "Exp"]

        #    for red_temp in t_red_l:
        #        for ox_temp in t_ox_l:
        #            for ox_pr in p_ox_l:
        #                for red_pr in p_red_l:
        #                    for data_sources in data_source:
        #                        db_id = process + "_" + str(float(ox_temp)) + "_" \
        #                                + str(float(red_temp)) + "_" + str(float(ox_pr)) \
        #                                + "_" + str(float(red_pr)) + "_" + data_sources + \
        #                                "_" + str(float(enth_steps))

        print("add energy analysis ...")
        group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1]))
        group.drop(labels="compstr", axis=1, inplace=True)
        for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False):
            subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1, inplace=True)
            for unstable, subsubgroup in subgroup.groupby("unstable", sort=False):
                subsubgroup.drop(labels="unstable", axis=1, inplace=True)
                name = "energy-analysis_{}_{}".format(
                    "unstable" if unstable else "stable", "-".join(prodstr)
                )
                print(name)
                mpfile_single.add_data_table(identifier, subsubgroup, name)

        print(mpfile_single)
        mpfile.concat(mpfile_single)
        break

        if not row.empty:
            print("add ΔH ...")
            exp_thermo = GetExpThermo(sample_number, plotting=False)
            enthalpy = exp_thermo.exp_dh()
            table = get_table(enthalpy, "H")
            mpfile_single.add_data_table(identifier, table, name="enthalpy")

            print("add ΔS ...")
            entropy = exp_thermo.exp_ds()
            table = get_table(entropy, "S")
            mpfile_single.add_data_table(identifier, table, name="entropy")

            print("add raw data ...")
            tga_results = os.path.join(
                os.path.dirname(solar_perovskite.__file__), "tga_results"
            )
            for path in glob(
                os.path.join(tga_results, "ExpDat_JV_P_{}_*.csv".format(sample_number))
            ):
                print(path.split("_{}_".format(sample_number))[-1].split(".")[0], "...")
                body = open(path, "r").read()
                cols = ["Time [min]", "Temperature [C]", "dm [%]", "pO2"]
                table = read_csv(
                    body, lineterminator=os.linesep, usecols=cols, skiprows=5
                )
                table = table[cols].iloc[::100, :]
                # scale/shift for better graphs
                T, dm, p = [pd.to_numeric(table[col]) for col in cols[1:]]
                T_min, T_max, dm_min, dm_max, p_max = (
                    T.min(),
                    T.max(),
                    dm.min(),
                    dm.max(),
                    p.max(),
                )
                rT, rdm = abs(T_max - T_min), abs(dm_max - dm_min)
                table[cols[2]] = (dm - dm_min) * rT / rdm
                table[cols[3]] = p * rT / p_max
                table.rename(
                    columns={
                        "dm [%]": "(dm [%] + {:.4g}) * {:.4g}".format(
                            -dm_min, rT / rdm
                        ),
                        "pO2": "pO₂ * {:.4g}".format(rT / p_max),
                    },
                    inplace=True,
                )
                mpfile_single.add_data_table(identifier, table, name="raw")