Python pandas.DataFrame() Examples

The following code examples show how to use pandas.DataFrame(). They are taken from open source Python projects.
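
Before the project examples, here is a minimal, self-contained sketch of the most common ways to call the constructor: from a dict of columns, from a list of records, and from a NumPy array. The column and index labels are arbitrary placeholders.

import numpy as np
import pandas as pd

# From a dict of equal-length columns
df_cols = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

# From a list of records (dicts); missing keys become NaN
df_rows = pd.DataFrame([{"a": 1, "b": 2}, {"a": 3}])

# From a 2-D NumPy array with explicit index and column labels
df_arr = pd.DataFrame(np.arange(6).reshape(3, 2),
                      index=["r0", "r1", "r2"],
                      columns=["x", "y"])

print(df_cols.shape, df_rows.shape, df_arr.shape)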

Example 1
Project: navitia_client   Author: leonardbinet   File: parser.py    MIT License
def write_all(self, directory):
        # Get results
        unnested = pd.DataFrame(self.unnested_items)  # df
        nested = self.nested_items  # dict
        # Write item csv
        unnested.to_csv(os.path.join(directory, self.item_name + ".csv"))
        # Write item json
        with open(os.path.join(directory, self.item_name + ".json"), 'w') as f:
            json.dump(nested, f, ensure_ascii=False)
        # Write links (of first page)
        with open(os.path.join(directory, "links.json"), 'w') as f:
            json.dump(self.links, f, ensure_ascii=False)
        # Write disruptions (if item different)
        if self.item_name != "disruptions":
            unnested_dis = pd.DataFrame(self.disruptions)  # df
            unnested_dis.to_csv(os.path.join(directory, "disruptions.csv"))
        # Write logs
        with open(os.path.join(directory, "parse_log.json"), 'w') as f:
            json.dump(self.log, f, ensure_ascii=False) 
Example 2
Project: PEAKachu   Author: tbischler   File: window.py    ISC License
def _plot_initial_windows(self, unsig_base_means, unsig_fcs,
                              sig_base_means, sig_fcs):
        # create plot folder if it does not exist
        plot_folder = "{}/plots".format(self._output_folder)
        if not exists(plot_folder):
            makedirs(plot_folder)
        # MA plot
        plt.plot(np.log10(unsig_base_means),
                 np.log2(unsig_fcs), ".",
                 markersize=2.0, alpha=0.3)
        plt.plot(np.log10(sig_base_means),
                 np.log2(sig_fcs), ".",
                 markersize=2.0, color="red", alpha=0.3)
        plt.axhline(y=np.median(np.log2(unsig_fcs.append(sig_fcs))))
        plt.axvline(x=np.median(np.log10(unsig_base_means.append(
                                         sig_base_means))))
        plt.title("Initial_windows_MA_plot")
        plt.xlabel("log10 base mean")
        plt.ylabel("log2 fold-change")
        plt.savefig("{}/Initial_windows_MA_plot.png".format(plot_folder),
                    dpi=600)
        plt.close()
        # HexBin plot
        df = pd.DataFrame({'log10 base mean': np.log10(unsig_base_means.append(
            sig_base_means)), 'log2 fold-change': np.log2(unsig_fcs.append(
                sig_fcs))})
        df.plot(kind='hexbin', x='log10 base mean',
                y='log2 fold-change', gridsize=50, bins='log')
        plt.axhline(y=np.median(np.log2(unsig_fcs.append(sig_fcs))))
        plt.axvline(x=np.median(np.log10(unsig_base_means.append(
                                         sig_base_means))))
        plt.title("Initial_windows_HexBin_plot")
        plt.savefig("{}/Initial_windows_HexBin_plot.pdf".format(plot_folder))
        plt.close() 
Example 3
Project: PEAKachu   Author: tbischler   File: window.py    ISC License
def _run_deseq2_peaks(self):
        peak_df = pd.DataFrame()
        for replicon in sorted(self._replicon_dict):
            if self._replicon_dict[replicon]["peak_df"].empty:
                continue
            peak_df = peak_df.append(self._replicon_dict[replicon]["peak_df"],
                                     ignore_index=True)
        count_df = peak_df.loc[:, self._exp_lib_list + self._ctr_lib_list]
        deseq2_runner = DESeq2Runner(count_df)
        result_df, self._size_factors = deseq2_runner.run_deseq2(
            self._exp_lib_list, self._ctr_lib_list, self._size_factors,
            self._pairwise_replicates)
        # normalize counts
        peak_df[self._lib_names_list] = peak_df[
            self._lib_names_list].div(self._size_factors, axis='columns')
        # append DESeq2 output
        peak_df = pd.concat([peak_df, result_df], axis=1)
        for replicon in sorted(self._replicon_dict):
            self._replicon_dict[replicon]["peak_df"] = peak_df[
                peak_df.replicon == replicon] 
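
Note that the DataFrame.append used in this example (and several below) was deprecated in pandas 1.4 and removed in pandas 2.0. A minimal sketch of the same accumulation with pd.concat, using hypothetical per-replicon frames in place of self._replicon_dict:

import pandas as pd

# Hypothetical stand-ins for self._replicon_dict[replicon]["peak_df"]
replicon_frames = {
    "chr1": pd.DataFrame({"peak_start": [10], "peak_end": [50]}),
    "chr2": pd.DataFrame({"peak_start": [200], "peak_end": [260]}),
}

# Collect the non-empty frames and concatenate once, instead of appending in a loop
parts = [df for df in replicon_frames.values() if not df.empty]
peak_df = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
print(peak_df)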
Example 4
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def generate_peaks_from_blockbuster(self, min_cluster_expr_frac,
                                        min_block_overlap,
                                        min_max_block_expr_frac):
        for replicon in self._replicon_dict:
            self._replicon_dict[replicon]["peak_df"] = pd.DataFrame()
        cluster = {}
        for line in self._blockbuster_output.rstrip().split('\n'):
            if line.startswith('>'):
                if cluster:
                    self._call_cluster_peaks(cluster, min_cluster_expr_frac,
                                             min_block_overlap,
                                             min_max_block_expr_frac)
                    cluster = {}
                cluster["header"] = line
                cluster["blocks"] = []
            else:
                cluster["blocks"].append(line)
        if cluster:
            self._call_cluster_peaks(cluster, min_cluster_expr_frac,
                                     min_block_overlap,
                                     min_max_block_expr_frac) 
Example 5
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def calculate_peak_expression(self):
        self._generate_peak_counts()
        self._peak_df = pd.DataFrame()
        for replicon in sorted(self._replicon_dict):
            if self._replicon_dict[replicon]["peak_df"].empty:
                continue
            self._replicon_dict[replicon]["peak_df"]["replicon"] = replicon
            for lib_name, lib in self._lib_dict.items():
                self._replicon_dict[replicon][
                    "peak_df"][lib_name] = lib.replicon_dict[
                        replicon]["peak_counts"]
                del lib.replicon_dict[replicon]["peak_counts"]
            # add pseudocounts
            # self._replicon_dict[
            #    replicon]["peak_df"].loc[:, self._lib_names_list] += 1.0
            self._peak_df = self._peak_df.append(self._replicon_dict[replicon][
                "peak_df"], ignore_index=True) 
Example 6
Project: backtrader-cn   Author: pandalibin   File: utils.py    GNU General Public License v3.0
def write_daily_alert(cls, symbol, stock_id, action):
        """
        write daily stock alert to MongoDB.
        :param symbol: Arctic symbol
        :param data: dict, like: {'stock': '000651', 'action': 'buy/sell'}
        :return: None
        """

        lib = get_or_create_library(conf.DAILY_STOCK_ALERT_LIBNAME)

        data = {
            'stock': stock_id,
            'action': action
        }
        df = pd.DataFrame([data], columns=data.keys())
        if symbol in lib.list_symbols():
            lib.append(symbol, df)
        else:
            lib.write(symbol, df) 
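
The list wrapping in pd.DataFrame([data]) above matters: a dict of scalar values needs an explicit index, otherwise the constructor refuses to guess the number of rows. A small illustration with a made-up record:

import pandas as pd

record = {"stock": "000651", "action": "buy"}

# A list of dicts gives one row per dict
df1 = pd.DataFrame([record])

# A dict of scalars needs an explicit index to define the single row;
# pd.DataFrame(record) alone raises "If using all scalar values, you must pass an index"
df2 = pd.DataFrame(record, index=[0])

print(df1.equals(df2))  # True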
Example 7
Project: backtrader-cn   Author: pandalibin   File: test_datas_utils.py    GNU General Public License v3.0
def _test_strip_unused_cols(self):
        data = pd.DataFrame({
            'name': ['tom', 'jack'],
            'age': [24, 56],
            'gender': ['male', 'male'],
            'address': ['cn', 'us']
        })
        data.index = pd.date_range(start='2017-01-01', periods=2)

        origin_cols = ['name', 'age', 'gender', 'address']
        unused_cols = ['address', 'gender']
        new_cols = ['name', 'age']

        self.assertEqual(sorted(data.columns), sorted(origin_cols))

        bdu.Utils.strip_unused_cols(data, *unused_cols)

        self.assertEqual(sorted(data.columns), sorted(new_cols))
Example 8
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def write_absorption_data_to_txt(filename,dataframe):
    """ Write absorption data Sij to txt file.
    
        Args:
            filename (str): name of output file
          
            dataframe (DataFrame): pandas DataFrame
        
        Returns:
            None

    """
    f = open(filename,'w')
    for i in dataframe.index:
        for j in dataframe.columns:
            f.write("{0} {1} {2}\n".format(i,j,dataframe[j][i]))
    f.close() 
Example 9
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def write_concentration_data_to_txt(filename,dataframe):
    """ Write concentration data Cij to txt file.
    
        Args:
            filename (str): name of output file
          
            dataframe (DataFrame): pandas DataFrame
        
        Returns:
            None

    """
    f = open(filename,'w')
    for i in dataframe.index:
        for j in dataframe.columns:
            f.write("{0} {1} {2}\n".format(i,j,dataframe[j][i]))
    f.close() 
Example 10
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def generate_absorbance_data(wl_span,parameters_dict):
    """
    helper function to generate absorption data based on 
    lorentzian parameters
    """
    components = parameters_dict.keys()
    n_components = len(components)
    n_lambdas = len(wl_span)
    array = np.zeros((n_lambdas,n_components))
    for i,l in enumerate(wl_span):
        j = 0
        for k,p in six.iteritems(parameters_dict):
            alphas = p['alphas']
            betas  = p['betas']
            gammas = p['gammas']
            array[i,j] = absorbance(l,alphas,betas,gammas)
            j+=1

    data_frame = pd.DataFrame(data=array,
                              columns = components,
                              index=wl_span)
    return data_frame 
Example 11
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def add_noise_to_signal(signal, size):
    """
    Adds random, normally distributed noise to a clean signal. Used mostly in Kipet
    to add noise to absorbances or concentration profiles obtained from simulations.
    All values that are negative after the noise is added are set to zero.
    Args:
        signal (data): the Z or S matrix to have noise added to it
        size (scalar): sigma (or size of distribution)
    Returns:
        pandas dataframe
    """
    clean_sig = signal    
    noise = np.random.normal(0,size,clean_sig.shape)
    sig = clean_sig+noise    
    df= pd.DataFrame(data=sig)
    df[df<0]=0
    return df

#=============================================================================
#---------------------------PRE-PROCESSING TOOLS------------------------
#============================================================================= 
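
A hypothetical usage of the add_noise_to_signal function from Example 11, assuming it is in scope, just to show the shapes involved; the component names, time grid, and sigma are made up:

import numpy as np
import pandas as pd

# Hypothetical clean concentration profiles, indexed by time
clean = pd.DataFrame({"A": np.linspace(1.0, 0.1, 5),
                      "B": np.linspace(0.0, 0.9, 5)},
                     index=np.linspace(0, 10, 5))

noisy = add_noise_to_signal(clean, 0.05)  # 0.05 is the sigma of the added noise
print(noisy.shape)        # same shape as the input
print((noisy < 0).sum())  # no negative entries remain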
Example 12
Project: kipet   Author: salvadorgarciamunoz   File: TemplateBuilder.py    GNU General Public License v3.0
def add_absorption_data(self, data):
        """Add absorption data

        Args:
            data (DataFrame): DataFrame with wavelengths as
                              indices and mixture components as columns.

        Returns:
            None

        """
        if isinstance(data, pd.DataFrame):
            self._absorption_data = data
        else:
            raise RuntimeError('Spectral data format not supported. Try pandas.DataFrame')

    # For inclusion of discrete jumps 
Example 13
Project: pybench   Author: pentschev   File: utils.py    Apache License 2.0
def benchmark_json_to_pandas(path):
    """Convert the json "benchmarks" field of a pytest-benchmark json file into
    a pandas.DataFrame.

    Parameters
    ----------
    path: str
        path to json file

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame containing benchmarks extracted from a
        pytest-benchmark json file

    Example
    -------
    >>> benchmarks_df = benchmark_json_to_pandas(
    >>>     "/path/to/pytest_benchmark.json")
    """
    data = json.load(open(path))

    return pd.io.json.json_normalize(data=data["benchmarks"]) 
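
pd.io.json.json_normalize still works in older pandas, but since pandas 1.0 the same function is exposed at the top level as pd.json_normalize. A small sketch with inline records instead of a pytest-benchmark file:

import pandas as pd

records = [
    {"name": "bench_a", "stats": {"mean": 0.12, "stddev": 0.01}},
    {"name": "bench_b", "stats": {"mean": 0.34, "stddev": 0.02}},
]

# Nested dicts are flattened into dotted column names such as "stats.mean"
df = pd.json_normalize(records)
print(df.columns.tolist())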
Example 14
Project: pybench   Author: pentschev   File: benchmark_ml.py    Apache License 2.0
def load_mortgage(d):
    kwargs = {"nrows": d["shape"][0], "ncols": d["shape"][1], "cached": d["data"]}

    if "train_split" in d:
        kwargs["train_split"] = d["train_split"]
    if "label_col" in d:
        kwargs["label_col"] = d["label_col"]

    data = load_data(**kwargs)

    if d["module"] == "cuml":
        import cudf

        if isinstance(data, dict):
            for k, v in data.items():
                data[k] = cudf.DataFrame.from_pandas(v)

            data["y_train"] = cudf.Series(data["y_train"]["fea0"])
        else:
            data = cudf.DataFrame.from_pandas(data)

    return {"module": d["module"], "data": data} 
Example 15
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License
def average_true_range(df, n):
    """
    
    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    i = 0
    TR_l = [0]
    while i < df.index[-1]:
        TR = max(df.loc[i + 1, 'High'], df.loc[i, 'Close']) - min(df.loc[i + 1, 'Low'], df.loc[i, 'Close'])
        TR_l.append(TR)
        i = i + 1
    TR_s = pd.Series(TR_l)
    ATR = pd.Series(TR_s.ewm(span=n, min_periods=n).mean(), name='ATR_' + str(n))
    df = df.join(ATR)
    return df 
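
average_true_range addresses rows by integer label (df.loc[i + 1, ...] while i runs up to df.index[-1]), so it assumes a default RangeIndex and 'High'/'Low'/'Close' columns. A hypothetical call, assuming the function above is in scope:

import pandas as pd

ohlc = pd.DataFrame({
    "High":  [11.0, 11.5, 12.0, 11.8, 12.3],
    "Low":   [10.2, 10.8, 11.1, 11.0, 11.6],
    "Close": [10.9, 11.4, 11.6, 11.5, 12.1],
})  # default RangeIndex 0..4

with_atr = average_true_range(ohlc, n=3)
print(with_atr["ATR_3"])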
Example 16
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License
def ppsr(df):
    """Calculate Pivot Points, Supports and Resistances for given data
    
    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    PP = pd.Series((df['High'] + df['Low'] + df['Close']) / 3)
    R1 = pd.Series(2 * PP - df['Low'])
    S1 = pd.Series(2 * PP - df['High'])
    R2 = pd.Series(PP + df['High'] - df['Low'])
    S2 = pd.Series(PP - df['High'] + df['Low'])
    R3 = pd.Series(df['High'] + 2 * (PP - df['Low']))
    S3 = pd.Series(df['Low'] - 2 * (df['High'] - PP))
    psr = {'PP': PP, 'R1': R1, 'S1': S1, 'R2': R2, 'S2': S2, 'R3': R3, 'S3': S3}
    PSR = pd.DataFrame(psr)
    df = df.join(PSR)
    return df 
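
In ppsr, pd.DataFrame(psr) is built from a dict of Series; the constructor aligns them on their index, which is also what lets the later df.join(PSR) line up row for row. A tiny standalone illustration:

import pandas as pd

s1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
s2 = pd.Series([30, 10], index=["c", "a"])  # different order, missing "b"

df = pd.DataFrame({"s1": s1, "s2": s2})
print(df)
#    s1    s2
# a   1  10.0
# b   2   NaN
# c   3  30.0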
Example 17
Project: navitia_client   Author: leonardbinet   File: parser.py    MIT License
def get_unnested_items(self):
        df = pd.DataFrame(self.nested_items[self.item_name])
        flatten_dataframe(df, drop=True, max_depth=5)
        self.unnested_items = df.to_dict() 
Example 18
Project: navitia_client   Author: leonardbinet   File: parser.py    MIT License
def count_nbr_collected_items(self):
        unnested = pd.DataFrame(self.unnested_items)  # df
        self.nbr_collected_items = len(unnested.index) 
Example 19
Project: navitia_client   Author: leonardbinet   File: parser.py    MIT License
def parse_log(self):
        log = {}
        log["number_requests"] = len(self.results)
        log["number_parsed"] = len(self.parsed)
        log["keys"] = self.keys
        log["nbr_announced_items"] = self.nbr_expected_items
        log["nbr_collected_items"] = self.nbr_collected_items
        log["item_columns"] = list(pd.DataFrame(
            self.unnested_items).columns.values)
        self.log = log
        log["parsing_errors"] = self.parsing_errors 
Example 20
Project: PEAKachu   Author: tbischler   File: window.py    ISC License
def _convert_to_data_frame(self):
        self._window_df = pd.DataFrame()
        for replicon in sorted(self._replicon_dict):
            for strand in ["+", "-"]:
                # add window positions to data frame
                row_number = len(self._replicon_dict[replicon]["window_list"])
                df = pd.concat([
                    pd.Series([replicon] * row_number),
                    pd.Series([strand] * row_number),
                    pd.Series([window[0]+1 for window in
                               self._replicon_dict[
                                   replicon]["window_list"]]),
                    pd.Series([window[1] for window in
                               self._replicon_dict[
                        replicon]["window_list"]])], axis=1)
                df.columns = ["replicon", "strand", "w_start", "w_end"]
                # add library counts to data frame
                for lib_name, lib in self._lib_dict.items():
                    df[lib_name] = (pd.Series(lib.replicon_dict[
                        replicon]["window_counts"].loc[:, strand]))
                self._window_df = self._window_df.append(df,
                                                         ignore_index=True)
            del self._replicon_dict[replicon]["window_list"]
        # remove windows without expression in any library
        print("Removing empty windows from DataFrame with {} rows...".format(
            len(self._window_df.index)), flush=True)
        t_start = time()
        self._window_df = self._window_df.loc[
            (self._window_df.loc[:, self._lib_names_list].sum(axis=1) > 0), :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(self._window_df.index)), flush=True)
        if self._window_df.empty:
            print("**Dataframe empty**", flush=True)
            return
        if self._stat_test == "gtest":
            self._run_gtest_preprocessing()
        elif self._stat_test == "deseq":
            self._run_deseq_preprocessing() 
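
In _convert_to_data_frame the four Series are concatenated first and the columns renamed afterwards; passing keys= to pd.concat does both in one step. A small sketch with dummy values:

import pandas as pd

df = pd.concat([pd.Series(["chr1", "chr1"]),
                pd.Series(["+", "+"]),
                pd.Series([101, 201]),
                pd.Series([150, 250])],
               axis=1, keys=["replicon", "strand", "w_start", "w_end"])
print(df)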
Example 21
Project: PEAKachu   Author: tbischler   File: window.py    ISC License
def _prefilter_windows_deseq(self, df):
        print("Removing windows where not all experiment libs show "
              "expression from DataFrame with {} rows...".format(len(df)),
              flush=True)
        t_start = time()
        for exp_lib in self._exp_lib_list:
            exp_lib_zero_count = 0.0
            df = df.loc[(df.loc[:, exp_lib] > exp_lib_zero_count), :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        if df.empty:
            return df
        initial_window_df = df.copy()
        # normalize counts on initial windows
        initial_window_df[self._lib_names_list] = initial_window_df[
            self._lib_names_list].div(self._size_factors, axis='columns')
        # minimum expression cutoff based on mean over experiment libraries
        print("Removing windows based on mad cutoff from DataFrame "
              "with {} rows...".format(len(df)), flush=True)
        t_start = time()
        median_abs_dev_from_zero = mad(initial_window_df.loc[
            :, self._exp_lib_list].mean(axis=1), center=0.0)
        min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
        print("Minimal window expression based on mean over RIP/CLIP "
              "libraries: {} (MAD from zero: {})".format(
                  min_expr, median_abs_dev_from_zero), flush=True)
        df = df.loc[initial_window_df.loc[:, self._exp_lib_list].mean(
            axis=1) >= min_expr, :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        return df 
Example 22
Project: PEAKachu   Author: tbischler   File: window.py    ISC License
def _combine_windows(self, df):
        peak_list = []
        peak = {"peak_start": None, "peak_end": None}
        for index, window in df.iterrows():
            # significant window
            if window.loc["significant"]:
                # start new peak region if no peak region was started before
                if peak["peak_start"] is None:
                    peak["peak_start"] = window.loc["w_start"]
                    peak["peak_end"] = window.loc["w_end"]
                # start new peak region while still inside previous peak region
                #   *this is due to gaps in the windows caused by pre-filtering
                #   *add previous peak region to output
                # +1 -> combine adjacent peaks
                elif window.loc["w_start"] > peak["peak_end"] + 1:
                    peak_list.append(deepcopy(peak))
                    peak["peak_start"] = window.loc["w_start"]
                    peak["peak_end"] = window.loc["w_end"]
                # elongate peak if window overlaps
                else:
                    peak["peak_end"] = window.loc["w_end"]
            # non-significant window
            else:
                # jump to next window if outside of peak region
                # or current position upstream of peak end
                # +1 -> combine adjacent peaks
                if (peak["peak_start"] is None or window.loc[
                        "w_start"] <= peak["peak_end"] + 1):
                    continue
                # otherwise end peak region and append to output list
                peak_list.append(deepcopy(peak))
                peak["peak_start"] = None
                peak["peak_end"] = None
        # append peak if last window in data frame was significant
        if peak["peak_start"] is not None:
            peak_list.append(deepcopy(peak))
        peak_df = pd.DataFrame(peak_list, columns=["peak_start", "peak_end"])
        return peak_df 
Example 23
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def _call_cluster_peaks(self, cluster, min_cluster_expr_frac,
                            min_block_overlap, min_max_block_expr_frac):
        cluster_entries = cluster["header"].strip().split('\t')
        cluster_expr = float(cluster_entries[5])
        cluster_strand = cluster_entries[4]
        cluster_replicon = cluster_entries[1]
        peak_df = pd.DataFrame()

        if len(cluster["blocks"]) == 1:
            block_entries = cluster["blocks"][0].strip().split('\t')
            peak_start = int(block_entries[2]) + 1
            peak_end = int(block_entries[3])
            peak_df = peak_df.append(pd.Series([peak_start, peak_end], index=[
                "peak_start", "peak_end"]), ignore_index=True)
        else:
            blocks = [block.strip().split('\t') for block in cluster["blocks"]]
            block_df = pd.DataFrame(
                blocks, columns=["blockNb", "blockChrom", "blockStart",
                                 "blockEnd", "blockStrand", "blockExpression",
                                 "readCount"])
            block_df[["blockNb", "blockStart", "blockEnd", "blockExpression",
                      "readCount"]] = block_df[
                    ["blockNb", "blockStart", "blockEnd", "blockExpression",
                     "readCount"]].apply(pd.to_numeric)
            peak_df = self._split_cluster_peaks(block_df, cluster_expr,
                                                peak_df, min_cluster_expr_frac,
                                                min_block_overlap,
                                                min_max_block_expr_frac)
        if peak_df.empty:
            return
        peak_df = peak_df.astype(np.int64)
        peak_df["peak_strand"] = cluster_strand
        self._replicon_dict[cluster_replicon]["peak_df"] = self._replicon_dict[
            cluster_replicon]["peak_df"].append(peak_df, ignore_index=True) 
Example 24
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def _filter_peaks(self, df):
        # calculate mad for original data frame
        median_abs_dev_from_zero = mad(df.loc[:, self._exp_lib_list].mean(
            axis=1), center=0.0)
        # padj filter
        print("Removing peaks based on padj from DataFrame with {} rows...".
              format(len(df)), flush=True)
        t_start = time()
        df = df.query('padj < @self._padj_threshold')
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        if df.empty:
            return df
        # minimum expression cutoff based on mean over experiment libraries
        print("Removing peaks based on mad cutoff from DataFrame "
              "with {} rows...".format(len(df)), flush=True)
        t_start = time()
        min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
        print("Minimal peak expression based on mean over RIP/CLIP "
              "libraries:" "{} (MAD from zero: {})".format(
                  min_expr, median_abs_dev_from_zero), flush=True)
        df = df.loc[df.loc[:, self._exp_lib_list].mean(axis=1) >= min_expr, :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        if df.empty:
            return df
        # minimum fold change
        print("Removing peaks based on minimum fold change "
              "from DataFrame with {} rows...".format(len(df)), flush=True)
        t_start = time()
        log2_fc_cutoff = np.log2(self._fc_cutoff)
        df = df.query('log2FoldChange >= @log2_fc_cutoff')
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        return df 
Example 25
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def _filter_peaks_without_replicates(self, df):
        # calculate mad for original data frame
        median_abs_dev_from_zero = mad(df.loc[:, self._exp_lib_list].mean(
            axis=1), center=0.0)
        # minimum expression cutoff based on mean over experiment libraries
        print("Removing peaks based on mad cutoff from DataFrame "
              "with {} rows...".format(len(df)), flush=True)
        t_start = time()
        min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
        print("Minimal peak expression based on mean over RIP/CLIP "
              "libraries:" "{} (MAD from zero: {})".format(
                  min_expr, median_abs_dev_from_zero), flush=True)
        df = df.loc[df.loc[:, self._exp_lib_list].mean(axis=1) >= min_expr, :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        if df.empty:
            return df
        # minimum fold change
        print("Removing windows based on minimum fold change from DataFrame "
              "with {} rows...".format(len(df)), flush=True)
        t_start = time()
        df = df.query('fold_change >= @self._fc_cutoff')
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(df)), flush=True)
        return df 
Example 26
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License
def _plot_initial_peaks(self, unsig_base_means, unsig_fcs,
                            sig_base_means, sig_fcs):
        # create plot folder if it does not exist
        plot_folder = "{}/plots".format(self._output_folder)
        if not exists(plot_folder):
            makedirs(plot_folder)
        # MA plot
        plt.plot(np.log10(unsig_base_means),
                 np.log2(unsig_fcs), ".",
                 markersize=2.0, alpha=0.3)
        plt.plot(np.log10(sig_base_means),
                 np.log2(sig_fcs), ".",
                 markersize=2.0, color="red", alpha=0.3)
        plt.axhline(y=np.median(np.log2(unsig_fcs.append(sig_fcs))))
        plt.axvline(x=np.median(np.log10(unsig_base_means.append(
                                         sig_base_means))))
        plt.title("Initial_peaks_MA_plot")
        plt.xlabel("log10 base mean")
        plt.ylabel("log2 fold-change")
        plt.savefig("{}/Initial_peaks_MA_plot.png".format(plot_folder),
                    dpi=600)
        plt.close()
        # HexBin plot
        df = pd.DataFrame({'log10 base mean': np.log10(unsig_base_means.append(
            sig_base_means)), 'log2 fold-change': np.log2(unsig_fcs.append(
                sig_fcs))})
        df.plot(kind='hexbin', x='log10 base mean',
                y='log2 fold-change', gridsize=50, bins='log')
        plt.axhline(y=np.median(np.log2(unsig_fcs.append(sig_fcs))))
        plt.axvline(x=np.median(np.log10(unsig_base_means.append(
                                         sig_base_means))))
        plt.title("Initial_peaks_HexBin_plot")
        plt.savefig("{}/Initial_peaks_HexBin_plot.pdf".format(plot_folder))
        plt.close() 
Example 27
Project: PEAKachu   Author: tbischler   File: gtest.py    ISC License
def __init__(self, ctr_rep_counts, tagged_rep_counts,
                 pairwise_replicates=False):
        self._rep_df = pd.DataFrame({"ctr_counts": ctr_rep_counts,
                                     "tagged_counts": tagged_rep_counts})
        self._pairwise_replicates = pairwise_replicates
        self._pooled_g_res = {}
        self._total_g_res = {}
        self._heterogenous_g_res = {}
        self._single_g_res = {} 
Example 28
Project: PEAKachu   Author: tbischler   File: library.py    ISC License
def count_reads_for_windows(self):
        read_counter = ReadCounter(self.paired_end, self.max_insert_size,
                                   self.bam_file)
        for replicon in self.replicon_dict:
            self.replicon_dict[replicon]['window_counts'] = pd.DataFrame()
            for strand in ['+', '-']:
                window_counts = read_counter.count_reads_for_windows(
                            replicon,
                            strand,
                            self.replicon_dict[replicon]["window_list"])
                self.replicon_dict[replicon]['window_counts'][
                    strand] = window_counts
        read_counter.close_bam() 
Example 29
Project: PEAKachu   Author: tbischler   File: consensus_peak.py    ISC License
def plot_consensus_peak(self):
        # create plot folder if it does not exist
        plot_folder = "{}/plots".format(self._project_folder)
        if not exists(plot_folder):
            makedirs(plot_folder)
        self._store_peaks()
        comb_cons_value_dict = self._get_peak_coverage()
        df = pd.DataFrame(comb_cons_value_dict, columns=sorted(
            comb_cons_value_dict))
        ax = df.plot(title="Consensus peak per library")
        ax.set_xlabel("Nucleotide position")
        ax.set_ylabel("Relative expression")
        plt.savefig("{}/plots/consensus_peaks.pdf".format(
            self._project_folder)) 
Example 30
Project: incubator-spot   Author: apache   File: dns_oa.py    Apache License 2.0
def _ingest_summary(self):
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")
        
        ingest_summary_cols = ["date","total"]		
        result_rows = []        
        df_filtered =  pd.DataFrame()

        query_to_load = ("""
            SELECT frame_time, COUNT(*) as total FROM {0}.{1}
            WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL
            AND frame_time IS NOT NULL AND frame_len IS NOT NULL
            AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL
            AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL
            AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time;
        """).format(self._db,self._table_name, yr, mn, dy)

        results = impala.execute_query_as_list(query_to_load)
        df = pd.DataFrame(results)

        # Forms a new dataframe splitting the minutes from the time column
        df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy,\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[0].zfill(2),\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[1].zfill(2)),\
            int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df.iterrows()],columns = ingest_summary_cols)

        #Groups the data by minute
        sf = df_new.groupby(by=['date'])['total'].sum()
        df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})

        df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False)

        if len(df_final) > 0:
            query_to_insert=("""
                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
            """).format(self._db, yr, mn, dy, tuple(df_final))
            impala.execute_query(query_to_insert) 
Example 31
Project: rhodonite   Author: nestauk   File: tabular.py    MIT License
def vertices_to_dataframe(g, vectors=False, keys=None):
    """vertices_to_dataframe
    Transforms a graph's vertices and their properties into a tabular format.

    Parameters
    ----------
        g : :obj:`Graph`
            A graph.
        keys : :obj:`iter` of :obj:`str`, optional
            A list of property map keys to convert in to columns. If None, all 
            property maps are converted. Default is None.
    Returns
    -------
        vertex_df : :obj:`DataFrame` A dataframe where each row represents a 
            vertex and the columns are properties of those vertices. By default, 
            the dataframe will contain a column with the vertex id.
    """
    vertex_df = pd.DataFrame(list(g.vertices()), columns=['v'], dtype='int')
    filt = g.get_vertex_filter()
    if filt[0] is not None:
        filt = filt[0].a > 0
        filtered = True
    else:
        filtered=False
        filt = np.array([True] * g.num_vertices())
    if keys is None:
        keys = list(g.vp.keys())
    for k, vp in g.vp.items():
        if k in keys:
            vt = vp.value_type()
            if ('vector' not in vt) & ('string' not in vt) & ('object' not in vt):
                if ('int' in vt) | ('bool' in vt):
                    vertex_df[k] = vp.get_array()[filt]
                    vertex_df[k] = vertex_df[k].astype(int)
                elif 'double' in vt:
                    vertex_df[k] = vp.get_array()[filt]
                    vertex_df[k] = vertex_df[k].astype(float)
            elif ('vector' in vt) & ('string' not in vt) & (vectors == True):
                vertex_df[k] = [[i for i in vp[v]] for v, f in zip(g.vertices(), filt) if f]
    return vertex_df 
Example 32
Project: rhodonite   Author: nestauk   File: tabular.py    MIT License
def edges_to_dataframe(g, keys=None, drop_keys=[], sort=None, vectors=False):
    """edges_to_dataframe
    Transforms a graph's edges and their properties into a tabular format.

    Parameters
    ----------
        g : :obj:`Graph` 
            A graph.
        keys : :obj:`iter` of :obj:`str`, optional
            A list of property map keys to convert in
            to columns. If None, all property maps are converted. Default is
            None.
    Returns
    -------
        edge_df : :obj:`DataFrame` 
            A dataframe where each row represents an edge and the columns are 
            properties of those edges. By default, the dataframe will contain 
            a column for the source vertices and another for the target 
            vertices.
    """
    edge_df = pd.DataFrame(list(g.edges()), columns=['s', 't'], dtype='int')
    edge_df = pd.DataFrame(g.get_edges(), columns=['s', 't', 'e_index'], dtype='int')
    indices = edge_df['e_index'].values
    if keys is None:
        keys = list(g.ep.keys())
    for k, ep in g.ep.items():
        if k in keys:
            vt = ep.value_type()
            if ('vector' not in vt) & ('string' not in vt) & ('object' not in vt):
                if ('int' in vt) | ('bool' in vt):
                    edge_df[k] = ep.get_array()[indices]
                    edge_df[k] = edge_df[k].astype(int)
                elif 'double' in vt:
                    edge_df[k] = ep.get_array()[indices]
                    edge_df[k] = edge_df[k].astype(float)
            elif ('vector' in vt) & ('string' not in vt) & (vectors == True):
                edge_df[k] = [[i for i in ep[e]] for e in g.edges()]
    if sort:
        edge_df.sort_values(sort, inplace=True)
    return edge_df 
Example 33
Project: backtrader-cn   Author: pandalibin   File: utils.py    GNU General Public License v3.0
def split_data(cls, data, percent=0.3):
        """
        Split the data into training data and test data.
        :param data(DataFrame): data to be split.
        :param percent(float): percent of data used as training data.
        :return: training data(DataFrame) and testing data(DataFrame)
        """

        rows = len(data)
        train_rows = math.floor(rows * percent)
        test_rows = rows - train_rows

        return data.iloc[:train_rows], data.iloc[-test_rows:] 
Example 34
Project: backtrader-cn   Author: pandalibin   File: utils.py    GNU General Public License v3.0
def get_best_params(cls, al_results):
        """
        Get the best params, current algorithm is the largest total return rate.
        :param al_results(list): all the optional params and corresponding analysis data.
        :return: best params and corresponding analysis data(dict)
        """
        al_results_df = pd.DataFrame.from_dict(al_results)
        al_results_df = al_results_df.sort_values('total_return_rate', ascending=False)

        al_result_dict = al_results_df.iloc[0].to_dict()

        return al_result_dict 
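
pd.DataFrame.from_dict is mainly interesting when the input is a dict and you need to choose whether its keys become rows or columns (the orient argument). A brief illustration with made-up analysis results keyed by run name:

import pandas as pd

al_results = {
    "run_1": {"total_return_rate": 0.12, "max_drawdown": 0.05},
    "run_2": {"total_return_rate": 0.31, "max_drawdown": 0.09},
}

# orient="index": outer keys become the row index, inner keys become columns
df = pd.DataFrame.from_dict(al_results, orient="index")
best = df.sort_values("total_return_rate", ascending=False).iloc[0].to_dict()
print(best)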
Example 35
Project: backtrader-cn   Author: pandalibin   File: models.py    GNU General Public License v3.0
def save_training_params(symbol, params):
    """
    save training params to library.
    :param symbol: str, arctic symbol
    :param params: dict, e.g.: {"ma_period_s": 1, "ma_period_l": 2, "stock_id": "600909"}
    :return: None
    """

    stock_id = params.ma_periods['stock_id']
    params_to_save = dict(params=params)
    df = pd.DataFrame([params_to_save], columns=params_to_save.keys(), index=[stock_id])

    # write to database
    # if library does not exist, create it
    lib = get_or_create_library(conf.STRATEGY_PARAMS_LIBNAME)

    if lib.has_symbol(symbol):
        logger.debug(
            f'symbol: {symbol} already exists, '
            f'change the params of stock {stock_id}, '
            f'then delete and write symbol: {symbol}.'
        )
        params_df = lib.read(symbol).data
        params_df.loc[stock_id, 'params'] = params
        lib.delete(symbol)
        lib.write(symbol, params_df)
    else:
        logger.debug(
            f'write the params of stock {stock_id} to symbol: {symbol}'
        )
        lib.write(symbol, df) 
Example 36
Project: backtrader-cn   Author: pandalibin   File: test_strategies_ma.py    GNU General Public License v3.0
def _test_get_params_list(self):
        training_data = pd.DataFrame(np.random.rand(100, 2))
        params_list = bsm.MATrendStrategy.get_params_list(training_data, '000651')

        self.assertEqual(len(params_list), 29) 
Example 37
Project: backtrader-cn   Author: pandalibin   File: test_datas_tushare.py    GNU General Public License v3.0
def _test_download_delta_data_initial_no_data(self, mock_get_hist_data):
        mock_hist_data = pd.DataFrame()

        mock_get_hist_data.return_value = mock_hist_data

        coll_name = '000651'
        ts_his_data = bdt.TsHisData(coll_name)

        lib = models.get_library(conf.CN_STOCK_LIBNAME)
        lib.delete(coll_name)

        ts_his_data.download_delta_data()

        self.assertFalse(lib.has_symbol(coll_name)) 
Example 38
Project: backtrader-cn   Author: pandalibin   File: test_datas_tushare.py    GNU General Public License v3.0
def _test_download_delta_data_initial(self, mock_get_hist_data):
        mock_hist_data = pd.DataFrame(data={
            'open': [10, 11],
            'high': [12, 13],
            'close': [14, 15],
            'low': [16, 17],
            'volume': [18, 19],
            'price_change': [20, 21],
            'p_change': [22, 23],
            'ma5': [24, 25],
            'ma10': [26, 27],
            'ma20': [28, 29],
            'v_ma5': [30, 31],
            'v_ma10': [32, 33],
            'v_ma20': [34, 35],
            'turnover': [36, 37]
        }, index=['2017-01-01', '2017-01-02'])

        mock_get_hist_data.return_value = mock_hist_data

        coll_name = '000651'
        ts_his_data = bdt.TsHisData(coll_name)

        lib = models.get_library(conf.CN_STOCK_LIBNAME)
        lib.delete(coll_name)

        ts_his_data.download_delta_data()

        hist_data_000651 = ts_his_data.get_data()

        self.assertEqual(len(hist_data_000651), 2) 
Example 39
Project: backtrader-cn   Author: pandalibin   File: test_datas_tushare.py    GNU General Public License v3.0
def _test_download_delta_data_no_data(self, mock_get_hist_data):
        coll_name = '000651'
        ts_his_data = bdt.TsHisData(coll_name)

        mock_delta_data = pd.DataFrame()
        mock_get_hist_data.return_value = mock_delta_data

        ts_his_data.download_delta_data()

        hist_data_000651 = ts_his_data.get_data()

        self.assertEqual(len(hist_data_000651), 2) 
Example 40
Project: backtrader-cn   Author: pandalibin   File: test_datas_tushare.py    GNU General Public License v3.0
def _test_download_delta_data(self, mock_get_hist_data):
        coll_name = '000651'
        ts_his_data = bdt.TsHisData(coll_name)

        yesterday = dt.datetime.now() - dt.timedelta(days=1)
        mock_delta_data = pd.DataFrame(data={
            'open': 38,
            'high': 39,
            'close': 40,
            'low': 41,
            'volume': 42,
            'price_change': 43,
            'p_change': 44,
            'ma5': 45,
            'ma10': 46,
            'ma20': 47,
            'v_ma5': 48,
            'v_ma10': 49,
            'v_ma20': 50,
            'turnover': 51
        }, index=[dt.datetime.strftime(yesterday, '%Y-%m-%d')])

        mock_get_hist_data.return_value = mock_delta_data

        ts_his_data.download_delta_data()

        hist_data_000651 = ts_his_data.get_data()

        self.assertEqual(len(hist_data_000651), 3)
        self.assertEqual(dt.datetime.strftime(hist_data_000651.index[-1], '%Y-%m-%d'),
                         dt.datetime.strftime(yesterday, '%Y-%m-%d'))
        lib = models.get_library(conf.CN_STOCK_LIBNAME)
        lib.delete(coll_name) 
Example 41
Project: osqf2015   Author: mvaz   File: model.py    MIT License
def from_blaze(cls, filename, date_col='Date', value_col='Close'):
        df = bz.odo(filename, pd.DataFrame)[[date_col, value_col]] #[1000:1100]
        df = df.rename(columns = {value_col: 'Value'})
        ts = df.set_index(date_col)
        return cls(ts) 
Example 42
Project: osqf2015   Author: mvaz   File: model.py    MIT License
def __init__(self):
        super(StockModel, self).__init__()
        file_name = "notebooks/db2.bcolz"
        self.df = bz.odo(file_name, pd.DataFrame)[['Date', 'Close']] #[1000:1100]
        self.devol()
        self.returns_df = None 
Example 43
Project: kipet   Author: salvadorgarciamunoz   File: validate_templatebuilder.py    GNU General Public License v3.0
def test_add_spectral_data(self):

        builder = TemplateBuilder()
        d_frame = pd.DataFrame()
        self.assertFalse(builder.has_spectral_data())
        builder.add_spectral_data(d_frame)
        self.assertTrue(builder.has_spectral_data()) 
Example 44
Project: kipet   Author: salvadorgarciamunoz   File: validate_templatebuilder.py    GNU General Public License v3.0
def test_add_absorption_data(self):
        builder = TemplateBuilder()
        s_frame = pd.DataFrame()
        self.assertFalse(builder.has_adsorption_data())
        builder.add_absorption_data(s_frame)
        self.assertTrue(builder.has_adsorption_data()) 
Example 45
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def write_spectral_data_to_csv(filename,dataframe):
    """ Write spectral data Dij to csv file.
    
        Args:
            filename (str): name of output file
          
            dataframe (DataFrame): pandas DataFrame
        
        Returns:
            None

    """
    dataframe.to_csv(filename) 
Example 46
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def write_absorption_data_to_csv(filename,dataframe):
    """ Write absorption data Sij to csv file.
    
        Args:
            filename (str): name of output file
          
            dataframe (DataFrame): pandas DataFrame
        
        Returns:
            None

    """
    dataframe.to_csv(filename) 
Example 47
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def write_concentration_data_to_csv(filename,dataframe):
    """ Write concentration data Cij to csv file.
    
        Args:
            filename (str): name of output file
          
            dataframe (DataFrame): pandas DataFrame
        
        Returns:
            None

    """
    dataframe.to_csv(filename) 
Example 48
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def read_concentration_data_from_txt(filename):
    """ Reads txt with concentration data
    
        Args:
            filename (str): name of input file
          
        Returns:
            DataFrame

    """

    f = open(filename,'r')
    data_dict = dict()
    set_index = set()
    set_columns = set()

    for line in f:
        if line not in ['','\n','\t','\t\n']:
            l=line.split()
            i = float(l[0])
            j = l[1]
            k = float(l[2])
            set_index.add(i)
            set_columns.add(j)
            data_dict[i,j] = k
    f.close()
    
    data_array = np.zeros((len(set_index),len(set_columns)))
    sorted_index = sorted(set_index)
    sorted_columns = set_columns

    for i,idx in enumerate(sorted_index):
        for j,jdx in enumerate(sorted_columns):
            data_array[i,j] = data_dict[idx,jdx]

    return pd.DataFrame(data=data_array,columns=sorted_columns,index=sorted_index) 
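
An alternative (not the KIPET implementation) for the same whitespace-separated time/component/value triples is to let pandas parse the file and pivot it. A sketch, assuming a hypothetical file 'concentrations.txt' whose rows look like '0.0 A 1.25':

import pandas as pd

# Each row of the file: <time> <component> <value>
long_df = pd.read_csv("concentrations.txt", sep=r"\s+", header=None,
                      names=["time", "component", "value"])

# Pivot into one row per time point and one column per component
wide_df = long_df.pivot(index="time", columns="component", values="value")
print(wide_df.head())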
Example 49
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def read_spectral_data_from_csv(filename, instrument = False, negatives_to_zero = False):
    """ Reads csv with spectral data
    
        Args:
            filename (str): name of input file
            instrument (bool): if data is direct from instrument
            negatives_to_zero (bool): if data contains negatives and baseline shift is not
                                        done then this forces negative values to zero.

        Returns:
            DataFrame

    """

    data = pd.read_csv(filename,index_col=0)
    if instrument:
        #this means we probably have a date/timestamp on the columns
        data = pd.read_csv(filename,index_col=0, parse_dates = True)
        data = data.T
        for n in data.index:
            h,m,s = n.split(':')
            sec = (float(h)*60+float(m))*60+float(s)
            data.rename(index={n:sec}, inplace=True)
        data.index = [float(n) for n in data.index]
    else:
        data.columns = [float(n) for n in data.columns]

    #If we have negative values then this makes them equal to zero
    if negatives_to_zero:
        for t in (data.index):
            for l in data.columns:
                if data.loc[t,l] < 0:
                    data.loc[t,l] = 0.0

    return data 
Example 50
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def read_absorption_data_from_csv(filename):
    """ Reads csv with spectral data
    
        Args:
            filename (str): name of input file
          
        Returns:
            DataFrame

    """
    data = pd.read_csv(filename,index_col=0)
    return data 
Example 51
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def read_spectral_data_from_txt(filename):
    """ Reads txt with spectral data
    
        Args:
            filename (str): name of input file
          
        Returns:
            DataFrame

    """

    f = open(filename,'r')
    data_dict = dict()
    set_index = set()
    set_columns = set()

    for line in f:
        if line not in ['','\n','\t','\t\n']:
            l=line.split()
            i = float(l[0])
            j = float(l[1])
            k = float(l[2])
            set_index.add(i)
            set_columns.add(j)
            data_dict[i,j] = k
    f.close()
    
    data_array = np.zeros((len(set_index),len(set_columns)))
    sorted_index = sorted(set_index)
    sorted_columns = sorted(set_columns)

    for i,idx in enumerate(sorted_index):
        for j,jdx in enumerate(sorted_columns):
            data_array[i,j] = data_dict[idx,jdx]

    return pd.DataFrame(data=data_array,columns=sorted_columns,index=sorted_index) 
Example 52
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def read_absorption_data_from_txt(filename):
    """ Reads txt with absorption data
    
        Args:
            filename (str): name of input file
          
        Returns:
            DataFrame

    """

    f = open(filename,'r')
    data_dict = dict()
    set_index = set()
    set_columns = set()

    for line in f:
        if line not in ['','\n','\t','\t\n']:
            l=line.split()
            i = float(l[0])
            j = l[1]
            k = float(l[2])
            set_index.add(i)
            set_columns.add(j)
            data_dict[i,j] = k
    f.close()
    
    data_array = np.zeros((len(set_index),len(set_columns)))
    sorted_index = sorted(set_index)
    sorted_columns = set_columns

    for i,idx in enumerate(sorted_index):
        for j,jdx in enumerate(sorted_columns):
            data_array[i,j] = data_dict[idx,jdx]

    return pd.DataFrame(data=data_array,columns=sorted_columns,index=sorted_index) 
Example 53
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def plot_spectral_data(dataFrame,dimension='2D'):
    """ Plots spectral data
    
        Args:
            dataFrame (DataFrame): spectral data
          
        Returns:
            None

    """
    if dimension=='3D':
        lambdas = dataFrame.columns
        times = dataFrame.index
        D = np.array(dataFrame)
        L, T = np.meshgrid(lambdas, times)
        fig = plt.figure()
        #ax = fig.add_subplot(111, projection='3d')
        #ax.plot_wireframe(L, T, D, rstride=10, cstride=10)
        ax = fig.gca(projection='3d')
        ax.plot_surface(L, T, D, rstride=10, cstride=10, alpha=0.2)
        #cset = ax.contour(L, T, D, zdir='z',offset=-10)
        cset = ax.contour(L, T, D, zdir='x',offset=-20,cmap='coolwarm')
        cset = ax.contour(L, T, D, zdir='y',offset=times[-1]*1.1,cmap='coolwarm')
        
        ax.set_xlabel('Wavelength')
        ax.set_xlim(lambdas[0]-20, lambdas[-1])
        ax.set_ylabel('time')
        ax.set_ylim(0, times[-1]*1.1)
        ax.set_zlabel('Spectra')
        #ax.set_zlim(-10, )


    else:
        plt.figure()
        plt.plot(dataFrame)

#=============================================================================
#--------------------------- DIAGNOSTIC TOOLS ------------------------
#============================================================================= 
Example 54
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0
def decrease_wavelengths(original_dataset, A_set = 2, specific_subset = None):
    '''
    Takes in the original, full dataset and removes specific wavelengths, or only keeps every
    multiple of A_set. Returns a new, smaller dataset that should be easier to solve.
    
    Args:
        original_dataset (DataFrame):   the data to be processed
        A_set (float, optional):  optional user-provided multiple of wavelengths to keep. i.e. if
                                    3, every third value is kept. Default is 2.
        specific_subset (list or dict, optional): If the user already knows which wavelengths they would like to
                                    remove, then a list containing these can be included.
        
    Returns:
        DataFrame with the smaller dataset
    
    '''
    if specific_subset != None:
        if not isinstance(specific_subset, (list, dict)):
            raise RuntimeError("subset must be of type list or dict!")
             
        if isinstance(specific_subset, dict):
            lists1 = sorted(specific_subset.items())
            x1, y1 = zip(*lists1)
            specific_subset = list(x1)
            
        new_D = pd.DataFrame(np.nan, index=original_dataset.index, columns=specific_subset)
        for t in original_dataset.index:
            for l in original_dataset.columns.values:
                if l in specific_subset:
                    new_D.at[t, l] = original_dataset.at[t, l]
    else:
        # keep every A_set-th wavelength column
        new_D = original_dataset[original_dataset.columns[::A_set]]
    return new_D 
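
For the else branch above, positional slicing gives the same result in one step. A minimal sketch with a hypothetical spectra DataFrame whose columns are wavelengths:

import numpy as np
import pandas as pd

# Hypothetical spectra: 5 time points x 10 wavelength columns
D = pd.DataFrame(np.random.rand(5, 10),
                 columns=np.linspace(1600, 1690, 10))

A_set = 2
smaller = D.iloc[:, ::A_set]         # keep every 2nd wavelength column
print(D.shape, "->", smaller.shape)  # (5, 10) -> (5, 5)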
Example 55
Project: kipet   Author: salvadorgarciamunoz   File: TemplateBuilder.py    GNU General Public License v3.0
def add_smoothparameter(self, *args, **kwds): #added for smoothingparameter (CS)
        """Add a kinetic parameter(s) to the model.

        Note:
            Plan to change this method add parameters as PYOMO variables

            This method tries to mimic a template implementation. Depending
            on the argument type it will behave differently

        Args:
            param1 (boolean): Mutable value. Creates a variable mutable

        Returns:
            None

        """
        # bounds = kwds.pop('bounds', None)
        # init = kwds.pop('init', None)
        mutable = kwds.pop('mutable', False)

        if len(args) == 2:
            first = args[0]
            second = args[1]
            if isinstance(first, six.string_types):
                self._smoothparameters[first] = second
                if mutable is not False and isinstance(second, pd.DataFrame):
                    self._smoothparameters_mutable[first] = mutable  # added for mutable parameters CS
            else:
                raise RuntimeError('Parameter argument not supported. Try pandas.DataFrame and mutable=True')
        else:
            raise RuntimeError('Parameter argument not supported. Try pandas.DataFrame and mutable=True')
Example 56
Project: kipet   Author: salvadorgarciamunoz   File: TemplateBuilder.py    GNU General Public License v3.0
def add_huplc_data(self, data): #added for the inclusion of h/uplc data CS
        """Add HPLC or UPLC data

                Args:
                    data (DataFrame): DataFrame with measurement times as
                                      indices and wavelengths as columns.

                Returns:
                    None
        """
        if isinstance(data, pd.DataFrame):
            dfDhat = pd.DataFrame(index=self._feed_times, columns=data.columns)
            for t in self._feed_times:
                if t not in data.index:  # for points that are the same in original meas times and feed times
                    dfDhat.loc[t] = [0.0 for n in range(len(data.columns))]
            dfallDhat = data.append(dfDhat)
            dfallDhat.sort_index(inplace=True)
            dfallDhat.index = dfallDhat.index.to_series().apply(
                lambda x: np.round(x, 6))  # time from data rounded to 6 digits
            ##############Filter out NaN###############
            count = 0
            for j in dfallDhat.index:
                if count >= 1 and count < len(dfallDhat.index):
                    if dfallDhat.index[count] == dfallDhat.index[count - 1]:
                        dfallDhat = dfallDhat.dropna()
                count += 1
            ###########################################
            self._huplc_data = dfallDhat
        else:
            raise RuntimeError('HUPLC data format not supported. Try pandas.DataFrame')
        Dhat = np.array(dfallDhat)
        for t in range(len(dfallDhat.index)):
            for l in range(len(dfallDhat.columns)):
                if Dhat[t, l] >= 0:
                    pass
                else:
                    self._is_Dhat_deriv = True
        if self._is_Dhat_deriv == True:
            print(
                "Warning! Since Dhat-matrix contains negative values Kipet is assuming a derivative of C has been inputted") 
Example 57
Project: kipet   Author: salvadorgarciamunoz   File: TemplateBuilder.py    GNU General Public License v3.0 5 votes vote down vote up
def add_smoothparam_data(self, data): #added for mutable smoothing parameter option CS
        """Add HPLC or UPLC data

                Args:
                    data (DataFrame): DataFrame with measurement times as
                                      indices and wavelengths as columns.

                Returns:
                    None
        """
        if isinstance(data, pd.DataFrame):
            dfPs = pd.DataFrame(index=self._feed_times, columns=data.columns)
            for t in self._feed_times:
                if t not in data.index:  # for points that are the same in original meas times and feed times
                    dfPs.loc[t] = [0.0 for n in range(len(data.columns))]
            dfallPs = data.append(dfPs)
            dfallPs.sort_index(inplace=True)
            dfallPs.index = dfallPs.index.to_series().apply(
                lambda x: np.round(x, 6))  # time from data rounded to 6 digits
            ##############Filter out NaN###############
            count = 0
            for j in dfallPs.index:
                if count >= 1 and count < len(dfallPs.index):
                    if dfallPs.index[count] == dfallPs.index[count - 1]:
                        dfallPs = dfallPs.dropna()
                count += 1
            ###########################################
            self._smoothparam_data = dfallPs
        else:
            raise RuntimeError('Smooth parameter data format not supported. Try pandas.DataFrame')
        Ps = np.array(dfallPs) 
Example 58
Project: prediction-constrained-topic-models   Author: dtak   File: select_best_runs_and_snapshots.py    MIT License 5 votes vote down vote up
def load_df_from_all_folders_matching_list_of_patterns(
        list_of_path_patterns=None,
        legend_name=None,
        y_ind=0,
        column_names=None,
        query_str=None,
        task_ids=None,
        **kwargs):
    pprint(">>> BEGIN load_df_from_all_folders_that_match_pattern")
    list_of_match_df = list()
    for path_pattern in list_of_path_patterns:
        cur_alg_df = load_df_from_all_folders_that_match_pattern(
            path_pattern,
            y_ind=y_ind,
            task_ids=task_ids,
            column_names=column_names)
        if query_str is not None:
            cur_alg_df = cur_alg_df.query(query_str).copy()

        # Append to list of all matching dataframes
        list_of_match_df.append(cur_alg_df)



    # Create all matching DataFrame
    all_matching_runs_df = pd.concat(list_of_match_df)
    pprint("<<< END   load_df_from_all_folders_that_match_pattern")
    return all_matching_runs_df


######################
## Funcs that select best df 
Example 59
Project: prediction-constrained-topic-models   Author: dtak   File: select_best_runs_and_snapshots.py    MIT License 5 votes vote down vote up
def load_df_from_all_folders_that_match_pattern(
        src_path_pattern='',
        task_ids='1',
        when_task_path_does_not_exist='continue',
        when_split_csv_does_not_exist='raise_error',
        y_ind=0,
        column_names=None,
        output_data_type='binary',
        engine=None,
        csv_pattern='snapshot_perf_metrics_%s.csv'):
    ''' Load results from many folders that match a pattern into data frame.

    Aggregates results from many pipelines.

    Returns
    -------
    df : pandas DataFrame
    '''
    src_path_list = sorted(glob.glob(src_path_pattern))
    if len(src_path_list) == 0:
        raise ValueError("ERROR: No snapshot csv files for provided pattern:%s" % src_path_pattern)
    mega_df = None
    df_list = list()
    column_names = load_default_column_name_dict(
        output_data_type=output_data_type)
    for src_path in src_path_list:
        df = load_df_from_training_results_folder(
            src_path=src_path, 
            task_ids=task_ids,
            when_task_path_does_not_exist=when_task_path_does_not_exist,
            when_split_csv_does_not_exist=when_split_csv_does_not_exist,
            column_names=column_names,
            engine=engine,
            csv_pattern=csv_pattern,
            y_ind=y_ind)
        df_list.append(df)
    mega_df = pd.concat(df_list)
    return mega_df 
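Stripped of the project-specific helpers (task subfolders, column-name dictionaries), the two loaders above boil down to a glob-then-concat aggregation. A hedged sketch of that core, with an illustrative path pattern that is assumed to match some per-run CSV files:

import glob
import pandas as pd

# Illustrative pattern; any per-run CSV files matching it are stacked into one frame
src_path_pattern = "results/run_*/snapshot_perf_metrics_test.csv"
csv_paths = sorted(glob.glob(src_path_pattern))
if not csv_paths:
    raise ValueError("No CSV files found for pattern: %s" % src_path_pattern)
df_list = [pd.read_csv(p) for p in csv_paths]
mega_df = pd.concat(df_list, ignore_index=True)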
Example 60
Project: pybench   Author: pentschev   File: utils.py    Apache License 2.0 5 votes vote down vote up
def filter_by_string_in_column(df, column, value):
    """Filter pandas DataFrame by value, where value is a subsequence of the
    of the string contained in a column.

    Parameters
    ----------
    df: pandas.DataFrame
        A pandas DataFrame containing data from pytest-benchmark.
    column: str
        Column name where to check for value.
    value: str
        String to be checked if is part of column's content.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame containing only rows for which value is contained in
        column content.

    Example
    -------
    >>> numpy_df = filter_by_string_in_column(df, 'name', 'numpy')
    """
    return df.loc[df[column].str.contains(value)] 
Example 61
Project: pybench   Author: pentschev   File: utils.py    Apache License 2.0 5 votes vote down vote up
def split_params_list(df, params_name, columns=None):
    """Split a parameter list into multiple columns.

    Parameters
    ----------
    df: pandas.DataFrame
        A pandas DataFrame containing data from pytest-benchmark.
    params_name: str
        The name of the pytest parameter column to split.
    columns: list of str
        If specified, the resulting columns will have the names in this list,
        otherwise resulting columns will have the element number as a suffix to
        the value of ``params_name``, such as ``params_name.0``,
        ``params_name.1``, and so on.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame containing additional columns for the parameters
        split.

    Examples
    --------
    >>> split_params_list(df, "params.shape")
    >>> split_params_list(df,
    >>>     "params.shape",
    >>>     ["params.shape.0", "params.shape.1"])
    >>> split_params_list(df, "params.shape", ["shape0", "shape1"])
    """
    lst = df[params_name].to_list()
    lst = [[l] if not isinstance(l, list) else l for l in lst]
    ncols = max([len(l) for l in lst])
    if columns is None:
        columns = [params_name + "." + str(i) for i in range(ncols)]
    split_param = pd.DataFrame(lst, columns=columns)
    return df.join(split_param, how="outer") 
Example 62
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def moving_average(df, n):
    """Calculate the moving average for the given data.
    
    :param df: pandas.DataFrame
    :param n: window size in periods
    :return: pandas.DataFrame
    """
    MA = pd.Series(df['Close'].rolling(n, min_periods=n).mean(), name='MA_' + str(n))
    df = df.join(MA)
    return df 
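The join-a-named-Series pattern above repeats throughout this indicator module; a standalone illustration of the same rolling-mean computation on a made-up Close series:

import pandas as pd

df = pd.DataFrame({'Close': [10.0, 11.0, 12.0, 13.0, 14.0, 15.0]})
n = 3
MA = pd.Series(df['Close'].rolling(n, min_periods=n).mean(), name='MA_' + str(n))
df = df.join(MA)
print(df)  # MA_3 is NaN for the first two rows, then 11.0, 12.0, 13.0, 14.0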
Example 63
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def exponential_moving_average(df, n):
    """
    
    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    EMA = pd.Series(df['Close'].ewm(span=n, min_periods=n).mean(), name='EMA_' + str(n))
    df = df.join(EMA)
    return df 
Example 64
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def momentum(df, n):
    """
    
    :param df: pandas.DataFrame 
    :param n: 
    :return: pandas.DataFrame
    """
    M = pd.Series(df['Close'].diff(n), name='Momentum_' + str(n))
    df = df.join(M)
    return df 
Example 65
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def rate_of_change(df, n):
    """
    
    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    M = df['Close'].diff(n - 1)
    N = df['Close'].shift(n - 1)
    ROC = pd.Series(M / N, name='ROC_' + str(n))
    df = df.join(ROC)
    return df 
Example 66
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def stochastic_oscillator_k(df):
    """Calculate stochastic oscillator %K for given data.
    
    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    SOk = pd.Series((df['Close'] - df['Low']) / (df['High'] - df['Low']), name='SO%k')
    df = df.join(SOk)
    return df 
Example 67
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def stochastic_oscillator_d(df, n):
    """Calculate stochastic oscillator %D for given data.
    :param df: pandas.DataFrame
    :param n: span used to smooth %K into %D
    :return: pandas.DataFrame
    """
    SOk = pd.Series((df['Close'] - df['Low']) / (df['High'] - df['Low']), name='SO%k')
    SOd = pd.Series(SOk.ewm(span=n, min_periods=n).mean(), name='SO%d_' + str(n))
    df = df.join(SOd)
    return df 
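A self-contained illustration of the two lines above on a toy OHLC frame (all values made up): %K is computed first and then smoothed into %D with an exponentially weighted mean:

import pandas as pd

df = pd.DataFrame({'High': [12.0, 13.0, 14.0, 15.0],
                   'Low': [9.0, 10.0, 11.0, 12.0],
                   'Close': [10.0, 12.0, 13.0, 14.0]})
n = 2
SOk = pd.Series((df['Close'] - df['Low']) / (df['High'] - df['Low']), name='SO%k')
SOd = pd.Series(SOk.ewm(span=n, min_periods=n).mean(), name='SO%d_' + str(n))
print(df.join(SOk).join(SOd))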
Example 68
Project: incubator-spot   Author: apache   File: flow_oa.py    Apache License 2.0 4 votes vote down vote up
def _ingest_summary(self):
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")
        
        ingest_summary_cols = ["date","total"]		
        result_rows = []        
        df_filtered =  pd.DataFrame()

        # get ingest summary.

        query_to_load=("""
                SELECT tryear, trmonth, trday, trhour, trminute, COUNT(*) as total
                FROM {0}.{1} WHERE y={2} AND m={3} AND d={4}
                AND unix_tstamp IS NOT NULL
                AND sip IS NOT NULL
                AND sport IS NOT NULL
                AND dip IS NOT NULL
                AND dport IS NOT NULL
                AND ibyt IS NOT NULL
                AND ipkt IS NOT NULL
                AND tryear={2}
                AND cast(treceived as timestamp) IS NOT NULL
                GROUP BY tryear, trmonth, trday, trhour, trminute;
        """).format(self._db,self._table_name, yr, mn, dy)
        
        results = impala.execute_query(query_to_load) 
 
        if results:
            df_results = as_pandas(results) 
            
            #Forms a new dataframe splitting the minutes from the time column
            df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(val['tryear'],val['trmonth'],val['trday'], val['trhour'], val['trminute']), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df_results.iterrows()],columns = ingest_summary_cols)
            value_string = ''
            #Groups the data by minute 

            sf = df_new.groupby(by=['date'])['total'].sum()
            df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
            
            df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
            if len(df_final) > 0:
                query_to_insert=("""
                    INSERT INTO {0}.flow_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
                """).format(self._db, yr, mn, dy, tuple(df_final))

                impala.execute_query(query_to_insert)
                
        else:
            self._logger.info("No data found for the ingest summary") 
Example 69
Project: incubator-spot   Author: apache   File: proxy_oa.py    Apache License 2.0 4 votes vote down vote up
def _ingest_summary(self): 
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")
        
        ingest_summary_cols = ["date","total"]		
        result_rows = []        
        df_filtered =  pd.DataFrame()

        # get ingest summary.

        query_to_load=("""
                SELECT p_date, p_time, COUNT(*) as total
                FROM {0}.{1} WHERE y='{2}' AND m='{3}' AND d='{4}'
                AND p_date IS NOT NULL AND p_time IS NOT NULL
                AND clientip IS NOT NULL AND p_time != ''
                AND host IS NOT NULL AND fulluri IS NOT NULL
                GROUP BY p_date, p_time;
        """).format(self._db,self._table_name, yr, mn, dy)
        
        results = impala.execute_query(query_to_load) 
 
        if results:
            df_results = as_pandas(results)
            #Forms a new dataframe splitting the minutes from the time column/
            df_new = pd.DataFrame([["{0} {1}:{2}".format(val['p_date'], val['p_time'].split(":")[0].zfill(2), val['p_time'].split(":")[1].zfill(2)), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df_results.iterrows()],columns = ingest_summary_cols)
            value_string = ''
            #Groups the data by minute 
            sf = df_new.groupby(by=['date'])['total'].sum()
            df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
            
            df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
            if len(df_final) > 0:
                query_to_insert=("""
                    INSERT INTO {0}.proxy_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
                """).format(self._db, yr, mn, dy, tuple(df_final))

                impala.execute_query(query_to_insert) 
                
        else:
            self._logger.info("No data found for the ingest summary") 
Example 70
Project: Wide-Residual-Nets-for-SETI   Author: sgrvinod   File: test.py    Apache License 2.0 4 votes vote down vote up
def test(test_loader, model):
    """
    Perform testing.
    """

    model.eval()  # eval mode

    all_probs = []
    all_uuids = []

    batch_time = AverageMeter()  # forward prop. time this batch

    start = time.time()

    softmax = torch.nn.Softmax()  # need this, since there is no longer a loss layer

    for i, (input, uuids) in enumerate(test_loader):

        softmax.zero_grad()

        # Store UUIDs associated with this batch, in the right order
        uuids = list(uuids.numpy().ravel())
        all_uuids.extend(uuids)

        input_var = torch.autograd.Variable(input, volatile=True).cuda()

        output = model(input_var)
        probs = softmax(output)
        all_probs.append(probs.data)

        batch_time.update(time.time() - start)
        start = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(i, len(test_loader),
                                                                                    batch_time=batch_time))
    all_probs = torch.cat(all_probs).cpu()  # concatenate probs from all batches, move to CPU
    all_uuids = [uuid_index_mapping[i] for i in all_uuids]  # convert UUID indices to UUIDs

    # Create dataframe and store as CSV
    df1 = pd.DataFrame({'UUIDs': pd.Series(all_uuids)})
    df2 = pd.DataFrame(all_probs.numpy())
    df = pd.concat([df1, df2], axis=1)
    csv_path = './TESTRESULTS__' + args.checkpoint.split('/')[-1] + '__' + args.h5data.split('/')[-1] + '.csv'
    df.to_csv(csv_path, header=False, index=False)
    print("\nSaved results to {0}\n".format(csv_path)) 
Example 71
Project: Wide-Residual-Nets-for-SETI   Author: sgrvinod   File: test_cpu.py    Apache License 2.0 4 votes vote down vote up
def test(test_loader, model):
    """
    Perform testing.
    """

    print('Perform testing')

    model.eval()  # eval mode

    all_probs = []
    all_uuids = []

    batch_time = AverageMeter()  # forward prop. time this batch

    start = time.time()

    softmax = torch.nn.Softmax()  # need this, since there is no longer a loss layer

    for i, (input, uuids) in enumerate(test_loader):

        softmax.zero_grad()

        # Store UUIDs associated with this batch, in the right order
        uuids = list(uuids.numpy().ravel())
        all_uuids.extend(uuids)

        input_var = torch.autograd.Variable(input, volatile=True).cpu()

        output = model(input_var)
        probs = softmax(output)
        
        all_probs.append(probs.data)

        batch_time.update(time.time() - start)
        start = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(i, len(test_loader),
                                                                                    batch_time=batch_time))
    all_probs = torch.cat(all_probs).cpu()  # concatenate probs from all batches, move to CPU
    all_uuids = [uuid_index_mapping[i] for i in all_uuids]  # convert UUID indices to UUIDs

    # Create dataframe and store as CSV
    df1 = pd.DataFrame({'UUIDs': pd.Series(all_uuids)})
    df2 = pd.DataFrame(all_probs.numpy())
    df = pd.concat([df1, df2], axis=1)
    csv_path = './TESTRESULTS__' + args.checkpoint.split('/')[-1] + '__' + args.h5data.split('/')[-1] + '.csv'
    df.to_csv(csv_path, header=False, index=False)
    print("\nSaved results to {0}\n".format(csv_path)) 
Example 72
Project: kipet   Author: salvadorgarciamunoz   File: MultipleExperimentsEstimator.py    GNU General Public License v3.0 4 votes vote down vote up
def __init__(self,datasets):
        super(MultipleExperimentsEstimator, self).__init__()
        self.block_models = dict()
        self._idx_to_variable = dict()
        
        if datasets is not None:
            if isinstance(datasets, dict):
                for key, val in datasets.items():
                    if not isinstance(key, str):
                        raise RuntimeError('The key for the dictionary must be a str')
                    if not isinstance(val, pd.DataFrame):
                        raise RuntimeError('The value in the dictionary must be the experimental dataset as a pandas DataFrame')

        else:
            raise RuntimeError("datasets not given, add datasets as a dict to use this class")
            
        self.datasets = datasets
        self.experiments = list()
        for key,val in self.datasets.items():
            self.experiments.append(key)
        
        self._variance_solved = False
        #added for new initialization options (CS):
        self._sim_solved = False
        self.sim_results = dict()
        self.cloneopt_model = dict()
        # self.clonecloneopt_model = dict()
        
        self.variances = dict()
        self.variance_results = dict()
        self.start_time = dict()
        self.end_time = dict()
        self.builder = dict()
        self.opt_model = dict()
        
        self.initialization_model = dict()
        self._sublist_components = dict()

        self._n_meas_times = 0
        self._n_meas_lambdas = 0
        self._n_actual = 0
        self._n_params = 0
        
        self._spectra_given = True
        self._concentration_given = False
        
        self.global_params = None
        
        # set of flags to mark how many times and wavelengths are in each dataset
        self.l_mark = dict()
        self.t_mark = dict()
        self.n_mark = dict()
        self.p_mark = dict() 
Example 73
Project: kipet   Author: salvadorgarciamunoz   File: ResultsObject.py    GNU General Public License v3.0 4 votes vote down vote up
def load_from_pyomo_model(self,instance,to_load=[]):

        model_variables = set()
        for block in instance.block_data_objects():
            block_map = block.component_map(Var)
            for name in six.iterkeys(block_map):
                model_variables.add(name)
                
        user_variables = set(to_load)

        if user_variables:
            variables_to_load = user_variables.intersection(model_variables)
        else:
            variables_to_load = model_variables

        diff = user_variables.difference(model_variables)
        if diff:
            print("WARNING: The following variables are not part of the model:")
            print(diff) 
        
        for block in instance.block_data_objects():
            block_map = block.component_map(Var)
            for name in variables_to_load:
                v = block_map[name]
                if v.dim()==0:
                    setattr(self,name,v.value)
                elif v.dim()==1:
                    setattr(self,name,pd.Series(v.get_values()))
                elif v.dim()==2:
                    d = v.get_values()
                    keys = d.keys()
                    if keys:
                        split_keys = v._implicit_subsets
                        # split_keys = zip(*keys)
                        # print(split_keys)
                        first_set = set(split_keys[0])
                        second_set = set(split_keys[1])
                        s_first_set = sorted(first_set)
                        s_second_set = sorted(second_set)
                        m = len(first_set)
                        n = len(second_set)

                        v_values = np.zeros((m,n))
                        for i,w in enumerate(s_first_set):
                            for j,k in enumerate(s_second_set):
                                v_values[i,j] = d[w,k]

                        data_frame = pd.DataFrame(data=v_values,
                                                  columns = s_second_set,
                                                  index=s_first_set)
                    else:
                        data_frame = pd.DataFrame(data=[],
                                                  columns = [],
                                                  index=[])
                    setattr(self,name,data_frame)        
                else:
                    raise RuntimeError('load_from_pyomo_model function not supported for models with variables with dimension>2') 
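The dim() == 2 branch above reshapes a dict keyed by (row, column) tuples into a DataFrame. A standalone sketch of that reshaping; the label sets here are derived from the dict keys rather than from the Pyomo variable's _implicit_subsets:

import numpy as np
import pandas as pd

# Hypothetical values of a 2-D indexed variable, keyed by (time, component)
d = {(0.0, 'A'): 1.0, (0.0, 'B'): 2.0,
     (1.0, 'A'): 3.0, (1.0, 'B'): 4.0}

s_first_set = sorted({k[0] for k in d})   # row labels, e.g. times
s_second_set = sorted({k[1] for k in d})  # column labels, e.g. components

v_values = np.zeros((len(s_first_set), len(s_second_set)))
for i, w in enumerate(s_first_set):
    for j, k in enumerate(s_second_set):
        v_values[i, j] = d[w, k]

data_frame = pd.DataFrame(data=v_values, columns=s_second_set, index=s_first_set)
print(data_frame)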
Example 74
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0 4 votes vote down vote up
def basic_pca(dataFrame,n=None,with_plots=False):
    """ Runs basic component analysis based on SVD
    
        Args:
            dataFrame (DataFrame): spectral data
            
            n (int): number of largest singular-values
            to plot
            
            with_plots (bool): if True, the diagnostic plots are shown (can be disabled for testing)

        Returns:
            None

    """
            
    times = np.array(dataFrame.index)
    lambdas = np.array(dataFrame.columns)
    D = np.array(dataFrame)
    #print("D shape: ", D.shape)
    U, s, V = np.linalg.svd(D, full_matrices=True)
    #print("U shape: ", U.shape)
    #print("s shape: ", s.shape)
    #print("V shape: ", V.shape)
    #print("sigma/singular values", s)
    if n is None:
        print("WARNING: since no number of components is specified, all components are used")
        print("It is advised to specify the number of components via n")
        n_shape = s.shape
        n = n_shape[0]
        
    u_shape = U.shape
    #print("u_shape[0]",u_shape[0])
    n_l_vector = n if u_shape[0]>=n else u_shape[0]
    n_singular = n if len(s)>=n else len(s)
    idxs = range(n_singular)
    vals = [s[i] for i in idxs]
    v_shape = V.shape
    n_r_vector = n if v_shape[0]>=n else v_shape[0]
    
    if with_plots:
        for i in range(n_l_vector):
            plt.plot(times,U[:,i])
        plt.xlabel("time")
        plt.ylabel("Components U[:,i]")
        plt.show()
        
        plt.semilogy(idxs,vals,'o')
        plt.xlabel("i")
        plt.ylabel("singular values")
        plt.show()
        
        for i in range(n_r_vector):
            plt.plot(lambdas,V[i,:])
        plt.xlabel("wavelength")
        plt.ylabel("Components V[i,:]")
        plt.show() 
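Stripped of the plotting, the core of basic_pca is a single SVD call; a tiny runnable sketch on synthetic spectral data:

import numpy as np
import pandas as pd

# Synthetic "spectra": rows are times, columns are wavelengths (values are random)
rng = np.random.default_rng(0)
dataFrame = pd.DataFrame(rng.random((6, 4)),
                         index=np.arange(6, dtype=float),
                         columns=[1800, 1810, 1820, 1830])

D = np.array(dataFrame)
U, s, V = np.linalg.svd(D, full_matrices=True)
print("singular values:", s)             # one value per component, largest first
print("left vectors shape:", U.shape)    # (n_times, n_times)
print("right vectors shape:", V.shape)   # (n_wavelengths, n_wavelengths)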
Example 75
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0 4 votes vote down vote up
def perform_data_analysis(dataFrame, pseudo_equiv_matrix, rank_data):  
    """ Runs the analysis by Chen, et al, 2018, based upon the pseudo-equivalency
    matrix. User provides the data and the pseudo-equivalency matrix and the analysis
    provides suggested number of absorbing components as well as whether there are
    likely to be unwanted spectral contributions.
    
        Args:
            dataFrame (DataFrame): spectral data
            
            pseudo_equiv_matrix (list of lists): list containing the rows of the pseudo-equivalency
                                matrix.
            
            rank_data (int): rank of the data matrix, as determined from SVD (number of coloured species)

        Returns:
            None

    """  
    if not isinstance(dataFrame, pd.DataFrame):
        raise TypeError("data must be inputted as a pandas DataFrame, try using read_spectral_data_from_txt or similar function first")
    
    if not isinstance(pseudo_equiv_matrix, list):
        raise TypeError("The Pseudo-equivalency matrix must be inputted as a list containing lists with each row of the pseudo-equivalency matrix")
    PEM = np.matrix(pseudo_equiv_matrix)
    rkp = rank(PEM)
    print("Rank of pseudo-equivalency matrix is ", rkp)
    
    ns = nullspace(PEM)
    print("Nullspace/kernel of pseudo-equivalency matrix is ", ns)
    if ns.size == 0:
        print("Null space of pseudo-equivalency matrix is null")
        rankns = 0
    else:
        rankns = rank(ns)
    
    print("the rank of the nullspace/kernel of pseudo-equivalency matrix is ", rankns)
    
    num_components = PEM.shape[1]
    if rankns > 0:
        ncr = num_components - rankns
        print("Choose the following number of absorbing species:", ncr)
    else:
        ncr = num_components
    ncms = rank_data
    
    if ncr == ncms:
        print("Solve standard problem assuming no unwanted contributions")
    elif ncr == ncms - 1:
        print("Solve with unwanted contributions")
    else:
        print("There may be uncounted for species in the model, or multiple sources of unknown contributions")
    
#=============================================================================
#---------------------------PROBLEM GENERATION TOOLS------------------------
#============================================================================= 
Example 76
Project: kipet   Author: salvadorgarciamunoz   File: data_tools.py    GNU General Public License v3.0 4 votes vote down vote up
def snv(dataFrame, offset=0):
    """
    Implementation of the Standard Normal Variate (SNV) filter for Kipet. SNV is a weighted normalization
    method commonly used to remove scatter effects in spectroscopic data. This pre-processing
    step can be applied before the SG filter or used on its own. SNV can be sensitive to noisy entries
    in the spectra and can increase nonlinear behaviour between S and C, as it is not a linear transformation.
    
    
    Args:
        dataFrame (DataFrame): the data to be processed (either concentration or spectral data)
        offset (float): user-defined offset which can be used to avoid over-normalization for samples
                        with near-zero standard deviation. A value near the expected noise level is a
                        reasonable choice. Default value is zero.
        
    Returns:
        DataFrame containing pre-processed data
    
    References:

    """
    # data checks
    if not isinstance(dataFrame, pd.DataFrame):
        raise TypeError("data must be inputted as a pandas DataFrame, try using read_spectral_data_from_txt or similar function first")
    print("Applying the SNV pre-processing")    

    D = np.array(dataFrame)
    snv_proc = np.array(dataFrame)
    for t in range(len(dataFrame.index)):
        row = list()
        sum_spectra = 0
        for l in range(len(dataFrame.columns)):
            row.append(D[t,l])
            sum_spectra += D[t,l]
        mean_spectra = sum_spectra/(len(dataFrame.columns))
        std = 0
        for l in range(len(dataFrame.columns)):
            std += (mean_spectra-D[t,l])**2
        new_row = list()
        for l in range(len(dataFrame.columns)):
            if offset ==0:
                w = (D[t,l]-mean_spectra)*(std/(len(dataFrame.columns)-1))**0.5
            else:
                w = (D[t,l]-mean_spectra)*(std/(len(dataFrame.columns)-1))**0.5 + 1/offset
            new_row.append(w)
                
        snv_proc[t]=new_row

    data_frame = pd.DataFrame(data=snv_proc,
                              columns = dataFrame.columns,
                              index=dataFrame.index)
    return data_frame 
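For the offset == 0 case, the row-wise loop in snv reduces to a couple of NumPy operations. This sketch reproduces the same arithmetic on toy data (note that, as in the loop above, each centred row is multiplied by its sample standard deviation):

import pandas as pd

# Toy spectra: 3 time points x 4 wavelengths
dataFrame = pd.DataFrame([[1.0, 2.0, 3.0, 4.0],
                          [2.0, 2.0, 2.0, 2.0],
                          [0.0, 1.0, 0.0, 1.0]],
                         columns=[1800, 1810, 1820, 1830])

D = dataFrame.to_numpy(dtype=float)
row_mean = D.mean(axis=1, keepdims=True)
row_std = D.std(axis=1, ddof=1, keepdims=True)  # same (N - 1) denominator as the loop
snv_proc = (D - row_mean) * row_std

data_frame = pd.DataFrame(snv_proc, columns=dataFrame.columns, index=dataFrame.index)
print(data_frame)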
Example 77
Project: kipet   Author: salvadorgarciamunoz   File: TemplateBuilder.py    GNU General Public License v3.0 4 votes vote down vote up
def add_spectral_data(self, data):
        """Add spectral data

        Args:
            data (DataFrame): DataFrame with measurement times as
                              indices and wavelengths as columns.

        Returns:
            None

        """
        if isinstance(data, pd.DataFrame):
            # add zero rows for feed times that are not in original measurements in D-matrix (CS):
            df = pd.DataFrame(index=self._feed_times, columns=data.columns)
            for t in self._feed_times:
                if t not in data.index:  # for points that are the same in original measurement times and feed times (CS)
                    df.loc[t] = [0.0 for n in range(len(data.columns))]
            dfall = data.append(df)
            dfall.sort_index(inplace=True)
            dfall.index = dfall.index.to_series().apply(lambda x: np.round(x, 6))  # time from data rounded to 6 digits
            ##############Filter out NaN############### points that are the same in original measurement times and feed times (CS)
            count = 0
            for j in dfall.index:
                if count >= 1 and count < len(dfall.index):
                    if dfall.index[count] == dfall.index[count - 1]:
                        dfall = dfall.dropna()
                count += 1
            ###########################################
            self._spectral_data = dfall
        else:
            raise RuntimeError('Spectral data format not supported. Try pandas.DataFrame')

        D = np.array(dfall)

        for t in range(len(dfall.index)):
            for l in range(len(dfall.columns)):
                if D[t, l] >= 0:
                    pass
                else:
                    self._is_D_deriv = True
        if self._is_D_deriv == True:
            print(
                "Warning! Since D-matrix contains negative values Kipet is assuming a derivative of D has been inputted") 
Example 78
Project: prediction-constrained-topic-models   Author: dtak   File: eval_pretrained_sklearn_binary_classifier.py    MIT License 4 votes vote down vote up
def eval_pretrained_clf(
        classifier_path='/tmp/',
        classifier_name='logistic_regression',
        datasets_by_split=None,
        verbose=True,
        feat_colnames=None,
        y_col_id=0,
        y_orig_col_id=0,
        y_col_name='',
        output_path='/tmp/',
        seed_bootstrap=42,
        n_bootstraps=5000,
        bootstrap_stratify_pos_and_neg=True,
        ):
    start_time = time.time()
    (make_classifier, score_classifier, calc_best_idx,
        make_clf_report, make_csv_row_dict, make_interp_report) = \
            make_constructor_and_evaluator_funcs(
                classifier_name,
                n_bootstraps=n_bootstraps,
                seed_bootstrap=seed_bootstrap,
                bootstrap_stratify_pos_and_neg=bootstrap_stratify_pos_and_neg)

    # Read classifier obj from disk
    clf_path = os.path.join(
        classifier_path,
        'clf_%d_object.dump' % (y_orig_col_id))
    best_clf = joblib.load(clf_path)

    if os.path.exists(output_path):
        n_keys = len(datasets_by_split.keys())
        for ss, split in enumerate(datasets_by_split.keys()):
            csv_fpath = os.path.join(
                output_path,
                'clf_%d_callback_%s.csv' % (y_orig_col_id, split))
            row_dict = make_csv_row_dict(
                best_clf,
                datasets_by_split[split]['x'],
                datasets_by_split[split]['y'][:, y_col_id],
                y_col_name,
                split,
                classifier_name)
            csv_df = pd.DataFrame([row_dict])
            csv_df.to_csv(
                csv_fpath,
                index=False)
            if verbose:
                elapsed_time = time.time() - start_time
                print("eval %d/%d on %5s split done after %11.2f sec" % (ss, n_keys, split, elapsed_time))
                print("wrote csv file: " + csv_fpath)
    return best_clf 
Example 79
Project: OpenAPS   Author: medicinexlab   File: bgdata.py    MIT License 4 votes vote down vote up
def get_new_df_entries_every_5_minutes(bg_df, start_index, end_index, set_str):
    """
    Function to take the given bg_df and make a new_bg_df within the start and end indices
    such that entries are spaced out by 5 minutes (MIN_ENTRY_SPACING_MINUTE).

    Input:      bg_df                           Pandas dataframe of all of the data from ./data/[id_str]/devicestatus.json
                start_index                     The starting index of the data in the bg_df
                end_index                       The ending index of the data in the bg_df
                set_str                         String of the set name (i.e. "Training", "Validation", "Testing")
    Output:     new_bg_df                       New Pandas dataframe with the data from the original start and end indices
                                                    such that the entries are spaced out by 5 minutes.
                start_index                     New starting index for the new_bg_df
                end_index                       New ending index for the new_bg_df
    Usage:      new_bg_df, start_index, end_index = get_new_df_entries_every_5_minutes(bg_df, start_index, end_index, "Training")
    """
    new_bg_df = pd.DataFrame()
    last_time = 0
    starting_df = True

    for df_index in range(end_index, start_index):
        add_entry = False
        try:
            time = int((bg_df.iloc[df_index]['created_at'] - bg_df.iloc[start_index]['created_at']) / np.timedelta64(1, 'm'))
            test_if_has_enacted = bg_df.iloc[df_index]['openaps']['enacted']['bg'] #Test to see if df entry has suggested and enacted functioning
            test_if_has_suggested = bg_df.iloc[df_index]['openaps']['suggested']['bg']

            if last_time - time >= MIN_ENTRY_SPACING_MINUTE or starting_df: #check if spaced out by 5 minutes or just starting the df
                starting_df = False
                last_time = time

                #If it has both enacted and suggested and is spaced out by MIN_ENTRY_SPACING_MINUTE from last entry, then set boolean to be true
                add_entry = True
        except:
            add_entry = False

        if add_entry:
            new_bg_df = new_bg_df.append(bg_df.iloc[df_index], ignore_index=True) #if boolean is true, add it to the new dataframe

    start_index = len(new_bg_df) - 1
    end_index = 0

    #Print the number of entries, the start index, and the end index
    print("{}: {} number entries".format(set_str, start_index - end_index + 1))
    print("{} Start Index = {} and End Index = {}".format(set_str, start_index, end_index))
    print

    return new_bg_df, start_index, end_index 
Example 80
Project: pybench   Author: pentschev   File: benchmark_ml.py    Apache License 2.0 4 votes vote down vote up
def load_data(nrows, ncols, cached, train_split=1.0, label_col=None):
    import gzip
    import os
    import numpy as np
    import pandas as pd

    train_rows = int(nrows * train_split)

    if os.path.exists(cached):
        with gzip.open(cached) as f:
            X = np.load(f)

        if train_split < 1.0 and label_col is not None:
            # extract the label column before it is removed from the feature matrix
            y = X[:, label_col : label_col + 1]
            X = X[:, [i for i in range(X.shape[1]) if i != label_col]]
            rindices = np.random.randint(0, X.shape[0] - 1, nrows)
            X = X[rindices, :ncols]
            y = y[rindices]
            df_y_train = pd.DataFrame(
                {"fea%d" % i: y[0:train_rows, i] for i in range(y.shape[1])}
            )
            df_y_test = pd.DataFrame(
                {"fea%d" % i: y[train_rows:, i] for i in range(y.shape[1])}
            )
        else:
            X = X[np.random.randint(0, X.shape[0] - 1, nrows), :ncols]

    else:
        # throws FileNotFoundError error if mortgage dataset is not present
        raise FileNotFoundError(
            "Please download the required dataset or check the path"
        )

    if train_split < 1.0 and label_col is not None:
        df_X_train = pd.DataFrame(
            {"fea%d" % i: X[0:train_rows, i] for i in range(X.shape[1])}
        )
        df_X_test = pd.DataFrame(
            {"fea%d" % i: X[train_rows:, i] for i in range(X.shape[1])}
        )

        return {
            "X_train": df_X_train,
            "X_test": df_X_test,
            "y_train": df_y_train,
            "y_test": df_y_test,
        }
    else:
        df = pd.DataFrame({"fea%d" % i: X[:, i] for i in range(X.shape[1])})
        return df
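The column-wise dict comprehension used above to wrap NumPy arrays into DataFrames is worth a standalone look; a minimal sketch:

import numpy as np
import pandas as pd

X = np.arange(12, dtype=float).reshape(4, 3)  # hypothetical feature matrix
df = pd.DataFrame({"fea%d" % i: X[:, i] for i in range(X.shape[1])})
print(df.columns.tolist())  # ['fea0', 'fea1', 'fea2']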