Python pandas.read_json() Examples

The following are code examples for showing how to use pandas.read_json(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_discourse.py    Apache License 2.0 6 votes vote down vote up
def main():
    """Pull time-series data from each Discourse endpoint and write a CSV per endpoint."""
    # The API credentials are shared by every endpoint, so build the
    # authentication query-string suffix once up front.
    dfcreds = get_credentials(keyfile)
    str_authentication = "&api_key=" + dfcreds['access_token'] + "&api_username=" + dfcreds['api_username']
    for url, fn in endpoints:
        df = pd.read_json(url + str_authentication, orient="columns")
        # isolate the list of data points from the response frame
        payload = df.iloc[3, 0]
        # format and clean up the data
        raw = pd.DataFrame.from_dict(payload)  # turn the list into a dataframe
        report = pd.DataFrame(raw, columns=["c1_timestamp", "c2_value"])  # set the column names
        report["c1_timestamp"] = pd.to_datetime(raw["x"])
        report["c2_value"] = pd.to_numeric(raw["y"])
        # write to csv
        print("writing csv to " + fn)
        report.to_csv(fn, index=False, date_format="%Y-%m-%d")
Example 2
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 6 votes vote down vote up
def test_read_jsonl_unicode_chars():
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK
    payload = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    want = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])

    # simulate file handle
    got = read_json(StringIO(payload), lines=True)
    assert_frame_equal(got, want)

    # simulate string
    got = read_json(payload, lines=True)
    assert_frame_equal(got, want)
Example 3
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 6 votes vote down vote up
def test_to_jsonl():
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n' '{"a\\\\":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df) 
Example 4
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 6 votes vote down vote up
def test_readjson_chunks_multiple_empty_lines(chunksize):
    j = """

    {"A":1,"B":4}



    {"A":2,"B":5}







    {"A":3,"B":6}
    """
    orig = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    test = pd.read_json(j, lines=True, chunksize=chunksize)
    if chunksize is not None:
        test = pd.concat(test)
    tm.assert_frame_equal(
        orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)
    ) 
Example 5
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_v12_compat(self):
        # JSON written by pandas 0.12 must still be readable by read_json.
        values = [
            [1.56808523, 0.65727391, 1.81021139, -0.17251653],
            [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
            [1.51493992, 0.11805825, 1.629455, -1.31506612],
            [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
            [0.05951614, -2.69652057, 1.28163262, 0.34703478],
        ]
        df = DataFrame(
            values,
            columns=["A", "B", "C", "D"],
            index=pd.date_range("2000-01-03", "2000-01-07"),
        )
        df["date"] = pd.Timestamp("19920106 18:21:32.12")
        df.iloc[3, df.columns.get_loc("date")] = pd.Timestamp("20130101")
        df["modified"] = df["date"]
        df.iloc[1, df.columns.get_loc("modified")] = pd.NaT

        # fixture written with epoch-format dates by v0.12
        v12_json = os.path.join(self.dirpath, "tsframe_v012.json")
        assert_frame_equal(df, pd.read_json(v12_json))

        # fixture written with ISO-format dates (no "modified" column)
        df_iso = df.drop(["modified"], axis=1)
        v12_iso_json = os.path.join(self.dirpath, "tsframe_iso_v012.json")
        assert_frame_equal(df_iso, pd.read_json(v12_iso_json))
Example 6
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_convert_dates(self):

        # frame: the "date" column round-trips through to_json/read_json
        df = self.tsframe.copy()
        df["date"] = Timestamp("20130101")
        assert_frame_equal(read_json(df.to_json()), df)

        # with convert_dates=False the epoch ints come back unconverted
        df["foo"] = 1.0
        json = df.to_json(date_unit="ns")
        expected = df.copy()
        expected["date"] = expected["date"].values.view("i8")
        expected["foo"] = expected["foo"].astype("int64")
        assert_frame_equal(read_json(json, convert_dates=False), expected)

        # series round-trip
        ts = Series(Timestamp("20130101"), index=self.ts.index)
        assert_series_equal(read_json(ts.to_json(), typ="series"), ts)
Example 7
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_convert_dates_infer(self):
        # GH10747
        from pandas.io.json import dumps

        infer_words = [
            "trade_time",
            "date",
            "datetime",
            "sold_at",
            "modified",
            "timestamp",
            "timestamps",
        ]
        for infer_word in infer_words:
            data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
            expected = DataFrame(
                [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
            )
            result = read_json(dumps(data))[["id", infer_word]]
            assert_frame_equal(result, expected) 
Example 8
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_date_format_series(self):
        def roundtrip(date, date_unit=None):
            # Series with NaT holes, written as ISO dates and read back;
            # ISO output is UTC, so localize the expectation.
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.iloc[1] = pd.NaT
            ts.iloc[5] = pd.NaT
            kwargs = {"date_unit": date_unit} if date_unit else {}
            json = ts.to_json(date_format="iso", **kwargs)
            result = read_json(json, typ="series")
            expected = ts.copy()
            expected.index = expected.index.tz_localize("UTC")
            expected = expected.dt.tz_localize("UTC")
            assert_series_equal(result, expected)

        roundtrip("20130101 20:43:42.123")
        roundtrip("20130101 20:43:42", date_unit="s")
        roundtrip("20130101 20:43:42.123", date_unit="ms")
        roundtrip("20130101 20:43:42.123456", date_unit="us")
        roundtrip("20130101 20:43:42.123456789", date_unit="ns")

        # an unknown unit is rejected up front
        ts = Series(Timestamp("20130101 20:43:42.123"), index=self.ts.index)
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ts.to_json(date_format="iso", date_unit="foo")
Example 9
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_date_unit(self):
        # Epoch-encoded dates must round-trip at every supported resolution,
        # both when the unit is forced and when it is auto-detected.
        df = self.tsframe.copy()
        df["date"] = Timestamp("20130101 20:43:42")
        loc = df.columns.get_loc("date")
        df.iloc[1, loc] = Timestamp("19710101 20:43:42")
        df.iloc[2, loc] = Timestamp("21460101 20:43:42")
        df.iloc[4, loc] = pd.NaT

        for unit in ("s", "ms", "us", "ns"):
            json = df.to_json(date_format="epoch", date_unit=unit)

            # force date unit
            assert_frame_equal(read_json(json, date_unit=unit), df)

            # detect date unit
            assert_frame_equal(read_json(json, date_unit=None), df)
Example 10
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r"""{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }"""

        read_json(s) 
Example 11
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_misc_example(self):

        # parsing unordered input fails under numpy=True: the keys are not
        # reordered, so the frame comes back with the wrong index
        data = '[{"a": 1, "b": 2}, {"b":2, "a" :1}]'
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        result = read_json(data, numpy=True)

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\['a', 'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with pytest.raises(AssertionError, match=error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        # the default parser handles the same unordered input correctly
        assert_frame_equal(read_json(data), expected)
Example 12
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
        assert_frame_equal(result, expected) 
Example 13
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 6 votes vote down vote up
def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n' '{"a\\\\":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

    # TODO: there is a near-identical test for pytables; can we share? 
Example 14
Project: OpenAPS   Author: medicinexlab   File: bgdata.py    MIT License 5 votes vote down vote up
def get_bg_dataframe(id_str):
    """
    Function to convert the json file to a pandas dataframe.
    It takes in the string of the id and looks for the devicestatus.json file.
    All data should be stored such that in the directory where main.py lies,
    there is a directory called "data". Inside this directory,
    there is another directory with just the ID Number. Inside this data folder lies the
    devicestatus.json file, which contains the data. If the file is not in the path given,
    it raises an IOError. The path should look like the following example:

    ./data/12345678/devicestatus.json

    Input:      id_str                          ID number as a string
    Output:     bg_df                           Pandas dataframe of all of the data from ./data/[id_str]/devicestatus.json
    Usage:      bg_df = get_bg_dataframe("12345678")
    """
    file_location = "./data/" + id_str + "/devicestatus.json"
    try:
        # Opens the data file and reads in the data into a dataFrame.
        bg_df = pd.read_json(file_location)
    except (OSError, ValueError) as err:
        # Catch only missing/unreadable files and malformed JSON (the original
        # bare ``except:`` hid the real cause); chain it for debuggability.
        raise IOError(file_location + " is not a valid file.") from err

    # (The original had a stray bare ``print`` here — a no-op in Python 3.)
    print("{} total entries.".format(len(bg_df)))

    return bg_df


#Function to find the indices for the given start and end date strings 
Example 15
Project: sklearn2docker   Author: KhaledSharif   File: decision_tree_test.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_decision_tree(self):
        """Train a decision tree, package it with Sklearn2Docker, and check the
        served /predict and /predict_proba endpoints end to end.
        """
        from requests import post
        from os import system
        from pandas import DataFrame, read_json
        from sklearn.datasets import load_iris
        from sklearn.tree import DecisionTreeClassifier

        # Fit a shallow tree on the iris dataset.
        iris = load_iris()
        input_df = DataFrame(data=iris['data'], columns=iris['feature_names'])
        clf = DecisionTreeClassifier(max_depth=2)
        clf.fit(input_df.values, iris['target'])

        # convert classifier to Docker container
        from sklearn2docker.constructor import Sklearn2Docker
        s2d = Sklearn2Docker(
            classifier=clf,
            feature_names=iris['feature_names'],
            class_names=iris['target_names'].tolist(),
        )
        s2d.save(
            name="classifier",
            tag="iris",
        )

        # run your Docker container as a detached process
        # ("sleep 5" gives the container time to come up before requests are
        #  sent; self.port / self.container_name presumably come from the test
        #  fixture — confirm)
        system("docker run -d -p {}:5000 --name {} classifier:iris && sleep 5".format(self.port, self.container_name))

        # send your training data as a json string
        request = post("http://localhost:{}/predict/split".format(self.port), json=input_df.to_json(orient="split"))
        result = read_json(request.content.decode(), orient="split")
        # /predict returns a single "prediction" column with one row per input.
        self.assertEqual(list(result), ['prediction'])
        self.assertGreater(len(result), 0)

        request = post("http://localhost:{}/predict_proba/split".format(self.port), json=input_df.to_json(orient="split"))
        result = read_json(request.content.decode(), orient="split")
        # /predict_proba returns one probability column per iris class.
        self.assertEqual(list(result), ['setosa', 'versicolor', 'virginica'])
        self.assertGreater(len(result), 0)
Example 16
Project: sklearn2docker   Author: KhaledSharif   File: keras_classifier_test.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_barebones_keras(self):
        """Package a Keras classifier with Sklearn2Docker and check the served
        /predict and /predict_proba endpoints end to end.
        """
        from sklearn.datasets import load_iris
        from pandas import DataFrame
        from numpy import array
        from os import system
        from pandas import read_json
        from requests import post

        # Fit the categorical model on the iris dataset.
        iris = load_iris()
        input_df = DataFrame(data=iris['data'], columns=iris['feature_names'])
        model = self.create_categorical_classification_model()
        X, Y = input_df.values, array(iris['target'])
        model.fit(X, Y)

        # convert classifier to Docker container
        from sklearn2docker.constructor import Sklearn2Docker
        s2d = Sklearn2Docker(
            classifier=model,
            feature_names=list(input_df),
            class_names=iris['target_names'].tolist(),
        )
        s2d.save(
            name="classifier",
            tag="keras",
        )

        # # run your Docker container as a detached process
        # ("sleep 5" gives the container time to come up before requests are sent)
        system("docker run -d -p {}:5000 --name {} classifier:keras && sleep 5".format(self.port, self.container_name))

        # send your training data as a json string
        request = post("http://localhost:{}/predict/split".format(self.port), json=input_df.to_json(orient="split"))
        result = read_json(request.content.decode(), orient="split")
        # /predict: exactly one output column, one row per input row.
        self.assertEqual(len(list(result)), 1)
        self.assertEqual(len(result), len(input_df))

        request = post("http://localhost:{}/predict_proba/split".format(self.port), json=input_df.to_json(orient="split"))
        result = read_json(request.content.decode(), orient="split")
        # /predict_proba: one probability column per class (3 iris classes).
        self.assertEqual(len(list(result)), 3)
        self.assertEqual(len(result), len(input_df))
Example 17
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes vote down vote up
def getStatoilTrainValLoaders(args, data_path='/home/adodd202/train.json'):
    """Build train/validation DataLoaders for the Statoil iceberg data.

    Args:
        args: namespace providing batch_size, workers and validationRatio
            (plus whatever fixSeed/XnumpyToTensor/YnumpyToTensor require).
        data_path: location of train.json. Defaults to the original
            hard-coded path so existing callers are unaffected.

    Returns:
        (train_loader, val_loader, train_dataset, val_dataset)
    """
    fixSeed(args)
    local_data = pd.read_json(data_path)

    local_data = shuffle(local_data)  # otherwise same validation set each time!
    local_data = local_data.reindex(np.random.permutation(local_data.index))

    # Each band is stored as a flat list of 5625 floats -> 75x75 image.
    local_data['band_1'] = local_data['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['band_2'] = local_data['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['inc_angle'] = pd.to_numeric(local_data['inc_angle'], errors='coerce')
    local_data['inc_angle'].fillna(0, inplace=True)

    band_1 = np.concatenate([im for im in local_data['band_1']]).reshape(-1, 75, 75)
    band_2 = np.concatenate([im for im in local_data['band_2']]).reshape(-1, 75, 75)
    # band_3=(band_1+band_2)/2
    # Stack the two radar bands as channels: (N, 2, 75, 75).
    local_full_img = np.stack([band_1, band_2], axis=1)

    train_imgs = XnumpyToTensor(local_full_img, args)
    train_targets = YnumpyToTensor(local_data['is_iceberg'].values, args)
    dset_train = TensorDataset(train_imgs, train_targets)

    local_train_ds, local_val_ds = trainTestSplit(dset_train, args.validationRatio)
    # NOTE(review): shuffle=False looks intentional given the manual
    # permutation above — confirm it is not a leftover.
    local_train_loader = torch.utils.data.DataLoader(local_train_ds, batch_size=args.batch_size, shuffle=False,
                                                     num_workers=args.workers)
    local_val_loader = torch.utils.data.DataLoader(local_val_ds, batch_size=args.batch_size, shuffle=False,
                                                   num_workers=args.workers)
    return local_train_loader, local_val_loader, local_train_ds, local_val_ds
Example 18
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes vote down vote up
def BinaryInference(local_model, args, data_path='/home/adodd202/test.json'):
    """Run the binary iceberg classifier over the test set, one row at a time.

    Args:
        local_model: trained torch model returning one probability per input.
        args: namespace providing use_cuda.
        data_path: location of test.json. Defaults to the original
            hard-coded path so existing callers are unaffected.

    Returns:
        DataFrame with columns ['id', 'is_iceberg'] (one prediction per row).
    """
    if args.use_cuda:
        local_model.cuda()
    local_model.eval()

    df_test_set = pd.read_json(data_path)
    # Each band is a flat list of 5625 floats -> 75x75 image.
    df_test_set['band_1'] = df_test_set['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    df_test_set['band_2'] = df_test_set['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    df_test_set['inc_angle'] = pd.to_numeric(df_test_set['inc_angle'], errors='coerce')
    print(df_test_set.shape)

    predictions = []
    for index, row in df_test_set.iterrows():
        row_no_id = row.drop('id')
        band_1_test = (row_no_id['band_1']).reshape(-1, 75, 75)
        band_2_test = (row_no_id['band_2']).reshape(-1, 75, 75)
        # Stack the two radar bands as channels: (1, 2, 75, 75).
        full_img_test = np.stack([band_1_test, band_2_test], axis=1)

        x_data_np = np.array(full_img_test, dtype=np.float32)
        if args.use_cuda:
            X_tensor_test = Variable(torch.from_numpy(x_data_np).cuda())  # Note the conversion for pytorch
        else:
            X_tensor_test = Variable(torch.from_numpy(x_data_np))  # Note the conversion for pytorch

        predicted_val = (local_model(X_tensor_test).data).float()  # probabilities
        p_test = predicted_val.cpu().numpy().item()  # otherwise we get an array, we need a single float
        predictions.append({'id': row['id'], 'is_iceberg': p_test})

    # DataFrame.append was removed in pandas 2.0 (and was O(n^2) anyway);
    # collect rows in a list and build the frame once.
    return pd.DataFrame(predictions, columns=['id', 'is_iceberg'])
Example 19
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: capsulenet.py    MIT License 5 votes vote down vote up
def load_icebergs(data_path='../train.json'):
    """Load the Statoil training data and return an 80/20 train/test split.

    Args:
        data_path: location of train.json. Defaults to the original
            relative path so existing callers keep working.

    Returns:
        ((x_train, y_train), (x_test, y_test)) where x_* is float32 of shape
        (N, 75, 75, 3) and y_* is a one-hot (N, 2) array.
    """
    # Import data
    train = pd.read_json(data_path)
    y_old = train['is_iceberg']

    # One-hot encode the binary is_iceberg label.
    y = np.zeros((y_old.size, 2))
    y[np.arange(y_old.size), y_old] = 1

    # Generate the training data: each band is a flat list of 5625 floats,
    # negated and reshaped to a 75x75 image.
    x_band_1 = -1 * np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
    x_band_2 = -1 * np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
    # Synthetic third channel: the mean of the two radar bands.
    x_band_3 = (x_band_1 + x_band_2) / 2
    x_data = np.concatenate([x_band_1[:, :, :, np.newaxis],
                             x_band_2[:, :, :, np.newaxis],
                             x_band_3[:, :, :, np.newaxis]], axis=-1)

    x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size=0.2)

    return (x_train, y_train), (x_test, y_test)
Example 20
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_discourse.py    Apache License 2.0 5 votes vote down vote up
def get_credentials(keyfile):
    """Load the JSON keyfile's key/value pairs as a pandas Series."""
    return pd.read_json(keyfile, typ='series')
Example 21
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_docker.py    Apache License 2.0 5 votes vote down vote up
def main():
    """Fetch each endpoint's JSON payload and dump it to its CSV file."""
    for url, fn in endpoints_and_files:
        print("pulling data from " + url)
        frame = pd.read_json(url, orient="columns")
        # write to csv
        print("writing csv to " + fn)
        frame.to_csv(fn, index=False, date_format="%Y-%m-%d")
Example 22
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_higherlogic.py    Apache License 2.0 5 votes vote down vote up
def main():
    """Pull Higher Logic community data over the REST API and write CSVs.

    Fetches viewable communities, community members and discussion posts,
    timestamps the member/post frames, and writes them under file_path.
    """
    # credentials
    dfcreds = get_credentials(keyfile)  # get the authentication information from the keyfile
    headers = {'content-type': 'application/json', 'HLIAMKey': dfcreds['key']}

    # Viewable Communities (fetched but currently unused downstream)
    rViewableCommunities = requests.get(repo_path + 'api/v2.0/Communities/GetViewableCommunities', headers=headers)
    dfViewableCommunities = pd.read_json(rViewableCommunities.content)

    # Community Members
    payload = {
        "CommunityKey": 'd06df790-8ca4-4e54-91a0-244af0228ddc',
        "StartRecord": 1,
        "EndRecord": 1500
    }
    rCommunityMembers = requests.post(repo_path + 'api/v2.0/Communities/GetCommunityMembers', headers=headers, json=payload)
    dfCommunityMembers = pd.read_json(rCommunityMembers.content)
    # add a timestamp to the data
    dfCommunityMembers['cmtimestamp'] = dt.datetime.now()
    dfCommunityMembers.index.names = ['rowUID']
    # remove the nested dictionary of community information
    # (was ``drop('Community', 1)``: positional ``axis`` was removed in pandas 2.0)
    dfCommunityMembers.drop(columns='Community', inplace=True)
    dfCommunityMembers.to_csv(file_path + 'techsup_hl_communitymembers.csv', index=False, date_format="%Y-%m-%d")

    # Discussion Posts
    rDiscussionPosts = requests.get(repo_path + 'api/v2.0/Discussions/GetDiscussionPosts?maxToRetrieve=5000', headers=headers)
    dfDiscussionPosts = pd.read_json(rDiscussionPosts.content)
    # add a timestamp to the data
    dfDiscussionPosts['dptimestamp'] = dt.datetime.now()
    dfDiscussionPosts.to_csv(file_path + 'techsup_hl_discussionposts.csv', index=False, date_format="%Y-%m-%d")
Example 23
Project: pymapd-examples   Author: omnisci   File: omnisci_utils.py    Apache License 2.0 5 votes vote down vote up
def get_credentials(keyfile):
    """Read the JSON keyfile and return its key/value pairs as a Series."""
    credentials = pd.read_json(keyfile, typ='series')
    return credentials

#def get_credentials_header(keyfile):
#    dkfv = pd.DataFrame.to_json(keyfile)
#    return dkfv 
Example 24
Project: kaggle-cooking   Author: fpoli   File: utils.py    GNU General Public License v3.0 5 votes vote down vote up
def read_data(project_path):
    """Load the train and test cooking datasets from <project_path>/data.

    Returns:
        (train, test) DataFrames read from data/train.json and data/test.json.
    """
    # NOTE: originally Python 2 ``print`` statements (a SyntaxError under
    # Python 3); converted to print() calls with identical output.
    print("Reading data...")
    train = pd.read_json(project_path + "/data/train.json")
    test = pd.read_json(project_path + "/data/test.json")

    print("Train size:", len(train.id))
    print("Test size:", len(test.id))

    return train, test
Example 25
Project: RNASEqTool   Author: armell   File: content_representations.py    MIT License 5 votes vote down vote up
def to_html(self):
        """Render ``self.content`` (JSON-serialisable tabular data) as an HTML table."""
        # ``StringIO.StringIO`` was the Python 2 module; ``io.StringIO`` is the
        # Python 3 equivalent (the old import is an ImportError on Python 3).
        from io import StringIO

        str_buffer = StringIO()
        df = pd.read_json(json.dumps(self.content))
        df.to_html(str_buffer)

        return str_buffer.getvalue()

    #send raw csv file with counts 
Example 26
Project: Recession-Predictor   Author: tzhangwps   File: make_dataset.py    MIT License 5 votes vote down vote up
def create_secondary_data(self):
        """
        Creates and saves the secondary dataset as a json object.

        Reads the most recent primary dataset, derives the secondary data via
        calculate_secondary_data(), and writes the result to both the dated
        and the "most recent" secondary-dataset paths.
        """
        print('\nCreating secondary dataset from "primary_dataset_most_recent.json"')
        # Load the latest primary dataset and restore index order before
        # deriving features from it.
        self.primary_df_output = pd.read_json(path.data_primary_most_recent)
        self.primary_df_output.sort_index(inplace=True)
        # Populates self.secondary_df_output (see calculate_secondary_data).
        self.calculate_secondary_data()
        print('Finished creating secondary dataset!')
        print('\t|--Saving secondary dataset to {}'.format(path.data_secondary))
        # Persist two copies: the canonical path and the "most recent" path.
        self.secondary_df_output.to_json(path.data_secondary)
        self.secondary_df_output.to_json(path.data_secondary_most_recent)
        print('\nSecondary dataset saved to {}'.format(path.data_secondary_most_recent))
Example 27
Project: Recession-Predictor   Author: tzhangwps   File: deployment.py    MIT License 5 votes vote down vote up
def run_test_procedures(self):
        """
        Runs test procedures on final dataset.

        Loads the final dataset, then runs the deployment pipeline in order:
        fill_testing_dates, perform_backtests, create_full_predictions_dataframe.
        """
        print('\nDeploying prediction model...\n')
        # Load the final dataset and restore index order before back-testing.
        self.final_df_output = pd.read_json(path.data_final)
        self.final_df_output.sort_index(inplace=True)
        # These three steps are order-dependent (each consumes state set up by
        # the previous one).
        self.fill_testing_dates()
        self.perform_backtests()
        self.create_full_predictions_dataframe()
        print('\nDeployment complete!')
        
        
#MIT License
#
#Copyright (c) 2019 Terrence Zhang
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE. 
Example 28
Project: Recession-Predictor   Author: tzhangwps   File: testing.py    MIT License 5 votes vote down vote up
def run_test_procedures(self):
        """
        Runs test procedures on final dataset.

        Loads the final dataset, then runs the backtesting pipeline in order:
        fill_testing_dates, perform_backtests, create_full_predictions_dataframe.
        """
        print('\nPerforming backtests...\n')
        # Load the final dataset and restore index order before back-testing.
        self.final_df_output = pd.read_json(path.data_final)
        self.final_df_output.sort_index(inplace=True)
        # These three steps are order-dependent (each consumes state set up by
        # the previous one).
        self.fill_testing_dates()
        self.perform_backtests()
        self.create_full_predictions_dataframe()
        print('\nBacktesting complete!')
        
        
#MIT License
#
#Copyright (c) 2019 Terrence Zhang
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE. 
Example 29
Project: whiskyInvest   Author: arms3   File: app.py    MIT License 5 votes vote down vote up
def update_table(json):
    """Rebuild the whisky returns table from the cached JSON, or show the default."""
    # No cached data yet: render the module-level default table.
    if json is None:
        return dbc.Table.from_dataframe(table, dark=False, responsive='md', hover=True, float_format='.2f', size='sm')
    frame = pd.read_json(json, orient='split')
    frame = frame[['formatted_whisky', 'days_to_close_spread', 'annual_return', 'r_value', 'owned']]
    frame['annual_return'] = frame['annual_return'].map('{:.1f}%'.format)
    # Render the ownership flag as a tick/cross.
    frame = frame.replace({'owned': {True: '✓', False: '✗'}})
    frame.columns = ['Whisky', 'Days to Close Bid Ask Spread', 'Annual Return, %', 'Confidence (R Value)', 'Own?']
    return dbc.Table.from_dataframe(frame, dark=False, responsive='md', hover=True, float_format='.2f', size='sm')
Example 30
Project: whiskyInvest   Author: arms3   File: app.py    MIT License 5 votes vote down vote up
def update_bar_chart(json):
    """Refresh the module-level table from the cached JSON, then redraw the bar chart."""
    # No cached data yet: just draw the chart from current state.
    if json is None:
        return best_returns_bar()
    global table
    table = pd.read_json(json, orient='split')
    table = table[['formatted_whisky', 'days_to_close_spread', 'annual_return', 'r_value', 'owned']]
    table['annual_return'] = table['annual_return'].map('{:.1f}%'.format)
    # table = table.replace({'owned': {True: '✓', False: '✗'}})
    table.columns = ['Whisky', 'Days to Close Bid Ask Spread', 'Annual Return, %', 'Confidence (R Value)', 'Own?']
    return best_returns_bar()
Example 31
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_read_jsonl():
    # GH9180: line-delimited JSON, including records with out-of-order keys
    got = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
    want = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    assert_frame_equal(got, want)
Example 32
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_readjson_chunks(lines_json_df, chunksize):
    # Basic test that read_json(chunks=True) gives the same result as
    # read_json(chunks=False)
    # GH17048: memory usage when lines=True

    unchunked = read_json(StringIO(lines_json_df), lines=True)
    reader = read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize)
    chunked = pd.concat(reader)

    assert_frame_equal(chunked, unchunked) 
Example 33
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_readjson_chunksize_requires_lines(lines_json_df):
    """chunksize without lines=True must raise ValueError."""
    with pytest.raises(ValueError, match="chunksize can only be passed if lines=True"):
        pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)
Example 34
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_readjson_each_chunk(lines_json_df):
    # Other tests check that the final result of read_json(chunksize=True)
    # is correct. This checks the intermediate chunks.
    chunks = list(pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2))
    assert chunks[0].shape == (2, 2)
    assert chunks[1].shape == (1, 2) 
Example 35
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_readjson_chunks_from_file():
    """Chunked and unchunked reads of a jsonl file on disk agree."""
    with ensure_clean("test.json") as path:
        frame = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        frame.to_json(path, lines=True, orient="records")
        from_chunks = pd.concat(pd.read_json(path, lines=True, chunksize=1))
        in_one_go = pd.read_json(path, lines=True)
        assert_frame_equal(in_one_go, from_chunks)
Example 36
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_readlines.py    MIT License 5 votes vote down vote up
def test_readjson_invalid_chunksize(lines_json_df, chunksize):
    """Invalid chunksize values are rejected with ValueError."""
    expected = r"'chunksize' must be an integer >=1"
    with pytest.raises(ValueError, match=expected):
        pd.read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize)
Example 37
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_double_encoded_labels(self):
        df = DataFrame(
            [["a", "b"], ["c", "d"]],
            index=['index " 1', "index / 2"],
            columns=["a \\ b", "y / z"],
        )

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
        assert_frame_equal(
            df, read_json(df.to_json(orient="columns"), orient="columns")
        )
        assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index"))
        df_unser = read_json(df.to_json(orient="records"), orient="records")
        assert_index_equal(df.columns, df_unser.columns)
        tm.assert_numpy_array_equal(df.values, df_unser.values) 
Example 38
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_non_unique_columns(self):
    """Duplicate columns: label-keyed orients raise, positional ones round-trip.

    Fixes the corrupted indentation of the extracted block and wraps
    literal JSON in StringIO (raw strings deprecated since pandas 2.1).
    """
    df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

    # Orients that key data by column label cannot represent duplicates.
    for orient in ("index", "columns", "records"):
        msg = "DataFrame columns must be unique for orient='{}'".format(orient)
        with pytest.raises(ValueError, match=msg):
            df.to_json(orient=orient)

    # 'split' and 'values' keep positional data and round-trip fine.
    assert_frame_equal(
        df, read_json(StringIO(df.to_json(orient="split")), orient="split", dtype=False)
    )
    unser = read_json(StringIO(df.to_json(orient="values")), orient="values")
    tm.assert_numpy_array_equal(df.values, unser.values)

    # GH4377: duplicate columns used to break 'split' round-trips.
    df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"])
    result = read_json(StringIO(df.to_json(orient="split")), orient="split")
    assert_frame_equal(result, df)

    def _check(df):
        # Round-trip with convert_dates so datetime cells survive too.
        result = read_json(
            StringIO(df.to_json(orient="split")), orient="split", convert_dates=["x"]
        )
        assert_frame_equal(result, df)

    for o in [
        [["a", "b"], ["c", "d"]],
        [[1.5, 2.5], [3.5, 4.5]],
        [[1, 2.5], [3, 4.5]],
        [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
    ]:
        _check(DataFrame(o, index=[1, 2], columns=["x", "x"]))
Example 39
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_from_json_bad_data(self):
    """Malformed JSON and malformed 'split' payloads raise ValueError.

    Fixes the corrupted indentation of the extracted block.
    """
    # Not JSON at all.
    with pytest.raises(ValueError, match="Expected object or value"):
        read_json(StringIO('{"key":b:a:d}'))

    # Too few index entries for the data rows.
    json = StringIO(
        '{"columns":["A","B"],'
        '"index":["2","3"],'
        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}'
    )
    msg = r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)"
    with pytest.raises(ValueError, match=msg):
        read_json(json, orient="split")

    # More column labels than data columns.
    json = StringIO(
        '{"columns":["A","B","C"],'
        '"index":["1","2","3"],'
        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}'
    )
    msg = "3 columns passed, passed data had 2 columns"
    with pytest.raises(ValueError, match=msg):
        read_json(json, orient="split")

    # Unknown key in the 'split' payload.
    json = StringIO(
        '{"badkey":["A","B"],'
        '"index":["2","3"],'
        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}'
    )
    with pytest.raises(ValueError, match=r"unexpected key\(s\): badkey"):
        read_json(json, orient="split")
Example 40
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        df = DataFrame([["1", "2"], ["4", "5", "6"]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        assert unser["2"]["0"] is None

        unser = read_json(df.to_json(), numpy=False)
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False)
        assert unser["2"]["0"] is None

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0]) 
Example 41
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_empty(self):
        df = DataFrame(columns=["jim", "joe"])
        assert not df._is_mixed_type
        assert_frame_equal(
            read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False
        )
        # GH 7445
        result = pd.DataFrame({"test": []}, index=[]).to_json(orient="columns")
        expected = '{"test":{}}'
        assert result == expected 
Example 42
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=["jim", "joe"])
        df["joe"] = df["joe"].astype("i8")
        assert df._is_mixed_type
        assert_frame_equal(
            read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False
        ) 
Example 43
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_series_non_unique_index(self):
    """Duplicate index labels: orient='index' raises, others round-trip.

    Fixes the corrupted indentation of the extracted block and wraps
    literal JSON in StringIO (raw strings deprecated since pandas 2.1).
    """
    s = Series(["a", "b"], index=[1, 1])

    msg = "Series index must be unique for orient='index'"
    with pytest.raises(ValueError, match=msg):
        s.to_json(orient="index")

    # 'split' carries the duplicated index explicitly.
    assert_series_equal(
        s,
        read_json(StringIO(s.to_json(orient="split")), orient="split", typ="series"),
    )
    # 'records' drops the index; compare values only.
    unser = read_json(
        StringIO(s.to_json(orient="records")), orient="records", typ="series"
    )
    tm.assert_numpy_array_equal(s.values, unser.values)
Example 44
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ="series", precise_float=True)
        assert_series_equal(result, s, check_index_type=False) 
Example 45
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_series_with_dtype(self):
        # GH 21986
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ="series", dtype=np.int64)
        expected = Series([4] * 3)
        assert_series_equal(result, expected) 
Example 46
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(df.to_json(), precise_float=True)
        assert_frame_equal(result, df, check_index_type=False, check_column_type=False) 
Example 47
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        assert_frame_equal(result, df) 
Example 48
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_path(self):
    """Smoke-test: each fixture frame writes to a real path and reads back.

    Fixes the corrupted indentation of the extracted block. No value
    assertions -- this only checks the file round-trip does not raise.
    """
    with ensure_clean("test.json") as path:
        for df in [
            self.frame,
            self.frame2,
            self.intframe,
            self.tsframe,
            self.mixed_frame,
        ]:
            df.to_json(path)
            read_json(path)
Example 49
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_axis_dates(self):
    """Datetime axes round-trip for both frames and series.

    Fixes the corrupted indentation of the extracted block and wraps
    literal JSON in StringIO (raw strings deprecated since pandas 2.1).
    """
    # frame with a datetime index (self.tsframe fixture)
    json = self.tsframe.to_json()
    result = read_json(StringIO(json))
    assert_frame_equal(result, self.tsframe)

    # series: the name is not serialised, so it comes back as None
    json = self.ts.to_json()
    result = read_json(StringIO(json), typ="series")
    assert_series_equal(result, self.ts, check_names=False)
    assert result.name is None
Example 50
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = range(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
        assert_frame_equal(result, result) 
Example 51
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_url(self, field, dtype):
    """Read JSON straight from a GitHub API URL; date-like fields get parsed.

    Fixes the corrupted indentation of the extracted block. Requires
    network access; *field*/*dtype* come from test parametrisation.
    """
    url = "https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5"  # noqa
    result = read_json(url, convert_dates=True)
    assert result[field].dtype == dtype
Example 52
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit="ms")

        s = Series([timedelta(23), timedelta(seconds=5)])
        assert s.dtype == "timedelta64[ns]"

        result = pd.read_json(s.to_json(), typ="series").apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1]))
        assert s.dtype == "timedelta64[ns]"
        result = pd.read_json(s.to_json(), typ="series").apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == "timedelta64[ns]"
        assert_frame_equal(frame, pd.read_json(frame.to_json()).apply(converter))

        frame = DataFrame(
            {
                "a": [timedelta(days=23), timedelta(seconds=5)],
                "b": [1, 2],
                "c": pd.date_range(start="20130101", periods=2),
            }
        )

        result = pd.read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result) 
Example 53
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_mixed_timedelta_datetime(self):
        frame = DataFrame(
            {"a": [timedelta(23), pd.Timestamp("20130101")]}, dtype=object
        )

        expected = DataFrame(
            {"a": [pd.Timedelta(frame.a[0]).value, pd.Timestamp(frame.a[1]).value]}
        )
        result = pd.read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"})
        assert_frame_equal(result, expected, check_index_type=False) 
Example 54
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_default_handler(self):
        value = object()
        frame = DataFrame({"a": [7, value]})
        expected = DataFrame({"a": [7, str(value)]})
        result = pd.read_json(frame.to_json(default_handler=str))
        assert_frame_equal(expected, result, check_index_type=False) 
Example 55
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
        assert_frame_equal(result, expected) 
Example 56
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean("tmp_items.json") as path:
            with open(path, "w") as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
            assert_frame_equal(result, expected) 
Example 57
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_json_large_numbers(self):
        # GH18842
        json = '{"articleId": "1404366058080022500245"}'
        json = StringIO(json)
        result = read_json(json, typ="series")
        expected = Series(1.404366e21, index=["articleId"])
        assert_series_equal(result, expected)

        json = '{"0": {"articleId": "1404366058080022500245"}}'
        json = StringIO(json)
        result = read_json(json)
        expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
        assert_frame_equal(result, expected) 
Example 58
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_latin_encoding(self):
    """Round-trip latin-1 encoded strings (GH 13774).

    Permanently skipped: .to_json() does not accept an encoding argument.
    Fixes the corrupted indentation of the extracted block.
    """
    pytest.skip("encoding not implemented in .to_json(), xref #13774")

    values = [
        [b"E\xc9, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"a", b"b", b"c"],
        [b"EE, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
        [b"", b"a", b"b", b"c"],
        [b"\xf8\xfc", b"a", b"b", b"c"],
        [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
        [np.nan, b"", b"b", b"c"],
        [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
    ]

    def _try_decode(x, encoding="latin-1"):
        # Leave non-bytes entries (e.g. np.nan) untouched.
        try:
            return x.decode(encoding)
        except AttributeError:
            return x

    # not sure how to remove latin-1 from code in python 2 and 3
    values = [[_try_decode(x) for x in y] for y in values]

    examples = []
    for dtype in ["category", object]:
        for val in values:
            examples.append(Series(val, dtype=dtype))

    def roundtrip(s, encoding="latin-1"):
        with ensure_clean("test.json") as path:
            s.to_json(path, encoding=encoding)
            retr = read_json(path, encoding=encoding)
            assert_series_equal(s, retr, check_categorical=False)

    for s in examples:
        roundtrip(s)
Example 59
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_from_json_to_json_table_dtypes(self):
        # GH21345
        expected = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
        dfjson = expected.to_json(orient="table")
        result = pd.read_json(dfjson, orient="table")
        assert_frame_equal(result, expected) 
Example 60
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_json_table_dtype_raises(self, dtype):
    """dtype cannot be combined with orient='table' (GH21345).

    Fixes the corrupted indentation of the extracted block.
    """
    df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
    dfjson = df.to_json(orient="table")
    msg = "cannot pass both dtype and orient='table'"
    with pytest.raises(ValueError, match=msg):
        pd.read_json(StringIO(dfjson), orient="table", dtype=dtype)
Example 61
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_json_table_convert_axes_raises(self):
    """convert_axes cannot be combined with orient='table' (GH25433, GH25435).

    Fixes the corrupted indentation of the extracted block.
    """
    df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
    dfjson = df.to_json(orient="table")
    msg = "cannot pass both convert_axes and orient='table'"
    with pytest.raises(ValueError, match=msg):
        pd.read_json(StringIO(dfjson), orient="table", convert_axes=True)
Example 62
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_index_false_from_json_to_json(self, orient, index):
        # GH25170
        # Test index=False in from_json to_json
        expected = DataFrame({"a": [1, 2], "b": [3, 4]})
        dfjson = expected.to_json(orient=orient, index=index)
        result = read_json(dfjson, orient=orient)
        assert_frame_equal(result, expected) 
Example 63
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_pandas.py    MIT License 5 votes vote down vote up
def test_read_timezone_information(self):
        # GH 25546
        result = read_json(
            '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index"
        )
        expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC"))
        assert_series_equal(result, expected) 
Example 64
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_json_table_schema.py    MIT License 5 votes vote down vote up
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
    """Schemas read_json cannot reconstruct yet raise NotImplementedError.

    Fixes the corrupted indentation of the extracted block. *index_nm*,
    *vals* come from test parametrisation; *recwarn* absorbs warnings.
    """
    df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
    out = df.to_json(orient="table")
    with pytest.raises(NotImplementedError, match="can not yet read "):
        pd.read_json(StringIO(out), orient="table")
Example 65
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_json_table_schema.py    MIT License 5 votes vote down vote up
def test_comprehensive(self):
        df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.1, 2.2, 3.3, 4.4],
                # 'H': pd.date_range('2016-01-01', freq='d', periods=4,
                #                   tz='US/Central'),
                "I": [True, False, False, True],
            },
            index=pd.Index(range(4), name="idx"),
        )

        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result) 
Example 66
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_json_table_schema.py    MIT License 5 votes vote down vote up
def test_multiindex(self, index_names):
        # GH 18912
        df = pd.DataFrame(
            [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]],
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"],
        )
        df.index.names = index_names
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result) 
Example 67
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_json_table_schema.py    MIT License 5 votes vote down vote up
def test_empty_frame_roundtrip(self):
        # GH 21287
        df = pd.DataFrame(columns=["a", "b", "c"])
        expected = df.copy()
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(expected, result) 
Example 68
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_compression_roundtrip(compression):
    """A frame written with compression reads back identically.

    The decompressed text is wrapped in StringIO before re-parsing:
    passing raw JSON strings to read_json is deprecated since pandas 2.1.
    """
    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        assert_frame_equal(df, pd.read_json(path, compression=compression))

        # Decompress by hand to prove the bytes on disk were compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode("utf8")
        assert_frame_equal(df, pd.read_json(StringIO(result)))
Example 69
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_with_s3_url(compression, s3_resource):
    """Round-trip a compressed JSON file through the test S3 bucket.

    Wraps the literal in StringIO (raw JSON strings to read_json are
    deprecated since pandas 2.1).
    """
    # Bucket "pandas-test" created in tests/io/conftest.py
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, "rb") as f:
            s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f)

    roundtripped_df = pd.read_json("s3://pandas-test/test-1", compression=compression)
    assert_frame_equal(df, roundtripped_df)
Example 70
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_lines_with_compression(compression):

    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df.to_json(path, orient="records", lines=True, compression=compression)
        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
        assert_frame_equal(df, roundtripped_df) 
Example 71
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_chunksize_with_compression(compression):

    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        df.to_json(path, orient="records", lines=True, compression=compression)

        res = pd.read_json(path, lines=True, chunksize=1, compression=compression)
        roundtripped_df = pd.concat(res)
        assert_frame_equal(df, roundtripped_df) 
Example 72
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_write_unsupported_compression_type():
    """to_json rejects unknown compression types with ValueError.

    Wraps the literal in StringIO (raw JSON strings to read_json are
    deprecated since pandas 2.1).
    """
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported")
Example 73
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_compression.py    MIT License 5 votes vote down vote up
def test_read_unsupported_compression_type():
    """read_json rejects unknown compression types with ValueError."""
    with tm.ensure_clean() as path:
        expected_msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=expected_msg):
            pd.read_json(path, compression="unsupported")
Example 74
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readCov__(self, dts, ids=None):
    """Load per-datetime covariance matrices from the risk DB table.

    Fixes the corrupted indentation of the extracted block.

    Parameters
    ----------
    dts : list of datetime
        Datetimes to query (formatted with microsecond precision).
    ids : list, optional
        If given, each matrix is re-indexed to exactly these ids; datetimes
        with no overlapping ids yield an all-NaN matrix.

    Returns a pd.Panel keyed by datetime.
    NOTE(review): pd.Panel was removed in pandas 0.25 -- this module
    presumably pins an older pandas; confirm before upgrading.
    """
    SQLStr = "SELECT DateTime, Cov "
    SQLStr += "FROM "+self._DBTableName+" "
    SQLStr += "WHERE ("+genSQLInCondition("DateTime", [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts], is_str=True, max_num=1000)+") "
    Data = {}
    for iDT, iCov in self._RiskDB.fetchall(SQLStr):
        # Each Cov cell stores a frame serialised with orient='split';
        # the index is restored from the columns (square matrix).
        iCov = pd.read_json(iCov, orient="split")
        iCov.index = iCov.columns
        if ids is not None:
            if iCov.index.intersection(ids).shape[0] > 0:
                iCov = iCov.loc[ids, ids]
            else:
                iCov = pd.DataFrame(index=ids, columns=ids)
        Data[iDT] = iCov
    if Data:
        return pd.Panel(Data).loc[dts]
    return pd.Panel(items=dts, major_axis=ids, minor_axis=ids)
Example 75
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readFactorCov__(self, dts):
    """Load per-datetime factor covariance matrices from the risk DB table.

    Fixes the corrupted indentation of the extracted block. Rows whose
    FactorCov column is NULL are filtered out in SQL.

    Returns a pd.Panel keyed by datetime.
    NOTE(review): pd.Panel was removed in pandas 0.25 -- this module
    presumably pins an older pandas; confirm before upgrading.
    """
    SQLStr = "SELECT DateTime, FactorCov "
    SQLStr += "FROM "+self._DBTableName+" "
    SQLStr += "WHERE FactorCov IS NOT NULL "
    SQLStr += "AND ("+genSQLInCondition("DateTime", [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts], is_str=True, max_num=1000)+") "
    Data = {}
    for iDT, iCov in self._RiskDB.fetchall(SQLStr):
        # FactorCov cells are frames serialised with orient='split';
        # restore the index from the columns (square matrix).
        iCov = pd.read_json(iCov, orient="split")
        iCov.index = iCov.columns
        Data[iDT] = iCov
    if Data:
        return pd.Panel(Data).loc[dts]
    return pd.Panel(items=dts)
Example 76
Project: sklearn2docker   Author: KhaledSharif   File: keras_classifier_test.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test_binary_classifier(self):
    """End-to-end: train a Keras binary classifier, package it with
    Sklearn2Docker, serve it in a container, and query both endpoints.

    Fixes the corrupted indentation of the extracted block. Requires a
    local Docker daemon, keras, sklearn and network access to localhost.
    """
    from keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.datasets import load_breast_cancer
    from sklearn import preprocessing
    from pandas import DataFrame
    from numpy import array
    from os import system
    from pandas import read_json
    from requests import post

    # Train a small pipeline on the breast-cancer dataset (1 epoch only).
    breast_cancer = load_breast_cancer()
    input_df = DataFrame(data=breast_cancer['data'], columns=breast_cancer['feature_names'])
    model = Pipeline([
        ('rescale', preprocessing.StandardScaler()),
        ('min_max', preprocessing.MinMaxScaler((-1, 1,))),
        ('nn', KerasClassifier(build_fn=self.create_binary_classification_model, epochs=1, verbose=1)),
    ])
    X, Y = input_df.values, array(breast_cancer['target'])
    model.fit(X, Y)

    # Convert the trained classifier into a Docker image.
    from sklearn2docker.constructor import Sklearn2Docker
    s2d = Sklearn2Docker(
        classifier=model,
        feature_names=list(input_df),
        class_names=breast_cancer['target_names'].tolist(),
    )
    s2d.save(
        name="classifier",
        tag="keras",
    )

    # Run the container detached; the sleep gives the server time to start.
    system("docker run -d -p {}:5000 --name {} classifier:keras && sleep 5".format(self.port, self.container_name))

    # POST the training data as a JSON string; expect one prediction column.
    request = post("http://localhost:{}/predict/split".format(self.port), json=input_df.to_json(orient="split"))
    result = read_json(request.content.decode(), orient="split")
    self.assertEqual(len(list(result)), 1)
    self.assertEqual(len(result), len(input_df))

    # predict_proba for this binary model also returns a single column.
    request = post("http://localhost:{}/predict_proba/split".format(self.port), json=input_df.to_json(orient="split"))
    result = read_json(request.content.decode(), orient="split")
    self.assertEqual(len(list(result)), 1)
    self.assertEqual(len(result), len(input_df))
Example 77
Project: sklearn2docker   Author: KhaledSharif   File: keras_classifier_test.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test_categorical_classifier(self):
    """End-to-end: train a Keras multi-class classifier on iris, package it
    with Sklearn2Docker, serve it in a container, and query both endpoints.

    Fixes the corrupted indentation of the extracted block. Requires a
    local Docker daemon, keras, sklearn and network access to localhost.
    """
    from keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.datasets import load_iris
    from sklearn import preprocessing
    from pandas import DataFrame
    from numpy import array
    from os import system
    from pandas import read_json
    from requests import post

    # Train a small pipeline on the iris dataset (1 epoch only).
    iris = load_iris()
    input_df = DataFrame(data=iris['data'], columns=iris['feature_names'])
    model = Pipeline([
        ('rescale', preprocessing.StandardScaler()),
        ('min_max', preprocessing.MinMaxScaler((-1, 1,))),
        ('nn', KerasClassifier(build_fn=self.create_categorical_classification_model, epochs=1, verbose=1)),
    ])
    X, Y = input_df.values, array(iris['target'])
    model.fit(X, Y)

    # Convert the trained classifier into a Docker image.
    from sklearn2docker.constructor import Sklearn2Docker
    s2d = Sklearn2Docker(
        classifier=model,
        feature_names=list(input_df),
        class_names=iris['target_names'].tolist(),
    )
    s2d.save(
        name="classifier",
        tag="keras",
    )

    # Run the container detached; the sleep gives the server time to start.
    system("docker run -d -p {}:5000 --name {} classifier:keras && sleep 5".format(self.port, self.container_name))

    # POST the training data as a JSON string; expect one prediction column.
    request = post("http://localhost:{}/predict/split".format(self.port), json=input_df.to_json(orient="split"))
    result = read_json(request.content.decode(), orient="split")
    self.assertEqual(len(list(result)), 1)
    self.assertEqual(len(result), len(input_df))

    # predict_proba returns one column per iris class (three).
    request = post("http://localhost:{}/predict_proba/split".format(self.port), json=input_df.to_json(orient="split"))
    result = read_json(request.content.decode(), orient="split")
    self.assertEqual(len(list(result)), 3)
    self.assertEqual(len(result), len(input_df))
Example 78
Project: sklearn2docker   Author: KhaledSharif   File: api.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def perform_prediction(probabilistic, orient) -> str:
    """Serve a model prediction over the JSON body of the current Flask request.

    Args:
        probabilistic: when truthy, respond with per-class scores from
            ``predict_proba``; otherwise respond with hard class labels.
        orient: pandas JSON orientation used both to parse the incoming
            payload and to serialize the response.

    Returns:
        The prediction(s) serialized as a JSON string in the same ``orient``
        layout as the request.
    """
    global tensorflow_default_graph
    global classifier

    # Parse the request body (force=True accepts it even without a JSON
    # content-type header), then reorder the columns into the exact order
    # the classifier was trained with.
    payload = request.get_json(force=True)
    frame = read_json(payload, orient=orient)[classifier.expected_column_names]

    model = classifier.classifier_object
    if probabilistic:
        if tensorflow_default_graph:
            # Keras models must be evaluated inside the graph they were built in.
            with tensorflow_default_graph.as_default():
                # NOTE(review): only the first output column is kept on this
                # path — mirrors the original code; confirm for multi-class nets.
                scores = model.predict_proba(frame.values)[:, 0]

                print(scores)
        else:
            scores = model.predict_proba(frame.values).tolist()

        result_df = DataFrame(data=scores, columns=classifier.class_names)
    else:
        if tensorflow_default_graph:
            with tensorflow_default_graph.as_default():
                raw = model.predict(frame.values)[:, 0].tolist()
                # Binarize the single sigmoid output at the 0.5 threshold.
                labels = [1 if score > 0.5 else 0 for score in raw]
        else:
            labels = model.predict(frame.values).tolist()

        result_df = DataFrame()
        result_df["prediction"] = [classifier.class_names[idx] for idx in labels]

    # Keep the caller's row index so responses line up with the input rows.
    result_df.index = frame.index

    # Serialize the predictions back to JSON in the requested layout.
    return result_df.to_json(orient=orient)
Example 79
Project: Recession-Predictor   Author: tzhangwps   File: build_features_and_labels.py    MIT License 4 votes vote down vote up
def create_final_dataset(self):
    """Assemble, reorder, and persist the final modeling dataset.

    Loads the most recent secondary dataset from disk, derives feature and
    label columns via ``make_features``/``label_output``, restricts the
    frame to the canonical column order, and writes the result to
    ``path.data_final`` as JSON.
    """
    print('\nCreating final dataset...')

    # Start from the freshest secondary dataset, sorted chronologically.
    self.secondary_df_output = pd.read_json(path.data_secondary_most_recent)
    self.secondary_df_output.sort_index(inplace=True)
    self.final_df_output = self.secondary_df_output

    # Derive engineered features and recession labels in place.
    self.make_features()
    self.label_output()

    # Canonical column order for the exported dataset: labels first,
    # then employment, rates/inflation, treasury, equity, and production.
    ordered_columns = [
        'Dates',
        'Recession', 'Recession_in_6mo', 'Recession_in_12mo',
        'Recession_in_24mo', 'Recession_within_6mo',
        'Recession_within_12mo', 'Recession_within_24mo',
        'Payrolls_3mo_pct_chg_annualized', 'Payrolls_12mo_pct_chg',
        'Payrolls_3mo_vs_12mo', 'Unemployment_Rate',
        'Unemployment_Rate_12mo_chg', 'Real_Fed_Funds_Rate',
        'Real_Fed_Funds_Rate_12mo_chg', 'CPI_3mo_pct_chg_annualized',
        'CPI_12mo_pct_chg', 'CPI_3mo_vs_12mo',
        '10Y_Treasury_Rate_12mo_chg', '3M_Treasury_Rate_12mo_chg',
        '3M_10Y_Treasury_Spread', '3M_10Y_Treasury_Spread_12mo_chg',
        '5Y_10Y_Treasury_Spread',
        'S&P_500_3mo_chg', 'S&P_500_12mo_chg', 'S&P_500_3mo_vs_12mo',
        'IPI_3mo_pct_chg_annualized', 'IPI_12mo_pct_chg',
        'IPI_3mo_vs_12mo',
    ]
    self.final_df_output = self.final_df_output[ordered_columns]

    print('Finished creating final dataset!')
    print('\t|--Saving final dataset to {}'.format(path.data_final))
    self.final_df_output.to_json(path.data_final)
    print('\nFinal dataset saved to {}'.format(path.data_final))
        
        
#MIT License
#
#Copyright (c) 2019 Terrence Zhang
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE. 
Example 80
Project: Recession-Predictor   Author: tzhangwps   File: deployment_results.py    MIT License 4 votes vote down vote up
def plot_test_results(self):
    """Load deployed-model test results and render them into a single PDF.

    Plots the raw SVM probabilities, their exponential moving average,
    and finally emits the chart data file, closing the PDF when done.
    """
    def _load_sorted_results():
        # NOTE(review): the results file is re-read before every use —
        # presumably because the plotting helpers mutate the frame in
        # place; confirm before consolidating these reads.
        results = pd.read_json(path.deployment_svm_test_results)
        results.sort_index(inplace=True)
        return results

    self.svm_test_results = _load_sorted_results()
    print('\nPlotting test results...')
    self.pdf_object = PdfPages(path.deployment_results_plots)

    print('\t|--Plotting SVM test results...')
    self.plot_probabilities(dataframe=self.svm_test_results,
                            name='SVM', exponential=False)

    # Fresh copy for the smoothed (EMA) variant of the same plot.
    self.svm_test_results = _load_sorted_results()
    self.plot_probabilities(dataframe=self.svm_test_results,
                            name='SVM EMA', exponential=True)
    print('\nPlotted results saved to {}'.format(path.deployment_results_plots))

    # Fresh copy again for the chart-data export.
    self.svm_test_results = _load_sorted_results()
    self.create_chart_data(dataframe=self.svm_test_results)
    print('\nChart data saved to {}'.format(path.deployment_chart_data))
    self.pdf_object.close()
        
#MIT License
#
#Copyright (c) 2019 Terrence Zhang
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.