Python pandas.read_json() Examples

The following are 30 code examples of pandas.read_json(), drawn from open-source projects; the source file and originating project for each example are noted in the header above its code. You may also want to check out the other available functions and classes of the pandas module.
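Before the project examples, here is a minimal sketch of the basic call patterns, assuming an in-memory JSON string (recent pandas versions may ask you to wrap literal strings in io.StringIO):

import pandas as pd

# Column-oriented JSON string -> DataFrame (the default orient for frames)
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

# Newline-delimited JSON, one record per line
df_lines = pd.read_json('{"a": 1, "b": 4}\n{"a": 2, "b": 5}', lines=True)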
Example #1
Source File: test_json_table_schema.py    From recruit with Apache License 2.0
def test_comprehensive(self):
        df = DataFrame(
            {'A': [1, 2, 3, 4],
             'B': ['a', 'b', 'c', 'c'],
             'C': pd.date_range('2016-01-01', freq='d', periods=4),
             # 'D': pd.timedelta_range('1H', periods=4, freq='T'),
             'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
             'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
                                           ordered=True)),
             'G': [1.1, 2.2, 3.3, 4.4],
             # 'H': pd.date_range('2016-01-01', freq='d', periods=4,
             #                   tz='US/Central'),
             'I': [True, False, False, True],
             },
            index=pd.Index(range(4), name='idx'))

        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result) 
Example #2
Source File: sql_dash_dropdown.py    From dash-recipes with MIT License
def dff_to_table(dff_json, dropdown_x, dropdown_y):
    dff = pd.read_json(dff_json)
    return {
        'data': [{
            'x': dff[dropdown_x],
            'y': dff[dropdown_y],
            'type': 'bar'
        }],
        'layout': {
            'margin': {
                'l': 20,
                'r': 10,
                'b': 60,
                't': 10
            }
        }
    } 
Example #3
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_date_format_frame(self):
        df = self.tsframe.copy()

        def test_w_date(date, date_unit=None):
            df['date'] = Timestamp(date)
            df.iloc[1, df.columns.get_loc('date')] = pd.NaT
            df.iloc[5, df.columns.get_loc('date')] = pd.NaT
            if date_unit:
                json = df.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = df.to_json(date_format='iso')
            result = read_json(json)
            assert_frame_equal(result, df)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            df.to_json(date_format='iso', date_unit='foo') 
Example #4
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso) 
Example #5
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_date_format_series(self):
        def test_w_date(date, date_unit=None):
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.iloc[1] = pd.NaT
            ts.iloc[5] = pd.NaT
            if date_unit:
                json = ts.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = ts.to_json(date_format='iso')
            result = read_json(json, typ='series')
            assert_series_equal(result, ts)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ts.to_json(date_format='iso', date_unit='foo') 
Example #6
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_date_unit(self):
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101 20:43:42')
        dl = df.columns.get_loc('date')
        df.iloc[1, dl] = Timestamp('19710101 20:43:42')
        df.iloc[2, dl] = Timestamp('21460101 20:43:42')
        df.iloc[4, dl] = pd.NaT

        for unit in ('s', 'ms', 'us', 'ns'):
            json = df.to_json(date_format='epoch', date_unit=unit)

            # force date unit
            result = read_json(json, date_unit=unit)
            assert_frame_equal(result, df)

            # detect date unit
            result = read_json(json, date_unit=None)
            assert_frame_equal(result, df) 
Example #7
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r'''{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }'''

        read_json(s) 
Example #8
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_misc_example(self):

        # parsing unordered input fails
        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with pytest.raises(AssertionError, match=error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected) 
Example #9
Source File: test_readlines.py    From recruit with Apache License 2.0
def test_readjson_chunks_multiple_empty_lines(chunksize):
    j = """

    {"A":1,"B":4}



    {"A":2,"B":5}







    {"A":3,"B":6}
    """
    orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    test = pd.read_json(j, lines=True, chunksize=chunksize)
    if chunksize is not None:
        test = pd.concat(test)
    tm.assert_frame_equal(
        orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)) 
Example #10
Source File: utils_mnli.py    From interpret-text with MIT License
def load_mnli_pandas_df(local_cache_path=".", file_split="train"):
    """Loads extracted test_utils into pandas
    Args:
        local_cache_path ([type], optional): [description].
            Defaults to current working directory.
        file_split (str, optional): The subset to load.
            One of: {"train", "dev_matched", "dev_mismatched"}
            Defaults to "train".
    Returns:
        pd.DataFrame: pandas DataFrame containing the specified
            MultiNLI subset.
    """
    try:
        download_file_and_extract(local_cache_path, file_split)
    except Exception as e:
        raise e
    return pd.read_json(
        os.path.join(local_cache_path, DATA_FILES[file_split]), lines=True
    ) 
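A hypothetical call to the helper above might look like the following; the cache directory is an illustrative assumption, not part of the original source.

# Hypothetical usage of load_mnli_pandas_df; "./mnli_cache" is an assumed path
train_df = load_mnli_pandas_df(local_cache_path="./mnli_cache", file_split="train")
print(train_df.head())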
Example #11
Source File: run_han.py    From DeepResearch with MIT License
def main():
    """
    A small tutorial to use HAN module
    """
    filename = './News_Category_Dataset/News_Category_Dataset.json'
    df = pd.read_json(filename, lines=True).reset_index()
    df = preprocessing(df)
    han_network = HAN.HAN(text=df.text, labels=df.category, num_categories=30,
                          pretrained_embedded_vector_path='./glove.6B/glove.6B.100d.txt',
                          max_features=200000, max_senten_len=150,
                          max_senten_num=4, embedding_size=100,
                          validation_split=0.2, verbose=1)
    print(han_network.get_model().summary())
    han_network.show_hyperparameters()
    ## How to change hyperparameters
    # Let's add regularizers
    # To replace a hyperparameter change the corresponding key value to the new value in set_hyperparameters
    han_network.set_hyperparameters({'l2_regulizer': 1e-13, 'dropout_regulizer': 0.5})
    han_network.show_hyperparameters()
    print(han_network.get_model().summary())
    han_network.train_model(epochs=3, batch_size=16,
                            best_model_path='./best_model.h5') 
Example #12
Source File: test_readlines.py    From recruit with Apache License 2.0
def test_to_jsonl():
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                   columns=["a\\", 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                '{"a\\\\":"foo\\"","b":"bar"}')
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df) 
Example #13
Source File: test_readlines.py    From recruit with Apache License 2.0
def test_read_jsonl_unicode_chars():
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK

    # simulate file handle
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    json = StringIO(json)
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)

    # simulate string
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected) 
Example #14
Source File: test_readlines.py    From vnpy_crypto with MIT License
def test_read_jsonl_unicode_chars():
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK

    # simulate file handle
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    json = StringIO(json)
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected)

    # simulate string
    json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    result = read_json(json, lines=True)
    expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                         columns=['a', 'b'])
    assert_frame_equal(result, expected) 
Example #15
Source File: test_readlines.py    From vnpy_crypto with MIT License
def test_to_jsonl():
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                   columns=["a\\", 'b'])
    result = df.to_json(orient="records", lines=True)
    expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                '{"a\\\\":"foo\\"","b":"bar"}')
    assert result == expected
    assert_frame_equal(read_json(result, lines=True), df) 
Example #16
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected) 
Example #17
Source File: test_compression.py    From recruit with Apache License 2.0
def test_read_zipped_json(datapath):
    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression='zip')

    assert_frame_equal(uncompressed_df, compressed_df) 
Example #18
Source File: test_compression.py    From recruit with Apache License 2.0
def test_read_unsupported_compression_type():
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            pd.read_json(path, compression="unsupported") 
Example #19
Source File: exploredata.py    From MSMARCO with MIT License
def general_stats_data_public(path):
    df = pd.read_json(path)
    query_type_label = {'LOCATION': 0, 'DESCRIPTION':0, 'NUMERIC':0, 'ENTITY':0, 'PERSON':0}
    total_size = len(df)
    for row in df.iterrows():
        category = row[1]['query_type']
        if category in query_type_label:
            query_type_label[category] += 1
    print('Columns:{}'.format(df.columns.values))
    print('{} queries'.format(total_size))
    print('----query distribution by dataset type----')
    for key in query_type_label:
        print(key + ',' + str(query_type_label[key])+ ',' + str(query_type_label[key]/total_size)) 
Example #20
Source File: test_compression.py    From recruit with Apache License 2.0
def test_write_unsupported_compression_type():
    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported") 
Example #21
Source File: test_compression.py    From recruit with Apache License 2.0
def test_lines_with_compression(compression):

    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df.to_json(path, orient='records', lines=True,
                   compression=compression)
        roundtripped_df = pd.read_json(path, lines=True,
                                       compression=compression)
        assert_frame_equal(df, roundtripped_df) 
Example #22
Source File: get_stats_about_length.py    From MSMARCO with MIT License
def main():
    file = sys.argv[1]
    df = pd.read_json(file)
    queries = {}
    answers = {}
    well_formed_answers = {}
    passages = {}
    
    for row in df.iterrows():
        queries[row[1]['query']] = 1
        for v in row[1]['answers']:
            answers[v] = 1
        for v in row[1]['wellFormedAnswers']:
            well_formed_answers[v] = 1
        for p in row[1]['passages']:
            passages[p['passage_text']] = 1
    data = {'queries' : queries, 'answers' : answers, 'well_formed_answers' : well_formed_answers, 'passages' : passages}
    for value in data:

        histogram = {}
        for v in data[value]:
            l = len(v.split())
            if l in histogram:
                histogram[l] += 1
            else:
                histogram[l] = 1
        compute_stats(histogram, value) 
Example #23
Source File: converttowellformed.py    From MSMARCO with MIT License
def makewf(input,output):
    df = pd.read_json(input)
    df = df.drop('answers', axis=1)
    df = df.rename(columns={'wellFormedAnswers':'answers'})
    df = df[df.answers != '[]']
    df.to_json(output)
    return 
Example #24
Source File: test_compression.py    From recruit with Apache License 2.0
def test_with_s3_url(compression, s3_resource):
    # Bucket "pandas-test" created in tests/io/conftest.py

    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, 'rb') as f:
            s3_resource.Bucket("pandas-test").put_object(Key='test-1', Body=f)

    roundtripped_df = pd.read_json('s3://pandas-test/test-1',
                                   compression=compression)
    assert_frame_equal(df, roundtripped_df) 
Example #25
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_index_false_from_json_to_json(self, orient, index):
        # GH25170
        # Test index=False in from_json to_json
        expected = DataFrame({'a': [1, 2], 'b': [3, 4]})
        dfjson = expected.to_json(orient=orient, index=index)
        result = read_json(dfjson, orient=orient)
        assert_frame_equal(result, expected) 
Example #26
Source File: test_compression.py    From recruit with Apache License 2.0
def test_compression_roundtrip(compression):
    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
                       [12.32112, 123123.2, 321321.2]],
                      index=['A', 'B'], columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        assert_frame_equal(df, pd.read_json(path,
                                            compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode('utf8')
        assert_frame_equal(df, pd.read_json(result)) 
Example #27
Source File: test_json_table_schema.py    From recruit with Apache License 2.0
def test_empty_frame_roundtrip(self, strict_check):
        # GH 21287
        df = pd.DataFrame([], columns=['a', 'b', 'c'])
        expected = df.copy()
        out = df.to_json(orient='table')
        result = pd.read_json(out, orient='table')
        # TODO: When DF coercion issue (#21345) is resolved tighten type checks
        tm.assert_frame_equal(expected, result,
                              check_dtype=strict_check,
                              check_index_type=strict_check) 
Example #28
Source File: test_json_table_schema.py    From recruit with Apache License 2.0
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        with pytest.raises(NotImplementedError, match='can not yet read '):
            pd.read_json(out, orient="table") 
Example #29
Source File: test_json_table_schema.py    From recruit with Apache License 2.0
def test_read_json_table_orient(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result) 
Example #30
Source File: test_pandas.py    From recruit with Apache License 2.0
def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)