Python pandas.io.json.json_normalize() Examples

The following are 30 code examples of pandas.io.json.json_normalize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.io.json, or try the search function.
Example #1
Source File: test_normalize.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_meta_name_conflict(self):
        """Meta fields that clash with record fields need a ``meta_prefix``.

        Without a prefix ``json_normalize`` must raise ``ValueError``; with
        ``meta_prefix='meta'`` both the record columns and the prefixed meta
        columns must be present in the result.
        """
        records = [{'foo': 'hello',
                    'bar': 'there',
                    'data': [{'foo': 'something', 'bar': 'else'},
                             {'foo': 'something2', 'bar': 'else2'}]}]
        meta_fields = ['foo', 'bar']

        expected_error = (r"Conflicting metadata name (foo|bar),"
                          " need distinguishing prefix")
        with pytest.raises(ValueError, match=expected_error):
            json_normalize(records, 'data', meta=['foo', 'bar'])

        normalized = json_normalize(records, 'data', meta=meta_fields,
                                    meta_prefix='meta')

        # Membership on a DataFrame checks its column labels.
        for column in ('metafoo', 'metabar', 'foo', 'bar'):
            assert column in normalized
Example #2
Source File: run_s2vt.py    From caption-guided-saliency with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def get_msr_vtt_data(cfg):
    """Load MSR-VTT captions and split them into train/validation/test frames.

    Reads ``cfg.trainval_annotations`` and ``cfg.test_annotations`` (JSON
    files holding ``sentences`` and ``videos`` lists), keeps the sentences
    whose ``video_id`` belongs to the requested split, and adds a
    ``video_path`` column pointing at ``<video_id>_incp_v3.npy`` inside the
    matching descriptor directory.

    :param cfg: object exposing ``trainval_annotations``,
        ``test_annotations``, ``path_to_trainval_descriptors`` and
        ``path_to_test_descriptors``.
    :return: tuple ``(train_vids, val_vids, test_vids)`` of DataFrames.
    """

    def load_annotations(path):
        # Returns (sentences, videos) frames for one annotation file.
        with open(path) as data_file:
            data = json.load(data_file)
        return json_normalize(data['sentences']), json_normalize(data['videos'])

    def select_split(sentences, videos, split, descriptor_dir):
        split_ids = videos.loc[videos['split'] == split, 'video_id']
        # .copy() yields an independent frame, so adding a column below does
        # not write into a slice of `sentences` (SettingWithCopyWarning).
        subset = sentences.loc[sentences['video_id'].isin(split_ids)].copy()
        subset['video_path'] = subset['video_id'].map(
            lambda x: os.path.join(descriptor_dir, x + "_incp_v3.npy"))
        return subset

    # trainval data
    sentences, videos = load_annotations(cfg.trainval_annotations)
    train_vids = select_split(sentences, videos, "train",
                              cfg.path_to_trainval_descriptors)
    val_vids = select_split(sentences, videos, "validate",
                            cfg.path_to_trainval_descriptors)

    # test data
    sentences, videos = load_annotations(cfg.test_annotations)
    test_vids = select_split(sentences, videos, "test",
                             cfg.path_to_test_descriptors)

    return train_vids, val_vids, test_vids
Example #3
Source File: test_normalize.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """Nested keys are joined with ``sep`` (default ``'.'``) in column names."""
        # GH 14883
        separator_cases = [
            ({}, ['A.A', 'A.B']),
            ({'sep': '_'}, ['A_A', 'A_B']),
            ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
        ]
        for kwargs, columns in separator_cases:
            # A fresh input dict per call, exactly as separate literals would be.
            result = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
            expected = DataFrame([[1, 2]], columns=columns)
            tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to nested meta paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected_columns = Index(['name', 'pop',
                                  'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected_columns)
Example #4
Source File: test_normalize.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """Flatten the ``counties`` records of one state, all states, and with meta.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        result = json_normalize(first_state, 'counties')
        expected = DataFrame(first_state['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties)

        tm.assert_frame_equal(result, expected)

        # With meta='state' every county row also carries its state name.
        result = json_normalize(state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #5
Source File: test_normalize.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """Flatten the ``counties`` records of one state, all states, and with meta.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        result = json_normalize(first_state, 'counties')
        expected = DataFrame(first_state['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties)

        tm.assert_frame_equal(result, expected)

        # With meta='state' every county row also carries its state name.
        result = json_normalize(state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #6
Source File: test_normalize.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """Nested keys are joined with ``sep`` (default ``'.'``) in column names."""
        # GH 14883
        separator_cases = [
            ({}, ['A.A', 'A.B']),
            ({'sep': '_'}, ['A_A', 'A_B']),
            ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
        ]
        for kwargs, columns in separator_cases:
            # A fresh input dict per call, exactly as separate literals would be.
            result = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
            expected = DataFrame([[1, 2]], columns=columns)
            tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to nested meta paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected_columns = Index(['name', 'pop',
                                  'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected_columns)
Example #7
Source File: test_normalize.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_missing_field(self, author_missing_data):
        """Records lacking nested fields normalize to NaN-filled columns."""
        # GH20030:
        columns = ['info',
                   'author_name.first',
                   'author_name.last_name',
                   'info.created_at',
                   'info.last_updated']
        # First record: every column is missing.
        empty_row = dict.fromkeys(columns, np.nan)
        # Second record: nested fields are populated, top-level 'info' is None.
        filled_row = dict(zip(columns, [None,
                                        'Jane',
                                        'Doe',
                                        '11/08/1993',
                                        '26/05/2012']))
        expected = DataFrame([empty_row, filled_row])

        result = json_normalize(author_missing_data)
        tm.assert_frame_equal(result, expected)
Example #8
Source File: test_normalize.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` is prepended to record columns but not to meta columns.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                              DataFrame(first_state['counties']))

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties).rename(
            columns=lambda x: 'county_' + x)
        # The meta column is added after renaming, so it stays unprefixed.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #9
Source File: api.py    From mygeotab-python with Apache License 2.0 6 votes vote down vote up
def to_dataframe(self, normalize=False):
        """Transforms the data into a pandas DataFrame

        :param normalize: Whether or not to normalize any nested objects in the results into distinct columns.
        :type normalize: bool
        :rtype: pandas.DataFrame
        :raise ImportError: If the pandas package cannot be imported.
        """
        try:
            import pandas
        except ImportError:
            raise ImportError("The 'pandas' package could not be imported")
        if not normalize:
            return pandas.DataFrame.from_dict(self.data)
        # pandas 1.0 exposed json_normalize at the top level and deprecated the
        # pandas.io.json import path; prefer the new location and only fall
        # back to the old one for installs that predate it.
        json_normalize = getattr(pandas, "json_normalize", None)
        if json_normalize is None:
            from pandas.io.json import json_normalize

        return json_normalize(self.data)
Example #10
Source File: test_normalize.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_more_deeply_nested(self, deep_nested):
        """Records two levels down pick up meta values from both parent levels."""
        record_path = ['states', 'cities']
        meta_paths = ['country', ['states', 'name']]

        result = json_normalize(deep_nested, record_path, meta=meta_paths)
        # meta_prefix={'states': 'state_'})

        countries = ['USA'] * 4 + ['Germany'] * 3
        state_names = ['California', 'California', 'Ohio', 'Ohio',
                       'Bayern', 'Nordrhein-Westfalen',
                       'Nordrhein-Westfalen']
        city_names = ['San Francisco', 'Los Angeles', 'Columbus',
                      'Cleveland', 'Munich', 'Duesseldorf', 'Koeln']
        populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
        ex_data = {'country': countries,
                   'states.name': state_names,
                   'name': city_names,
                   'pop': populations}

        # Column order is taken from the result; only the contents are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)
Example #11
Source File: eval.py    From video-caption-openNMT.pytorch with MIT License 6 votes vote down vote up
def main(opt):
    """Score predicted captions against the ground-truth sentences and print the result.

    :param opt: mapping with the keys ``videoinfo_json`` (JSON file holding a
        ``sentences`` list), ``video_ids`` (one video file name per line) and
        ``pred`` (one predicted caption per line, read in step with
        ``video_ids``).
    """
    scorer = COCOScorer()
    # Context managers close every file even if parsing or scoring fails.
    with open(opt["videoinfo_json"]) as info_file:
        gt_dataframe = json_normalize(json.load(info_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    samples = {}
    with open(opt['video_ids']) as video_ids, open(opt['pred']) as sents:
        for video_id in video_ids:
            # strip the line ending first so an id without an extension does
            # not keep its newline, then strip file extensions
            video_id = video_id.strip().split('.')[0]
            sent = sents.readline().strip()
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
Example #12
Source File: test_normalize.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` is prepended to record columns but not to meta columns.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                              DataFrame(first_state['counties']))

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties).rename(
            columns=lambda x: 'county_' + x)
        # The meta column is added after renaming, so it stays unprefixed.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #13
Source File: test_normalize.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_non_ascii_key(self):
        """Non-ASCII top-level keys survive normalization next to flattened keys."""
        if not compat.PY3:
            testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                        '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
        else:
            # On Python 3 the UTF-8 bytes are decoded to text before parsing.
            encoded = (
                b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
                b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
            )
            testjson = encoded.decode('utf8')

        unicode_key = b"\xc3\x9cnic\xc3\xb8de".decode('utf8')
        expected = DataFrame({
            u'sub.A': [1, 3],
            u'sub.B': [2, 4],
            unicode_key: [0, 1]
        })

        parsed = json.loads(testjson)
        result = json_normalize(parsed)
        tm.assert_frame_equal(result, expected)
Example #14
Source File: test_normalize.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_non_ascii_key(self):
        """Non-ASCII top-level keys survive normalization next to flattened keys."""
        if not compat.PY3:
            testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                        '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
        else:
            # On Python 3 the UTF-8 bytes are decoded to text before parsing.
            encoded = (
                b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
                b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
            )
            testjson = encoded.decode('utf8')

        unicode_key = b"\xc3\x9cnic\xc3\xb8de".decode('utf8')
        expected = DataFrame({
            u'sub.A': [1, 3],
            u'sub.B': [2, 4],
            unicode_key: [0, 1]
        })

        parsed = json.loads(testjson)
        result = json_normalize(parsed)
        tm.assert_frame_equal(result, expected)
Example #15
Source File: test_json_norm.py    From Computable with MIT License 6 votes vote down vote up
def test_simple_normalize(self):
        """Flatten the ``counties`` records of one state, all states, and with meta.

        Reads the records from ``self.state_data``; the final check expects
        Florida (3 counties) followed by Ohio (2 counties).
        """
        first_state = self.state_data[0]
        result = json_normalize(first_state, 'counties')
        expected = DataFrame(first_state['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(self.state_data, 'counties')

        all_counties = [county
                        for rec in self.state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties)

        tm.assert_frame_equal(result, expected)

        # With meta='state' every county row also carries its state name.
        result = json_normalize(self.state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #16
Source File: test_normalize.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_more_deeply_nested(self, deep_nested):
        """Records two levels down pick up meta values from both parent levels."""
        record_path = ['states', 'cities']
        meta_paths = ['country', ['states', 'name']]

        result = json_normalize(deep_nested, record_path, meta=meta_paths)
        # meta_prefix={'states': 'state_'})

        countries = ['USA'] * 4 + ['Germany'] * 3
        state_names = ['California', 'California', 'Ohio', 'Ohio',
                       'Bayern', 'Nordrhein-Westfalen',
                       'Nordrhein-Westfalen']
        city_names = ['San Francisco', 'Los Angeles', 'Columbus',
                      'Cleveland', 'Munich', 'Duesseldorf', 'Koeln']
        populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
        ex_data = {'country': countries,
                   'states.name': state_names,
                   'name': city_names,
                   'pop': populations}

        # Column order is taken from the result; only the contents are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)
Example #17
Source File: test_normalize.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """Nested keys are joined with ``sep`` (default ``'.'``) in column names."""
        # GH 14883
        separator_cases = [
            ({}, ['A.A', 'A.B']),
            ({'sep': '_'}, ['A_A', 'A_B']),
            ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
        ]
        for kwargs, columns in separator_cases:
            # A fresh input dict per call, exactly as separate literals would be.
            result = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
            expected = DataFrame([[1, 2]], columns=columns)
            tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to nested meta paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected_columns = Index(['name', 'pop',
                                  'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected_columns)
Example #18
Source File: visualize.py    From vecto with Mozilla Public License 2.0 6 votes vote down vote up
def df_from_file(path):
    """Load a results JSON file into a flat DataFrame with a ``result`` column.

    The ``details`` column is dropped when present. ``result`` is copied from
    the ``result.<measurement>`` column, where the measurement name is the
    first unique value of ``experiment_setup.default_measurement``, or
    ``"accuracy"`` (with a logged warning) when that column is missing.

    :param path: location passed to ``load_json``.
    :return: the normalized DataFrame.
    """
    # meta = [["experiment_setup", "task"],
    #         ["experiment_setup", "subcategory"],
    #         ["experiment_setup", "method"],
    #         ["experiment_setup", "embeddings"]]
    frame = json_normalize(load_json(path))
    if "details" in frame:
        frame.drop("details", axis="columns", inplace=True)
    try:
        measurement = frame["experiment_setup.default_measurement"].unique()[0]
    except KeyError:
        measurement = "accuracy"
        logger.warning(f"default_measurement not specified in {path}")
    frame["result"] = frame["result." + measurement]
    # df["reciprocal_rank"] = 1 / (df["rank"] + 1)
    return frame
Example #19
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """Flatten the ``counties`` records of one state, all states, and with meta.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        result = json_normalize(first_state, 'counties')
        expected = DataFrame(first_state['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties)

        tm.assert_frame_equal(result, expected)

        # With meta='state' every county row also carries its state name.
        result = json_normalize(state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #20
Source File: test_normalize.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """Flatten the ``counties`` records of one state, all states, and with meta.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        result = json_normalize(first_state, 'counties')
        expected = DataFrame(first_state['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties)

        tm.assert_frame_equal(result, expected)

        # With meta='state' every county row also carries its state name.
        result = json_normalize(state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #21
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """Nested keys are joined with ``sep`` (default ``'.'``) in column names."""
        # GH 14883
        separator_cases = [
            ({}, ['A.A', 'A.B']),
            ({'sep': '_'}, ['A_A', 'A_B']),
            ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
        ]
        for kwargs, columns in separator_cases:
            # A fresh input dict per call, exactly as separate literals would be.
            result = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
            expected = DataFrame([[1, 2]], columns=columns)
            tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to nested meta paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected_columns = Index(['name', 'pop',
                                  'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected_columns)
Example #22
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_more_deeply_nested(self, deep_nested):
        """Records two levels down pick up meta values from both parent levels."""
        record_path = ['states', 'cities']
        meta_paths = ['country', ['states', 'name']]

        result = json_normalize(deep_nested, record_path, meta=meta_paths)
        # meta_prefix={'states': 'state_'})

        countries = ['USA'] * 4 + ['Germany'] * 3
        state_names = ['California', 'California', 'Ohio', 'Ohio',
                       'Bayern', 'Nordrhein-Westfalen',
                       'Nordrhein-Westfalen']
        city_names = ['San Francisco', 'Los Angeles', 'Columbus',
                      'Cleveland', 'Munich', 'Duesseldorf', 'Koeln']
        populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
        ex_data = {'country': countries,
                   'states.name': state_names,
                   'name': city_names,
                   'pop': populations}

        # Column order is taken from the result; only the contents are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)
Example #23
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_meta_name_conflict(self):
        """Meta fields that clash with record fields need a ``meta_prefix``.

        Without a prefix ``json_normalize`` must raise ``ValueError``; with
        ``meta_prefix='meta'`` both the record columns and the prefixed meta
        columns must be present in the result.
        """
        records = [{'foo': 'hello',
                    'bar': 'there',
                    'data': [{'foo': 'something', 'bar': 'else'},
                             {'foo': 'something2', 'bar': 'else2'}]}]
        meta_fields = ['foo', 'bar']

        expected_error = (r"Conflicting metadata name (foo|bar),"
                          " need distinguishing prefix")
        with pytest.raises(ValueError, match=expected_error):
            json_normalize(records, 'data', meta=['foo', 'bar'])

        normalized = json_normalize(records, 'data', meta=meta_fields,
                                    meta_prefix='meta')

        # Membership on a DataFrame checks its column labels.
        for column in ('metafoo', 'metabar', 'foo', 'bar'):
            assert column in normalized
Example #24
Source File: frmt.py    From bitQuant with MIT License 6 votes vote down vote up
def format_df(response, job):
    """Turn an exchange API response into a DataFrame with standard columns.

    :param response: decoded API payload handed to ``json_normalize``.
    :param job: mapping read for ``type``, ``exchange`` and ``symbol``.
    :return: the frame returned by ``standard_columns``, with ``exchange`` and
        ``symbol`` columns filled from ``job`` when they are absent.
    """
    is_trades = job['type'] == 'trades'

    if is_trades and job['exchange'] == 'btce':
        # Rebind response to response[col] for each key of the original
        # payload -- presumably a single-key wrapper around the trade list;
        # verify against the btce API.
        for col in response:
            response = response[col]

    df = json_normalize(response)

    if is_trades and job['exchange'] == 'coinbase':
        df['time'] = to_datetime(df['time'], utc=0)
        # Integer view of the datetimes floor-divided by 1e9 -- presumably
        # nanoseconds to whole seconds; confirm the dtype of 'time'.
        df['timestamp'] = df['time'].astype(np.int64) // 10**9

    df = standard_columns(df)

    for field in ('exchange', 'symbol'):
        if field not in df:
            df[field] = job[field]
    return df

#|Standardize column names and drop columns not in dictionary below 
Example #25
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` is prepended to record columns but not to meta columns.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                              DataFrame(first_state['counties']))

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties).rename(
            columns=lambda x: 'county_' + x)
        # The meta column is added after renaming, so it stays unprefixed.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #26
Source File: test_normalize.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_missing_field(self, author_missing_data):
        """Records lacking nested fields normalize to NaN-filled columns."""
        # GH20030:
        columns = ['info',
                   'author_name.first',
                   'author_name.last_name',
                   'info.created_at',
                   'info.last_updated']
        # First record: every column is missing.
        empty_row = dict.fromkeys(columns, np.nan)
        # Second record: nested fields are populated, top-level 'info' is None.
        filled_row = dict(zip(columns, [None,
                                        'Jane',
                                        'Doe',
                                        '11/08/1993',
                                        '26/05/2012']))
        expected = DataFrame([empty_row, filled_row])

        result = json_normalize(author_missing_data)
        tm.assert_frame_equal(result, expected)
Example #27
Source File: query.py    From lixinger-openapi with Apache License 2.0 6 votes vote down vote up
def query_dataframe(url_suffix, query_params):
    '''
    API call that returns a dictionary with the following structure:
    key     value             type
    code    return code       int
    data    query result      dataframe
    msg     return message    string
    '''
    response = query_json(url_suffix, query_params)
    outcome = {'code': -1, 'data': None, 'msg': ''}

    # Guard clause: no response at all is reported with the default code -1.
    if response is None:
        outcome['msg'] = 'query failed.'
        return outcome

    # 'code' and 'msg' are copied through unchanged when present.
    for field in ('code', 'msg'):
        if field in response.keys():
            outcome[field] = response[field]
    # 'data' is the only field converted into a DataFrame.
    if 'data' in response.keys():
        outcome['data'] = json_normalize(response['data'])
    return outcome
Example #28
Source File: run_s2vt.py    From caption-guided-saliency with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def get_flickr30k_data(cfg):
    """Load Flickr30k captions and split them using the provided split files.

    Each line of ``cfg.annotations_path`` is parsed as
    ``<name>.<ext>#<sentence_id>\\t<caption>``; the text before the first
    ``"."`` becomes ``video_id``. Each split file lists one file name per
    line and is reduced to the same id form. A ``video_path`` column is built
    from ``cfg.path_to_descriptors``, the id, ``cfg.descriptor_suffix`` and
    ``".npy"``.

    :param cfg: object exposing ``train_file``, ``val_file``, ``test_file``,
        ``annotations_path``, ``path_to_descriptors`` and
        ``descriptor_suffix``.
    :return: tuple ``(train_imgs, val_imgs, test_imgs)`` of DataFrames.
    """

    def read_lines(path):
        # Context manager so the handle is closed instead of leaked.
        with open(path) as handle:
            return handle.read().splitlines()

    def read_split(path):
        return {name.split(".")[0] for name in read_lines(path)}

    #using the provided splits
    train_split = read_split(cfg.train_file)
    val_split = read_split(cfg.val_file)
    test_split = read_split(cfg.test_file)

    data = [{"video_id": item.split(".")[0],
             "sentence_id": item.split("#")[1].split("\t")[0],
             "caption": item.split("\t")[1]}
            for item in read_lines(cfg.annotations_path)]

    sentences = json_normalize(data)
    sentences['video_path'] = sentences['video_id'].map(
        lambda x: os.path.join(cfg.path_to_descriptors,
                               x + cfg.descriptor_suffix + ".npy"))

    # NOTE: earlier code called reset_index() on each split and discarded the
    # result; reset_index is not in-place, so the frames always kept the row
    # labels of `sentences`. That is preserved here for existing callers.
    train_imgs = sentences.loc[sentences["video_id"].isin(train_split)]
    val_imgs = sentences.loc[sentences["video_id"].isin(val_split)]
    test_imgs = sentences.loc[sentences["video_id"].isin(test_split)]

    return train_imgs, val_imgs, test_imgs
Example #29
Source File: test_normalize.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` is prepended to record columns but not to meta columns.

        The final check expects ``state_data`` to hold Florida (3 counties)
        followed by Ohio (2 counties).
        """
        first_state = state_data[0]
        tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                              DataFrame(first_state['counties']))

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        all_counties = [county
                        for rec in state_data
                        for county in rec['counties']]
        expected = DataFrame(all_counties).rename(
            columns=lambda x: 'county_' + x)
        # The meta column is added after renaming, so it stays unprefixed.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example #30
Source File: record_packets_to_csv.py    From network-pipeline with Apache License 2.0 5 votes vote down vote up
def process_raw_frame(self,
                          id=None,
                          msg=None):
        """process_raw_frame

        Flatten a nested json dictionary into a single-level
        dictionary whose keys carry a ``raw_`` prefix, and
        record every new key in ``self.raw_keys``

        :param id: key for this msg
        :param msg: raw frame for packet
        :return: the flattened ``raw_``-prefixed dictionary
        """

        # normalize into a dataframe, then round-trip through json to get
        # a plain {column: {row label: value}} dictionary
        frame = json_normalize(msg)
        columns = json.loads(frame.to_json())

        flat_msg = {}

        for column, rows in columns.items():
            prefixed = "raw_{}".format(column)
            # only the row labelled "0" is read from each column
            flat_msg[prefixed] = rows["0"]
            if prefixed not in self.raw_keys:
                self.raw_keys[prefixed] = column
        # end of capturing all unique keys

        # the column-oriented dictionary (not flat_msg) is tagged with the
        # id and appended to the history
        columns["raw_id"] = id
        self.all_raw.append(columns)

        log.debug("RAW data updated:")
        log.debug(self.raw_keys)
        log.debug(self.all_raw)
        log.debug("")

        return flat_msg
    # end of process_raw_frame