Python pandas.io.json.json_normalize() Examples
The following are 30 code examples of pandas.io.json.json_normalize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.io.json, or try the search function.
Example #1
Source File: test_normalize.py From recruit with Apache License 2.0 | 7 votes |
def test_meta_name_conflict(self):
    """Meta names that clash with record names raise unless ``meta_prefix`` is given."""
    records = [{'foo': 'hello',
                'bar': 'there',
                'data': [{'foo': 'something', 'bar': 'else'},
                         {'foo': 'something2', 'bar': 'else2'}]}]

    # Without a prefix the call is expected to fail with this message.
    pattern = (r"Conflicting metadata name (foo|bar),"
               " need distinguishing prefix")
    with pytest.raises(ValueError, match=pattern):
        json_normalize(records, 'data', meta=['foo', 'bar'])

    # With a prefix both the record columns and the prefixed meta columns
    # must be present in the result.
    normalized = json_normalize(records, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')
    for column in ['metafoo', 'metabar', 'foo', 'bar']:
        assert column in normalized
Example #2
Source File: run_s2vt.py From caption-guided-saliency with BSD 2-Clause "Simplified" License | 6 votes |
def get_msr_vtt_data(cfg):
    """Load the MSR-VTT captions and split them into train/validation/test frames.

    Reads two JSON annotation files, each holding a ``sentences`` and a
    ``videos`` list, and returns the sentences of every split with an extra
    ``video_path`` column pointing at the ``<video_id>_incp_v3.npy``
    descriptor file.

    :param cfg: object exposing ``trainval_annotations``, ``test_annotations``,
        ``path_to_trainval_descriptors`` and ``path_to_test_descriptors``
    :return: tuple ``(train_vids, val_vids, test_vids)`` of DataFrames; rows
        keep the index they had in the normalized ``sentences`` table
    """
    #trainval data
    sentences, videos = _load_msr_vtt_annotations(cfg.trainval_annotations)
    train_vids = _msr_vtt_split(sentences, videos, "train",
                                cfg.path_to_trainval_descriptors)
    val_vids = _msr_vtt_split(sentences, videos, "validate",
                              cfg.path_to_trainval_descriptors)

    #test data
    sentences, videos = _load_msr_vtt_annotations(cfg.test_annotations)
    test_vids = _msr_vtt_split(sentences, videos, "test",
                               cfg.path_to_test_descriptors)
    return train_vids, val_vids, test_vids


def _load_msr_vtt_annotations(path):
    """Read one annotation file and return its (sentences, videos) DataFrames."""
    with open(path) as data_file:
        data = json.load(data_file)
    return json_normalize(data['sentences']), json_normalize(data['videos'])


def _msr_vtt_split(sentences, videos, split, descriptors_dir):
    """Return the sentences whose video belongs to ``split``, with ``video_path`` added."""
    split_ids = videos[videos['split'] == split]["video_id"]
    # Copy the selection so the new column is written to an independent frame
    # instead of to a slice of ``sentences`` (which triggers pandas'
    # SettingWithCopyWarning and is not guaranteed to stick).
    subset = sentences.loc[sentences["video_id"].isin(split_ids)].copy()
    subset['video_path'] = subset['video_id'].map(
        lambda x: os.path.join(descriptors_dir, x + "_incp_v3.npy"))
    return subset
Example #3
Source File: test_normalize.py From vnpy_crypto with MIT License | 6 votes |
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` decides how nested keys are joined into column names."""
    # GH 14883
    # Default separator first, then an ASCII and a non-ASCII override.
    cases = (
        ({}, ['A.A', 'A.B']),
        ({'sep': '_'}, ['A_A', 'A_B']),
        ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
    )
    for kwargs, columns in cases:
        frame = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
        wanted = DataFrame([[1, 2]], columns=columns)
        tm.assert_frame_equal(frame.reindex_like(wanted), wanted)

    # The separator is also used when joining a nested meta path.
    frame = json_normalize(deep_nested, ['states', 'cities'],
                           meta=['country', ['states', 'name']],
                           sep='_')
    wanted_columns = Index(['name', 'pop', 'country',
                            'states_name']).sort_values()
    assert frame.columns.sort_values().equals(wanted_columns)
Example #4
Source File: test_normalize.py From recruit with Apache License 2.0 | 6 votes |
def test_simple_normalize(self, state_data):
    """Normalize the ``counties`` records of one state, of all states, and with meta."""
    # Single record: the result equals a frame built from its counties.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    # All records: counties are concatenated in input order.
    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties)
    tm.assert_frame_equal(json_normalize(state_data, 'counties'), expected)

    # With meta='state' the state value is repeated for each of its counties.
    with_state = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(with_state, expected)
Example #5
Source File: test_normalize.py From vnpy_crypto with MIT License | 6 votes |
def test_simple_normalize(self, state_data):
    """Normalize the ``counties`` records of one state, of all states, and with meta."""
    # Single record: the result equals a frame built from its counties.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    # All records: counties are concatenated in input order.
    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties)
    tm.assert_frame_equal(json_normalize(state_data, 'counties'), expected)

    # With meta='state' the state value is repeated for each of its counties.
    with_state = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(with_state, expected)
Example #6
Source File: test_normalize.py From recruit with Apache License 2.0 | 6 votes |
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` decides how nested keys are joined into column names."""
    # GH 14883
    # Default separator first, then an ASCII and a non-ASCII override.
    cases = (
        ({}, ['A.A', 'A.B']),
        ({'sep': '_'}, ['A_A', 'A_B']),
        ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
    )
    for kwargs, columns in cases:
        frame = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
        wanted = DataFrame([[1, 2]], columns=columns)
        tm.assert_frame_equal(frame.reindex_like(wanted), wanted)

    # The separator is also used when joining a nested meta path.
    frame = json_normalize(deep_nested, ['states', 'cities'],
                           meta=['country', ['states', 'name']],
                           sep='_')
    wanted_columns = Index(['name', 'pop', 'country',
                            'states_name']).sort_values()
    assert frame.columns.sort_values().equals(wanted_columns)
Example #7
Source File: test_normalize.py From recruit with Apache License 2.0 | 6 votes |
def test_missing_field(self, author_missing_data):
    """Records lacking nested fields must normalize to NaN-filled columns."""
    # GH20030:
    result = json_normalize(author_missing_data)

    columns = ['info', 'author_name.first', 'author_name.last_name',
               'info.created_at', 'info.last_updated']
    # First expected row: every column is NaN.
    empty_row = dict.fromkeys(columns, np.nan)
    # Second expected row: 'info' is None, the flattened fields are populated.
    filled_row = dict(zip(columns,
                          [None, 'Jane', 'Doe', '11/08/1993', '26/05/2012']))

    expected = DataFrame([empty_row, filled_row])
    tm.assert_frame_equal(result, expected)
Example #8
Source File: test_normalize.py From recruit with Apache License 2.0 | 6 votes |
def test_record_prefix(self, state_data):
    """``record_prefix`` is prepended to record columns but not to meta columns."""
    # Baseline without a prefix, on a single record.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties).rename(
        columns=lambda name: 'county_' + name)
    # The meta column is added after renaming, so it stays unprefixed.
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(result, expected)
Example #9
Source File: api.py From mygeotab-python with Apache License 2.0 | 6 votes |
def to_dataframe(self, normalize=False):
    """Transforms the data into a pandas DataFrame

    :param normalize: Whether or not to normalize any nested objects in the
        results into distinct columns.
    :type normalize: bool
    :rtype: pandas.DataFrame
    :raises ImportError: if the ``pandas`` package is not installed
    """
    try:
        import pandas
    except ImportError:
        raise ImportError("The 'pandas' package could not be imported")
    if normalize:
        # ``pandas.io.json.json_normalize`` is deprecated since pandas 1.0 in
        # favour of the top-level ``pandas.json_normalize``; only fall back to
        # the legacy location when the top-level function is missing.
        json_normalize = getattr(pandas, "json_normalize", None)
        if json_normalize is None:
            from pandas.io.json import json_normalize
        return json_normalize(self.data)
    return pandas.DataFrame.from_dict(self.data)
Example #10
Source File: test_normalize.py From recruit with Apache License 2.0 | 6 votes |
def test_more_deeply_nested(self, deep_nested):
    """Normalize city records two levels deep, keeping country and state name as meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])

    countries = ['USA'] * 4 + ['Germany'] * 3
    state_names = ['California', 'California', 'Ohio', 'Ohio',
                   'Bayern', 'Nordrhein-Westfalen', 'Nordrhein-Westfalen']
    city_names = ['San Francisco', 'Los Angeles', 'Columbus', 'Cleveland',
                  'Munich', 'Duesseldorf', 'Koeln']
    populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
    ex_data = {'country': countries,
               'states.name': state_names,
               'name': city_names,
               'pop': populations}

    # Column order is taken from the result, so only the content is compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
Example #11
Source File: eval.py From video-caption-openNMT.pytorch with MIT License | 6 votes |
def main(opt):
    """Score predicted captions against the ground truth and print the score.

    :param opt: mapping with the keys ``"videoinfo_json"`` (JSON file whose
        ``sentences`` entry holds the ground-truth captions), ``"video_ids"``
        (text file, one video file name per line) and ``"pred"`` (text file,
        one predicted sentence per line, read in step with ``video_ids``)
    """
    scorer = COCOScorer()
    # Context managers make sure every file is closed, also on errors.
    with open(opt["videoinfo_json"]) as info_file:
        gt_dataframe = json_normalize(json.load(info_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    with open(opt['video_ids']) as video_ids, open(opt['pred']) as sents:
        for video_id in video_ids:
            # strip file extensions; the line is stripped first so that an id
            # without an extension does not keep its trailing newline
            video_id = video_id.strip().split('.')[0]
            # readline() yields '' once the predictions run out, so a missing
            # prediction becomes an empty caption
            sent = sents.readline().strip()
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
Example #12
Source File: test_normalize.py From vnpy_crypto with MIT License | 6 votes |
def test_record_prefix(self, state_data):
    """``record_prefix`` is prepended to record columns but not to meta columns."""
    # Baseline without a prefix, on a single record.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties).rename(
        columns=lambda name: 'county_' + name)
    # The meta column is added after renaming, so it stays unprefixed.
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(result, expected)
Example #13
Source File: test_normalize.py From vnpy_crypto with MIT License | 6 votes |
def test_non_ascii_key(self):
    """Top-level keys containing non-ASCII characters survive normalization."""
    if compat.PY3:
        # UTF-8 encoded bytes, decoded to text before parsing.
        raw = (b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
               b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
        testjson = raw.decode('utf8')
    else:
        testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                    '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

    unicode_key = b"\xc3\x9cnic\xc3\xb8de".decode('utf8')
    testdata = {
        u'sub.A': [1, 3],
        u'sub.B': [2, 4],
        unicode_key: [0, 1],
    }

    parsed = json.loads(testjson)
    tm.assert_frame_equal(json_normalize(parsed), DataFrame(testdata))
Example #14
Source File: test_normalize.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_non_ascii_key(self):
    """Top-level keys containing non-ASCII characters survive normalization."""
    if compat.PY3:
        # UTF-8 encoded bytes, decoded to text before parsing.
        raw = (b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
               b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
        testjson = raw.decode('utf8')
    else:
        testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                    '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

    unicode_key = b"\xc3\x9cnic\xc3\xb8de".decode('utf8')
    testdata = {
        u'sub.A': [1, 3],
        u'sub.B': [2, 4],
        unicode_key: [0, 1],
    }

    parsed = json.loads(testjson)
    tm.assert_frame_equal(json_normalize(parsed), DataFrame(testdata))
Example #15
Source File: test_json_norm.py From Computable with MIT License | 6 votes |
def test_simple_normalize(self):
    """Normalize the ``counties`` records of one state, of all states, and with meta."""
    states = self.state_data

    # Single record: the result equals a frame built from its counties.
    tm.assert_frame_equal(json_normalize(states[0], 'counties'),
                          DataFrame(states[0]['counties']))

    # All records: counties are concatenated in input order.
    all_counties = [county for rec in states for county in rec['counties']]
    expected = DataFrame(all_counties)
    tm.assert_frame_equal(json_normalize(states, 'counties'), expected)

    # With meta='state' the state value is repeated for each of its counties.
    with_state = json_normalize(states, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(with_state, expected)
Example #16
Source File: test_normalize.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_more_deeply_nested(self, deep_nested):
    """Normalize city records two levels deep, keeping country and state name as meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])

    countries = ['USA'] * 4 + ['Germany'] * 3
    state_names = ['California', 'California', 'Ohio', 'Ohio',
                   'Bayern', 'Nordrhein-Westfalen', 'Nordrhein-Westfalen']
    city_names = ['San Francisco', 'Los Angeles', 'Columbus', 'Cleveland',
                  'Munich', 'Duesseldorf', 'Koeln']
    populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
    ex_data = {'country': countries,
               'states.name': state_names,
               'name': city_names,
               'pop': populations}

    # Column order is taken from the result, so only the content is compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
Example #17
Source File: test_normalize.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` decides how nested keys are joined into column names."""
    # GH 14883
    # Default separator first, then an ASCII and a non-ASCII override.
    cases = (
        ({}, ['A.A', 'A.B']),
        ({'sep': '_'}, ['A_A', 'A_B']),
        ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
    )
    for kwargs, columns in cases:
        frame = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
        wanted = DataFrame([[1, 2]], columns=columns)
        tm.assert_frame_equal(frame.reindex_like(wanted), wanted)

    # The separator is also used when joining a nested meta path.
    frame = json_normalize(deep_nested, ['states', 'cities'],
                           meta=['country', ['states', 'name']],
                           sep='_')
    wanted_columns = Index(['name', 'pop', 'country',
                            'states_name']).sort_values()
    assert frame.columns.sort_values().equals(wanted_columns)
Example #18
Source File: visualize.py From vecto with Mozilla Public License 2.0 | 6 votes |
def df_from_file(path):
    """Load a results JSON file into a flat DataFrame with a ``result`` column.

    The data is flattened with ``json_normalize``, a ``details`` column is
    dropped when present, and ``result`` is filled from the
    ``result.<measurement>`` column, where the measurement comes from
    ``experiment_setup.default_measurement`` and falls back to ``accuracy``
    (with a warning) when that column is missing.
    """
    dframe = json_normalize(load_json(path))
    if "details" in dframe:
        dframe = dframe.drop(columns="details")

    try:
        measurement = dframe["experiment_setup.default_measurement"].unique()[0]
    except KeyError:
        logger.warning(f"default_measurement not specified in {path}")
        measurement = "accuracy"

    dframe["result"] = dframe["result." + measurement]
    return dframe
Example #19
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_simple_normalize(self, state_data):
    """Normalize the ``counties`` records of one state, of all states, and with meta."""
    # Single record: the result equals a frame built from its counties.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    # All records: counties are concatenated in input order.
    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties)
    tm.assert_frame_equal(json_normalize(state_data, 'counties'), expected)

    # With meta='state' the state value is repeated for each of its counties.
    with_state = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(with_state, expected)
Example #20
Source File: test_normalize.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_simple_normalize(self, state_data):
    """Normalize the ``counties`` records of one state, of all states, and with meta."""
    # Single record: the result equals a frame built from its counties.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    # All records: counties are concatenated in input order.
    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties)
    tm.assert_frame_equal(json_normalize(state_data, 'counties'), expected)

    # With meta='state' the state value is repeated for each of its counties.
    with_state = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(with_state, expected)
Example #21
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` decides how nested keys are joined into column names."""
    # GH 14883
    # Default separator first, then an ASCII and a non-ASCII override.
    cases = (
        ({}, ['A.A', 'A.B']),
        ({'sep': '_'}, ['A_A', 'A_B']),
        ({'sep': u'\u03c3'}, [u'A\u03c3A', u'A\u03c3B']),
    )
    for kwargs, columns in cases:
        frame = json_normalize({'A': {'A': 1, 'B': 2}}, **kwargs)
        wanted = DataFrame([[1, 2]], columns=columns)
        tm.assert_frame_equal(frame.reindex_like(wanted), wanted)

    # The separator is also used when joining a nested meta path.
    frame = json_normalize(deep_nested, ['states', 'cities'],
                           meta=['country', ['states', 'name']],
                           sep='_')
    wanted_columns = Index(['name', 'pop', 'country',
                            'states_name']).sort_values()
    assert frame.columns.sort_values().equals(wanted_columns)
Example #22
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_more_deeply_nested(self, deep_nested):
    """Normalize city records two levels deep, keeping country and state name as meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])

    countries = ['USA'] * 4 + ['Germany'] * 3
    state_names = ['California', 'California', 'Ohio', 'Ohio',
                   'Bayern', 'Nordrhein-Westfalen', 'Nordrhein-Westfalen']
    city_names = ['San Francisco', 'Los Angeles', 'Columbus', 'Cleveland',
                  'Munich', 'Duesseldorf', 'Koeln']
    populations = [12345, 12346, 1234, 1236, 12347, 1238, 1239]
    ex_data = {'country': countries,
               'states.name': state_names,
               'name': city_names,
               'pop': populations}

    # Column order is taken from the result, so only the content is compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
Example #23
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_meta_name_conflict(self):
    """Meta names that clash with record names raise unless ``meta_prefix`` is given."""
    records = [{'foo': 'hello',
                'bar': 'there',
                'data': [{'foo': 'something', 'bar': 'else'},
                         {'foo': 'something2', 'bar': 'else2'}]}]

    # Without a prefix the call is expected to fail with this message.
    pattern = (r"Conflicting metadata name (foo|bar),"
               " need distinguishing prefix")
    with pytest.raises(ValueError, match=pattern):
        json_normalize(records, 'data', meta=['foo', 'bar'])

    # With a prefix both the record columns and the prefixed meta columns
    # must be present in the result.
    normalized = json_normalize(records, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')
    for column in ['metafoo', 'metabar', 'foo', 'bar']:
        assert column in normalized
Example #24
Source File: frmt.py From bitQuant with MIT License | 6 votes |
def format_df(response, job):
    """Turn an exchange API response into a DataFrame with standard columns.

    ``job`` is read for its ``'type'``, ``'exchange'`` and ``'symbol'``
    entries. The response is flattened with ``json_normalize`` and passed
    through ``standard_columns``; ``exchange`` and ``symbol`` columns are
    filled in from ``job`` when the frame does not already have them.
    """
    is_trades = job['type'] == 'trades'

    if is_trades and job['exchange'] == 'btce':
        # Unwrap the payload by indexing the response with its own keys.
        # NOTE(review): presumably the btce response is a single-key dict;
        # confirm against the exchange API.
        for col in response:
            response = response[col]

    df = json_normalize(response)

    if is_trades and job['exchange'] == 'coinbase':
        # Parse the time column and derive an integer timestamp in seconds.
        df['time'] = to_datetime(df['time'], utc=0)
        df['timestamp'] = df['time'].astype(np.int64) // 10**9

    df = standard_columns(df)
    for column in ('exchange', 'symbol'):
        if column not in df:
            df[column] = job[column]
    return df
#|Standardize column names and drop columns not in dictionary below
Example #25
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_record_prefix(self, state_data):
    """``record_prefix`` is prepended to record columns but not to meta columns."""
    # Baseline without a prefix, on a single record.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties).rename(
        columns=lambda name: 'county_' + name)
    # The meta column is added after renaming, so it stays unprefixed.
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(result, expected)
Example #26
Source File: test_normalize.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_missing_field(self, author_missing_data):
    """Records lacking nested fields must normalize to NaN-filled columns."""
    # GH20030:
    result = json_normalize(author_missing_data)

    columns = ['info', 'author_name.first', 'author_name.last_name',
               'info.created_at', 'info.last_updated']
    # First expected row: every column is NaN.
    empty_row = dict.fromkeys(columns, np.nan)
    # Second expected row: 'info' is None, the flattened fields are populated.
    filled_row = dict(zip(columns,
                          [None, 'Jane', 'Doe', '11/08/1993', '26/05/2012']))

    expected = DataFrame([empty_row, filled_row])
    tm.assert_frame_equal(result, expected)
Example #27
Source File: query.py From lixinger-openapi with Apache License 2.0 | 6 votes |
def query_dataframe(url_suffix, query_params):
    '''
    API interface; returns a dictionary with the following structure:

    key     value             type
    code    return code       int
    data    returned result   dataframe
    msg     returned message  string
    '''
    return_value = {'code': -1, 'data': None, 'msg': ''}

    rlt = query_json(url_suffix, query_params)
    if rlt is None:
        # No response at all: keep the default code and report the failure.
        return_value['msg'] = 'query failed.'
        return return_value

    available = rlt.keys()
    # 'code' and 'msg' are copied through unchanged when present.
    for field in ('code', 'msg'):
        if field in available:
            return_value[field] = rlt[field]
    # 'data' is flattened into a DataFrame when present.
    if 'data' in available:
        return_value['data'] = json_normalize(rlt['data'])
    return return_value
Example #28
Source File: run_s2vt.py From caption-guided-saliency with BSD 2-Clause "Simplified" License | 6 votes |
def get_flickr30k_data(cfg):
    """Load the Flickr30k captions and split them into train/val/test frames.

    Each line of the annotations file is parsed as
    ``<image file>#<sentence id>\\t<caption>``; the part of the line before
    the first ``.`` becomes ``video_id``. A ``video_path`` column is built as
    ``<path_to_descriptors>/<video_id><descriptor_suffix>.npy``.

    :param cfg: object exposing ``train_file``, ``val_file``, ``test_file``,
        ``annotations_path``, ``path_to_descriptors`` and
        ``descriptor_suffix``
    :return: tuple ``(train_imgs, val_imgs, test_imgs)`` of DataFrames; rows
        keep the index they have in the full annotations table
    """
    #using the provided splits
    train_split = _read_split_ids(cfg.train_file)
    val_split = _read_split_ids(cfg.val_file)
    test_split = _read_split_ids(cfg.test_file)

    # Read the annotations inside a context manager so the file is closed.
    with open(cfg.annotations_path) as annotations_file:
        data = [{"video_id": item.split(".")[0],
                 "sentence_id": item.split("#")[1].split("\t")[0],
                 "caption": item.split("\t")[1]}
                for item in annotations_file.read().splitlines()]

    sentences = json_normalize(data)
    sentences['video_path'] = sentences['video_id'].map(
        lambda x: os.path.join(cfg.path_to_descriptors,
                               x + cfg.descriptor_suffix + ".npy"))

    # The previous code called reset_index() on each split and discarded the
    # result, which left the frames untouched; the dead calls are removed so
    # callers keep receiving exactly the same frames.
    train_imgs = sentences.loc[sentences["video_id"].isin(train_split)]
    val_imgs = sentences.loc[sentences["video_id"].isin(val_split)]
    test_imgs = sentences.loc[sentences["video_id"].isin(test_split)]
    return train_imgs, val_imgs, test_imgs


def _read_split_ids(path):
    """Return the set of ids (text before the first '.') listed one per line in ``path``."""
    with open(path) as split_file:
        return {line.split(".")[0] for line in split_file.read().splitlines()}
Example #29
Source File: test_normalize.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_record_prefix(self, state_data):
    """``record_prefix`` is prepended to record columns but not to meta columns."""
    # Baseline without a prefix, on a single record.
    first_state = state_data[0]
    tm.assert_frame_equal(json_normalize(first_state, 'counties'),
                          DataFrame(first_state['counties']))

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    all_counties = [county
                    for rec in state_data
                    for county in rec['counties']]
    expected = DataFrame(all_counties).rename(
        columns=lambda name: 'county_' + name)
    # The meta column is added after renaming, so it stays unprefixed.
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
    tm.assert_frame_equal(result, expected)
Example #30
Source File: record_packets_to_csv.py From network-pipeline with Apache License 2.0 | 5 votes |
def process_raw_frame(self, id=None, msg=None):
    """process_raw_frame

    Flatten a nested json dictionary into a single-level dictionary
    whose keys carry a ``raw_`` prefix, and remember every new key
    in ``self.raw_keys`` for table construction

    :param id: key for this msg
    :param msg: raw frame for packet
    """

    # normalize into a dataframe and round-trip it through JSON,
    # which yields one {row label: value} dict per flattened column
    columns = json.loads(json_normalize(msg).to_json())

    flat_msg = {}
    for name, cells in columns.items():
        prefixed = "raw_{}".format(name)
        # the frame holds a single row, labelled "0" after the round-trip
        flat_msg[prefixed] = cells["0"]
        # only the first sighting of a key is recorded
        self.raw_keys.setdefault(prefixed, name)
    # end of capturing all unique keys

    # the un-prefixed column dict is archived together with the message id
    columns["raw_id"] = id
    self.all_raw.append(columns)

    log.debug("RAW data updated:")
    log.debug(self.raw_keys)
    log.debug(self.all_raw)
    log.debug("")

    return flat_msg
# end of process_raw_frame