Python pandas.io.json.json_normalize() Examples

The following are code examples for showing how to use pandas.io.json.json_normalize(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_ga.py    Apache License 2.0 6 votes vote down vote up
def format_data(response):
    """Flatten a Google Analytics Reporting API v4 response into a DataFrame.

    Only the first report in ``response['reports']`` is used. Each row's
    dimension values and metric values are expanded into columns, which are
    then given fixed report-specific names.

    Args:
        response: decoded JSON dict returned by the GA reporting API.

    Returns:
        pandas.DataFrame with one row per GA report row.
    """
    reports = response['reports'][0]
    # NOTE: the original built a combined header list by appending metric
    # names directly onto reports['columnHeader']['dimensions'], which mutated
    # the caller's response dict; the list was never used (the column names
    # are hard-coded below), so that dead, side-effecting code is removed.
    data = json_normalize(reports['data']['rows'])
    data_dimensions = pd.DataFrame(data['dimensions'].tolist())
    # Assumes a single dateRange per request: each 'metrics' entry is a
    # one-element list whose dict holds the 'values' list (the original
    # column-0 access made the same assumption). Series.map replaces the
    # deprecated DataFrame.applymap round-trip.
    metric_values = data['metrics'].map(lambda entries: entries[0]['values'])
    data_metrics = pd.DataFrame(metric_values.tolist())
    result = pd.concat([data_dimensions, data_metrics], axis=1, ignore_index=True)
    result.columns = ["blog_title", "blog_url", "referral_path", "c1_timestamp", "geo_city_code", "unique_pageviews", "time_on_page"] # set the column names
    return result
Example 2
Project: fugle-realtime-py   Author: fortuna-intelligence   File: intraday.py    MIT License 6 votes vote down vote up
def meta(
    apiToken="demo",
    apiVersion="v0",
    host="api.fugle.tw",
    output="dataframe",
    symbolId="2884",
):
    """Fetch intraday meta data for one symbol from the Fugle realtime API.

    Returns a flattened DataFrame or the raw decoded JSON depending on
    ``output``; non-200 error payloads are returned in the same form.
    """
    if output not in ("dataframe", "raw"):
        raise ValueError('output must be one of ["dataframe", "raw"]')
    url = "https://{}/realtime/{}/intraday/meta".format(host, apiVersion)
    response = get(url=url, params=dict(apiToken=apiToken, symbolId=symbolId))
    json = response.json()
    # On error, surface the whole error payload instead of raising.
    payload = json if response.status_code != 200 else json["data"]["meta"]
    if output == "dataframe":
        return json_normalize(payload)
    return payload
Example 3
Project: fugle-realtime-py   Author: fortuna-intelligence   File: intraday.py    MIT License 6 votes vote down vote up
def quote(
    apiToken="demo",
    apiVersion="v0",
    host="api.fugle.tw",
    output="dataframe",
    symbolId="2884",
):
    """Fetch the intraday quote for one symbol from the Fugle realtime API.

    Returns a flattened DataFrame or the raw decoded JSON depending on
    ``output``; non-200 error payloads are returned in the same form.
    """
    if output not in ("dataframe", "raw"):
        raise ValueError('output must be one of ["dataframe", "raw"]')
    url = "https://{}/realtime/{}/intraday/quote".format(host, apiVersion)
    response = get(url=url, params=dict(apiToken=apiToken, symbolId=symbolId))
    json = response.json()
    # On error, surface the whole error payload instead of raising.
    payload = json if response.status_code != 200 else json["data"]["quote"]
    if output == "dataframe":
        return json_normalize(payload)
    return payload
Example 4
Project: airqdata   Author: dr-1   File: utils.py    GNU General Public License v3.0 6 votes vote down vote up
def read_json(file, *_args, **_kwargs):
    """Read a semi-structured JSON file into a flattened dataframe.

    Args:
        file: file-like object
        _args: positional arguments receiver; not used
        _kwargs: keyword arguments receiver; not used

    Returns:
        Dataframe with single column level; original JSON hierarchy is
            expressed as dot notation in column names
    """
    if sys.version_info < (3, 6):
        # json.load cannot consume a bytes stream before Python 3.6,
        # so decode the raw contents first.
        parsed = json.loads(file.read().decode())
    else:
        parsed = json.load(file)
    return json_normalize(parsed)
Example 5
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """json_normalize with a record path explodes nested records.

        Covers a single record, a list of records, and meta='state'
        broadcast across the exploded county rows.
        """
        result = json_normalize(state_data[0], "counties")
        expected = DataFrame(state_data[0]["counties"])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, "counties")

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in state_data:
            expected.extend(rec["counties"])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, "counties", meta="state")
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 6
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """``sep`` controls the string joining nested keys in column names."""
        # GH 14883
        result = json_normalize({"A": {"A": 1, "B": 2}})
        expected = DataFrame([[1, 2]], columns=["A.A", "A.B"])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({"A": {"A": 1, "B": 2}}, sep="_")
        expected = DataFrame([[1, 2]], columns=["A_A", "A_B"])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # Any string works as separator, including non-ASCII (sigma here).
        result = json_normalize({"A": {"A": 1, "B": 2}}, sep="\u03c3")
        expected = DataFrame([[1, 2]], columns=["A\u03c3A", "A\u03c3B"])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to meta columns built from key paths.
        result = json_normalize(
            deep_nested,
            ["states", "cities"],
            meta=["country", ["states", "name"]],
            sep="_",
        )
        expected = Index(["name", "pop", "country", "states_name"]).sort_values()
        assert result.columns.sort_values().equals(expected)
Example 7
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_nested_object_record_path(self):
        # GH 22706
        data = {
            "state": "Florida",
            "info": {
                "governor": "Rick Scott",
                "counties": [
                    {"name": "Dade", "population": 12345},
                    {"name": "Broward", "population": 40000},
                    {"name": "Palm Beach", "population": 60000},
                ],
            },
        }
        result = json_normalize(data, record_path=["info", "counties"])
        expected = DataFrame(
            [["Dade", 12345], ["Broward", 40000], ["Palm Beach", 60000]],
            columns=["name", "population"],
        )
        tm.assert_frame_equal(result, expected) 
Example 8
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_meta_name_conflict(self):
        """A meta key colliding with a record key raises unless meta_prefix is set."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        # match= is a regex; (foo|bar) accepts either conflicting key name.
        msg = r"Conflicting metadata name (foo|bar), need distinguishing prefix"
        with pytest.raises(ValueError, match=msg):
            json_normalize(data, "data", meta=["foo", "bar"])

        # With a prefix the meta columns no longer clash with record columns.
        result = json_normalize(data, "data", meta=["foo", "bar"], meta_prefix="meta")

        for val in ["metafoo", "metabar", "foo", "bar"]:
            assert val in result
Example 9
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_meta_parameter_not_modified(self):
        # GH 18610
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        COLUMNS = ["foo", "bar"]
        result = json_normalize(data, "data", meta=COLUMNS, meta_prefix="meta")

        assert COLUMNS == ["foo", "bar"]
        for val in ["metafoo", "metabar", "foo", "bar"]:
            assert val in result 
Example 10
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_missing_field(self, author_missing_data):
        """Records missing nested fields normalize to NaN in those columns."""
        # GH20030:
        result = json_normalize(author_missing_data)
        ex_data = [
            {
                "info": np.nan,
                "info.created_at": np.nan,
                "info.last_updated": np.nan,
                "author_name.first": np.nan,
                "author_name.last_name": np.nan,
            },
            {
                "info": None,
                "info.created_at": "11/08/1993",
                "info.last_updated": "26/05/2012",
                "author_name.first": "Jane",
                "author_name.last_name": "Doe",
            },
        ]
        expected = DataFrame(ex_data)
        # Pre-3.6 dicts are unordered, so compare ignoring column order there
        # (check_like); PY36 presumably comes from pandas.compat — confirm.
        tm.assert_frame_equal(result, expected, check_like=not PY36)
Example 11
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """json_normalize with a record path explodes nested records.

        Covers a single record, a list of records, and meta='state'
        broadcast across the exploded county rows.
        """
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties', meta='state')
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 12
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """``sep`` controls the string joining nested keys in column names."""
        # GH 14883
        result = json_normalize({'A': {'A': 1, 'B': 2}})
        expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
        expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # Any string works as separator, including non-ASCII (sigma here).
        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
        expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to meta columns built from key paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected = Index(['name', 'pop',
                          'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected)
Example 13
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_more_deeply_nested(self, deep_nested):
        """Two-level record path with scalar and nested meta keys."""

        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']])
        # meta_prefix={'states': 'state_'})

        # Four US cities then three German ones; the nested meta key
        # surfaces as a dotted column name ('states.name').
        ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
                   'states.name': ['California', 'California', 'Ohio', 'Ohio',
                                   'Bayern', 'Nordrhein-Westfalen',
                                   'Nordrhein-Westfalen'],
                   'name': ['San Francisco', 'Los Angeles', 'Columbus',
                            'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
                   'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

        # Reuse result's column order so only the values are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)
Example 14
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_meta_name_conflict(self):
        """A meta key colliding with a record key raises unless meta_prefix is set."""
        data = [{'foo': 'hello',
                 'bar': 'there',
                 'data': [{'foo': 'something', 'bar': 'else'},
                          {'foo': 'something2', 'bar': 'else2'}]}]

        # match= is a regex; (foo|bar) accepts either conflicting key name.
        msg = (r"Conflicting metadata name (foo|bar),"
               " need distinguishing prefix")
        with pytest.raises(ValueError, match=msg):
            json_normalize(data, 'data', meta=['foo', 'bar'])

        # With a prefix the meta columns no longer clash with record columns.
        result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')

        for val in ['metafoo', 'metabar', 'foo', 'bar']:
            assert val in result
Example 15
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` prepends a string to every record column name."""
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)
        # Only record columns get the prefix; the meta column keeps its name.
        expected = expected.rename(columns=lambda x: 'county_' + x)
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 16
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_missing_field(self, author_missing_data):
        """Records missing nested fields normalize to NaN in those columns."""
        # GH20030:
        result = json_normalize(author_missing_data)
        ex_data = [
            {'info': np.nan,
             'author_name.first': np.nan,
             'author_name.last_name': np.nan,
             'info.created_at': np.nan,
             'info.last_updated': np.nan},
            {'info': None,
             'author_name.first': 'Jane',
             'author_name.last_name': 'Doe',
             'info.created_at': '11/08/1993',
             'info.last_updated': '26/05/2012'}
        ]
        expected = DataFrame(ex_data)
        tm.assert_frame_equal(result, expected)
Example 17
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """json_normalize with a record path explodes nested records.

        Covers a single record, a list of records, and meta='state'
        broadcast across the exploded county rows.
        """
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties', meta='state')
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 18
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """``sep`` controls the string joining nested keys in column names."""
        # GH 14883
        result = json_normalize({'A': {'A': 1, 'B': 2}})
        expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
        expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # Any string works as separator, including non-ASCII (sigma here).
        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
        expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to meta columns built from key paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected = Index(['name', 'pop',
                          'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected)
Example 19
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` prepends a string to every record column name."""
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)
        # Only record columns get the prefix; the meta column keeps its name.
        expected = expected.rename(columns=lambda x: 'county_' + x)
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 20
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_non_ascii_key(self):
        """Non-ASCII (UTF-8) keys become column names intact on Py2 and Py3."""
        if compat.PY3:
            testjson = (
                b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
                b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
            ).decode('utf8')
        else:
            # On Python 2 the same UTF-8 bytes are a plain str literal.
            testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                        '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

        testdata = {
            u'sub.A': [1, 3],
            u'sub.B': [2, 4],
            b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
        }
        expected = DataFrame(testdata)

        result = json_normalize(json.loads(testjson))
        tm.assert_frame_equal(result, expected)
Example 21
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """json_normalize with a record path explodes nested records.

        Covers a single record, a list of records, and meta='state'
        broadcast across the exploded county rows.
        """
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties', meta='state')
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 22
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_simple_normalize_with_separator(self, deep_nested):
        """``sep`` controls the string joining nested keys in column names."""
        # GH 14883
        result = json_normalize({'A': {'A': 1, 'B': 2}})
        expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
        expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # Any string works as separator, including non-ASCII (sigma here).
        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
        expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        # The separator also applies to meta columns built from key paths.
        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected = Index(['name', 'pop',
                          'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected)
Example 23
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_record_prefix(self, state_data):
        """``record_prefix`` prepends a string to every record column name."""
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)
        # Only record columns get the prefix; the meta column keeps its name.
        expected = expected.rename(columns=lambda x: 'county_' + x)
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 24
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 6 votes vote down vote up
def test_non_ascii_key(self):
        """Non-ASCII (UTF-8) keys become column names intact on Py2 and Py3."""
        if compat.PY3:
            testjson = (
                b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
                b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
            ).decode('utf8')
        else:
            # On Python 2 the same UTF-8 bytes are a plain str literal.
            testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                        '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

        testdata = {
            u'sub.A': [1, 3],
            u'sub.B': [2, 4],
            b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
        }
        expected = DataFrame(testdata)

        result = json_normalize(json.loads(testjson))
        tm.assert_frame_equal(result, expected)
Example 25
Project: Computable   Author: ktraunmueller   File: test_json_norm.py    MIT License 6 votes vote down vote up
def test_simple_normalize(self):
        """json_normalize with a record path explodes nested records.

        Uses the fixture data stored on the test case (self.state_data).
        """
        result = json_normalize(self.state_data[0], 'counties')
        expected = DataFrame(self.state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(self.state_data, 'counties')

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in self.state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(self.state_data, 'counties', meta='state')
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 26
Project: vecto   Author: vecto-ai   File: visualize.py    Mozilla Public License 2.0 6 votes vote down vote up
def df_from_file(path):
    """Load one benchmark-result JSON file into a flattened dataframe.

    Drops the bulky "details" column and adds a "result" column holding the
    file's default measurement (falling back to accuracy when unspecified).
    """
    frame = json_normalize(load_json(path))
    # The per-item "details" payload is not needed downstream.
    if "details" in frame:
        frame.drop("details", axis="columns", inplace=True)
    measurement = "accuracy"  # fallback when the file declares none
    try:
        measurement = frame["experiment_setup.default_measurement"].unique()[0]
    except KeyError:
        logger.warning(f"default_measurement not specified in {path}")
    frame["result"] = frame["result." + measurement]
    return frame
Example 27
Project: marketing-performance-report   Author: nicolasmelo1   File: apicalls.py    MIT License 6 votes vote down vote up
def reportcampaigns(self, dateStart):
        """Fetch the account's Twitter Ads campaigns and keep relevant ones.

        Args:
            dateStart: cutoff; campaigns ending before it are dropped unless
                their not-servable reason is something other than 'EXPIRED'.

        Returns:
            DataFrame of the remaining campaign fields, with bookkeeping
            columns removed.
        """
        # this gets all the campaigns and campaignids that we made with the account
        # NOTE(review): the ads account id is hard-coded in the URL — confirm
        # this is the only account and not meant to come from configuration.
        getcampaigns = 'https://ads-api.twitter.com/2/accounts/18ce54np2w4/campaigns'
        content = requests.get(getcampaigns, auth=twitterinit()).json()
        campaigns = json_normalize(content, ['data'])

        # Drop bookkeeping columns not needed for the report.
        campaigns.drop(['updated_at', 'total_budget_amount_local_micro', 'start_time', 'standard_delivery', 'servable',
                        'funding_instrument_id', 'frequency_cap', 'entity_status', 'duration_in_days', 'deleted',
                        'daily_budget_amount_local_micro', 'currency', 'created_at', 'account_id'],
                       inplace=True,
                       axis=1)
        # Keep only the date part of the ISO timestamp ('...T...'), then parse.
        campaigns['end_time'] = campaigns['end_time'].apply(lambda x: str(x).split('T')[0] if x is not None else None)
        campaigns['end_time'] = pandas.to_datetime(pandas.Series(campaigns['end_time']), format="%Y-%m-%d")

        # Keep campaigns still running past dateStart, or unservable for a
        # reason other than expiry.
        campaigns = campaigns[(campaigns['end_time'] >= dateStart) | (campaigns['reasons_not_servable'] != 'EXPIRED')]
        campaigns.drop(['end_time', 'reasons_not_servable'], inplace=True, axis=1)
        return campaigns
Example 28
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_normalize.py    Apache License 2.0 6 votes vote down vote up
def test_simple_normalize(self, state_data):
        """json_normalize with a record path explodes nested records.

        Covers a single record, a list of records, and meta='state'
        broadcast across the exploded county rows.
        """
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        # Flattening all records' counties equals concatenating them.
        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties', meta='state')
        # Florida has 3 counties, Ohio 2 — the meta value repeats per row.
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected)
Example 29
Project: fugle-realtime-py   Author: fortuna-intelligence   File: intraday.py    MIT License 5 votes vote down vote up
def chart(
    apiToken="demo",
    apiVersion="v0",
    host="api.fugle.tw",
    output="dataframe",
    symbolId="2884",
):
    """Fetch the intraday price chart for one symbol from the Fugle API.

    The chart arrives keyed by timestamp; for dataframe output each entry is
    unpacked into a row with an ``at`` column and sorted chronologically.
    """
    if output not in ("dataframe", "raw"):
        raise ValueError('output must be one of ["dataframe", "raw"]')
    url = "https://{}/realtime/{}/intraday/chart".format(host, apiVersion)
    response = get(url=url, params=dict(apiToken=apiToken, symbolId=symbolId))
    json = response.json()
    if response.status_code != 200:
        # Surface error payloads in the requested form instead of raising.
        return json_normalize(json) if output == "dataframe" else json
    chart = json["data"]["chart"]
    if output == "raw":
        return chart
    rows = [dict(at=timestamp, **fields) for timestamp, fields in chart.items()]
    df = json_normalize(rows)
    if "at" in df.columns:
        df["at"] = to_datetime(df["at"])
        df = df.sort_values("at").reset_index(drop=True)
    return df
Example 30
Project: fugle-realtime-py   Author: fortuna-intelligence   File: intraday.py    MIT License 5 votes vote down vote up
def trades(
    apiToken="demo",
    apiVersion="v0",
    host="api.fugle.tw",
    output="dataframe",
    symbolId="2884",
):
    """Fetch intraday trades for one symbol from the Fugle realtime API.

    For dataframe output the trades are flattened, their ``at`` timestamps
    parsed, and rows sorted chronologically.
    """
    if output not in ("dataframe", "raw"):
        raise ValueError('output must be one of ["dataframe", "raw"]')
    url = "https://{}/realtime/{}/intraday/trades".format(host, apiVersion)
    response = get(url=url, params=dict(apiToken=apiToken, symbolId=symbolId))
    json = response.json()
    if response.status_code != 200:
        # Surface error payloads in the requested form instead of raising.
        return json_normalize(json) if output == "dataframe" else json
    trades = json["data"]["trades"]
    if output == "raw":
        return trades
    df = json_normalize(trades)
    if "at" in df.columns:
        df["at"] = to_datetime(df["at"])
        df = df.sort_values("at").reset_index(drop=True)
    return df
Example 31
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def max_level_test_input_data():
    """
    input data to test json_normalize with max_level param
    """
    record = {
        "CreatedBy": {"Name": "User001"},
        "Lookup": {
            "TextField": "Some text",
            "UserField": {"Id": "ID001", "Name": "Name001"},
        },
        "Image": {"a": "b"},
    }
    return [record]
Example 32
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_simple_records(self):
        recs = [
            {"a": 1, "b": 2, "c": 3},
            {"a": 4, "b": 5, "c": 6},
            {"a": 7, "b": 8, "c": 9},
            {"a": 10, "b": 11, "c": 12},
        ]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 33
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_empty_array(self):
        result = json_normalize([])
        expected = DataFrame()
        tm.assert_frame_equal(result, expected) 
Example 34
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_more_deeply_nested(self, deep_nested):
        """Two-level record path with scalar and nested meta keys."""

        result = json_normalize(
            deep_nested, ["states", "cities"], meta=["country", ["states", "name"]]
        )
        # Four US cities then three German ones; the nested meta key
        # surfaces as a dotted column name ("states.name").
        ex_data = {
            "country": ["USA"] * 4 + ["Germany"] * 3,
            "states.name": [
                "California",
                "California",
                "Ohio",
                "Ohio",
                "Bayern",
                "Nordrhein-Westfalen",
                "Nordrhein-Westfalen",
            ],
            "name": [
                "San Francisco",
                "Los Angeles",
                "Columbus",
                "Cleveland",
                "Munich",
                "Duesseldorf",
                "Koeln",
            ],
            "pop": [12345, 12346, 1234, 1236, 12347, 1238, 1239],
        }

        # Reuse result's column order so only the values are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)
Example 35
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_shallow_nested(self):
        data = [
            {
                "state": "Florida",
                "shortname": "FL",
                "info": {"governor": "Rick Scott"},
                "counties": [
                    {"name": "Dade", "population": 12345},
                    {"name": "Broward", "population": 40000},
                    {"name": "Palm Beach", "population": 60000},
                ],
            },
            {
                "state": "Ohio",
                "shortname": "OH",
                "info": {"governor": "John Kasich"},
                "counties": [
                    {"name": "Summit", "population": 1234},
                    {"name": "Cuyahoga", "population": 1337},
                ],
            },
        ]

        result = json_normalize(
            data, "counties", ["state", "shortname", ["info", "governor"]]
        )
        ex_data = {
            "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
            "state": ["Florida"] * 3 + ["Ohio"] * 2,
            "shortname": ["FL", "FL", "FL", "OH", "OH"],
            "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
            "population": [12345, 40000, 60000, 1234, 1337],
        }
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 36
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_non_ascii_key(self):
        testjson = (
            b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
            + b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
        ).decode("utf8")

        testdata = {
            b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
            "sub.A": [1, 3],
            "sub.B": [2, 4],
        }
        expected = DataFrame(testdata)

        result = json_normalize(json.loads(testjson))
        tm.assert_frame_equal(result, expected) 
Example 37
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_max_level_with_records_path(self, max_level, expected):
        # GH23843: Enhanced JSON normalize
        test_input = [
            {
                "CreatedBy": {"Name": "User001"},
                "Lookup": [
                    {
                        "TextField": "Some text",
                        "UserField": {"Id": "ID001", "Name": "Name001"},
                    },
                    {
                        "TextField": "Some text",
                        "UserField": {"Id": "ID001", "Name": "Name001"},
                    },
                ],
                "Image": {"a": "b"},
                "tags": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        result = json_normalize(
            test_input,
            record_path=["Lookup"],
            meta=[["CreatedBy"], ["Image"]],
            max_level=max_level,
        )
        expected_df = DataFrame(data=expected, columns=result.columns.values)
        tm.assert_equal(expected_df, result) 
Example 38
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_json_normalize_errors(self, missing_metadata):
        """GH 14583: a meta key that is missing from some records raises a
        KeyError (pointing at errors='ignore') when errors='raise'."""
        expected_msg = (
            "Try running with errors='ignore' as key 'name' is not always present"
        )
        with pytest.raises(KeyError, match=expected_msg):
            json_normalize(
                data=missing_metadata,
                record_path="addresses",
                meta="name",
                errors="raise",
            )
Example 39
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_missing_meta(self, missing_metadata):
        """GH 25468: with errors='ignore', records lacking the requested meta
        key get NaN in the meta column instead of raising."""
        result = json_normalize(
            data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
        )
        ex_data = [
            [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
            [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
        ]
        # BUG FIX: a stale duplicate assignment of `columns` (with city/number
        # in the wrong order) was dead code, immediately overwritten by the
        # next line; only the order matching the ex_data rows is kept.
        columns = ["number", "street", "city", "state", "zip", "name"]
        expected = DataFrame(ex_data, columns=columns)
        # On Python < 3.6 dict order is not guaranteed, so compare ignoring
        # column order there.
        tm.assert_frame_equal(result, expected, check_like=not PY36)
Example 40
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 41
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_empty_array(self):
        result = json_normalize([])
        expected = DataFrame()
        tm.assert_frame_equal(result, expected) 
Example 42
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_value_array_record_prefix(self):
        # GH 21536
        result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.')
        expected = DataFrame([[1], [2]], columns=['Prefix.0'])
        tm.assert_frame_equal(result, expected) 
Example 43
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_shallow_nested(self):
        data = [{'state': 'Florida',
                 'shortname': 'FL',
                 'info': {
                     'governor': 'Rick Scott'
                 },
                 'counties': [{'name': 'Dade', 'population': 12345},
                              {'name': 'Broward', 'population': 40000},
                              {'name': 'Palm Beach', 'population': 60000}]},
                {'state': 'Ohio',
                 'shortname': 'OH',
                 'info': {
                     'governor': 'John Kasich'
                 },
                 'counties': [{'name': 'Summit', 'population': 1234},
                              {'name': 'Cuyahoga', 'population': 1337}]}]

        result = json_normalize(data, 'counties',
                                ['state', 'shortname',
                                 ['info', 'governor']])
        ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
                            'Cuyahoga'],
                   'state': ['Florida'] * 3 + ['Ohio'] * 2,
                   'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
                   'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
                   'population': [12345, 40000, 60000, 1234, 1337]}
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 44
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_meta_parameter_not_modified(self):
        # GH 18610
        data = [{'foo': 'hello',
                 'bar': 'there',
                 'data': [{'foo': 'something', 'bar': 'else'},
                          {'foo': 'something2', 'bar': 'else2'}]}]

        COLUMNS = ['foo', 'bar']
        result = json_normalize(data, 'data', meta=COLUMNS,
                                meta_prefix='meta')

        assert COLUMNS == ['foo', 'bar']
        for val in ['metafoo', 'metabar', 'foo', 'bar']:
            assert val in result 
Example 45
Project: homeassistant-ring_alarm_component   Author: rs1932   File: pyringalarm.py    Apache License 2.0 5 votes vote down vote up
def _build_initial_entity_list(received_data):
    """Flatten a hub device-list message into a DataFrame and remember it in
    the module-level ``ringalarm_devices_list``.

    Args:
        received_data: decoded message dict with 'src' (apparently the hub id
            — confirm against the message schema) and 'body' (list of device
            records).
    """
    # BUG FIX: `pandas.io.json.json_normalize` was deprecated in pandas 1.0
    # and removed in 2.0; the public location is `pandas.json_normalize`.
    from pandas import json_normalize
    hub_id = received_data['src']
    ringalarm_devices = json_normalize(received_data['body'])
    # tag every device row with the hub it came from
    ringalarm_devices.loc[:, DEVICE_SOURCE] = hub_id
    ringalarm_devices_list.append(ringalarm_devices)
Example 46
Project: homeassistant-ring_alarm_component   Author: rs1932   File: pyringalarm.py    Apache License 2.0 5 votes vote down vote up
def _build_update_entity_list(received_data):
    from pandas.io.json import json_normalize
    _hubID = received_data['src']
    updated_devices = json_normalize(received_data['body'])
    return updated_devices 
Example 47
Project: hexpy   Author: sullivancolin   File: models.py    MIT License 5 votes vote down vote up
def to_dataframe(self) -> pd.DataFrame:
        """Flatten this UploadCollection into a DataFrame, one column per field."""
        flattened = self.dict()
        return json_normalize(flattened)
Example 48
Project: hexpy   Author: sullivancolin   File: conftest.py    MIT License 5 votes vote down vote up
def upload_dataframe(upload_items: List[JSONDict]) -> pd.DataFrame:
    """Flatten raw upload item dicts into a DataFrame, one row per item."""
    frame = json_normalize(upload_items)
    return frame
Example 49
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 50
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_empty_array(self):
        result = json_normalize([])
        expected = DataFrame()
        tm.assert_frame_equal(result, expected) 
Example 51
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_value_array_record_prefix(self):
        # GH 21536
        result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.')
        expected = DataFrame([[1], [2]], columns=['Prefix.0'])
        tm.assert_frame_equal(result, expected) 
Example 52
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_shallow_nested(self):
        data = [{'state': 'Florida',
                 'shortname': 'FL',
                 'info': {
                     'governor': 'Rick Scott'
                 },
                 'counties': [{'name': 'Dade', 'population': 12345},
                              {'name': 'Broward', 'population': 40000},
                              {'name': 'Palm Beach', 'population': 60000}]},
                {'state': 'Ohio',
                 'shortname': 'OH',
                 'info': {
                     'governor': 'John Kasich'
                 },
                 'counties': [{'name': 'Summit', 'population': 1234},
                              {'name': 'Cuyahoga', 'population': 1337}]}]

        result = json_normalize(data, 'counties',
                                ['state', 'shortname',
                                 ['info', 'governor']])
        ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
                            'Cuyahoga'],
                   'state': ['Florida'] * 3 + ['Ohio'] * 2,
                   'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
                   'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
                   'population': [12345, 40000, 60000, 1234, 1337]}
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 53
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_meta_name_conflict(self):
        """Meta keys that clash with record keys must raise unless a
        meta_prefix disambiguates the resulting columns."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        # same names in records and meta -> ambiguous, must raise
        with pytest.raises(ValueError):
            json_normalize(data, 'data', meta=['foo', 'bar'])

        # prefixed meta columns coexist with the record columns
        result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')

        for expected_col in ('metafoo', 'metabar', 'foo', 'bar'):
            assert expected_col in result
Example 54
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_meta_parameter_not_modified(self):
        # GH 18610
        data = [{'foo': 'hello',
                 'bar': 'there',
                 'data': [{'foo': 'something', 'bar': 'else'},
                          {'foo': 'something2', 'bar': 'else2'}]}]

        COLUMNS = ['foo', 'bar']
        result = json_normalize(data, 'data', meta=COLUMNS,
                                meta_prefix='meta')

        assert COLUMNS == ['foo', 'bar']
        for val in ['metafoo', 'metabar', 'foo', 'bar']:
            assert val in result 
Example 55
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 56
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_empty_array(self):
        result = json_normalize([])
        expected = DataFrame()
        tm.assert_frame_equal(result, expected) 
Example 57
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_value_array_record_prefix(self):
        # GH 21536
        result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.')
        expected = DataFrame([[1], [2]], columns=['Prefix.0'])
        tm.assert_frame_equal(result, expected) 
Example 58
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_shallow_nested(self):
        data = [{'state': 'Florida',
                 'shortname': 'FL',
                 'info': {
                     'governor': 'Rick Scott'
                 },
                 'counties': [{'name': 'Dade', 'population': 12345},
                              {'name': 'Broward', 'population': 40000},
                              {'name': 'Palm Beach', 'population': 60000}]},
                {'state': 'Ohio',
                 'shortname': 'OH',
                 'info': {
                     'governor': 'John Kasich'
                 },
                 'counties': [{'name': 'Summit', 'population': 1234},
                              {'name': 'Cuyahoga', 'population': 1337}]}]

        result = json_normalize(data, 'counties',
                                ['state', 'shortname',
                                 ['info', 'governor']])
        ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
                            'Cuyahoga'],
                   'state': ['Florida'] * 3 + ['Ohio'] * 2,
                   'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
                   'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
                   'population': [12345, 40000, 60000, 1234, 1337]}
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 59
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_meta_name_conflict(self):
        """Meta keys that clash with record keys must raise unless a
        meta_prefix disambiguates the resulting columns."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        # same names in records and meta -> ambiguous, must raise
        with pytest.raises(ValueError):
            json_normalize(data, 'data', meta=['foo', 'bar'])

        # prefixed meta columns coexist with the record columns
        result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')

        for expected_col in ('metafoo', 'metabar', 'foo', 'bar'):
            assert expected_col in result
Example 60
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 5 votes vote down vote up
def test_meta_parameter_not_modified(self):
        # GH 18610
        data = [{'foo': 'hello',
                 'bar': 'there',
                 'data': [{'foo': 'something', 'bar': 'else'},
                          {'foo': 'something2', 'bar': 'else2'}]}]

        COLUMNS = ['foo', 'bar']
        result = json_normalize(data, 'data', meta=COLUMNS,
                                meta_prefix='meta')

        assert COLUMNS == ['foo', 'bar']
        for val in ['metafoo', 'metabar', 'foo', 'bar']:
            assert val in result 
Example 61
Project: twtools   Author: fradeve   File: twstats.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, time_span, step, tag):
        """Capture the query parameters and build the interval table.

        Args:
            time_span: overall period to report on (consumed by
                ``_generate_intervals`` — see that method for the format).
            step: interval step size, passed through via instance state.
            tag: tag the intervals are filtered on.
        """
        self.time_span = time_span
        self.step = step
        self.tag = tag
        # _generate_intervals() reads the attributes assigned above
        self.tw_intervals = self._generate_intervals()
        # flatten the interval dicts into a DataFrame for analysis
        self.df = json_normalize(self.tw_intervals)
Example 62
Project: Computable   Author: ktraunmueller   File: test_json_norm.py    MIT License 5 votes vote down vote up
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 63
Project: Computable   Author: ktraunmueller   File: test_json_norm.py    MIT License 5 votes vote down vote up
def test_more_deeply_nested(self):
        data = [{'country': 'USA',
                 'states': [{'name': 'California',
                             'cities': [{'name': 'San Francisco',
                                         'pop': 12345},
                                        {'name': 'Los Angeles',
                                         'pop': 12346}]
                            },
                            {'name': 'Ohio',
                             'cities': [{'name': 'Columbus',
                                         'pop': 1234},
                                        {'name': 'Cleveland',
                                         'pop': 1236}]}
                           ]
                 },
                {'country': 'Germany',
                 'states': [{'name': 'Bayern',
                             'cities': [{'name': 'Munich', 'pop': 12347}]
                            },
                            {'name': 'Nordrhein-Westfalen',
                             'cities': [{'name': 'Duesseldorf', 'pop': 1238},
                                        {'name': 'Koeln', 'pop': 1239}]}
                           ]
                 }
                ]

        result = json_normalize(data, ['states', 'cities'],
                                meta=['country', ['states', 'name']])
                                # meta_prefix={'states': 'state_'})

        ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
                   'states.name': ['California', 'California', 'Ohio', 'Ohio',
                                   'Bayern', 'Nordrhein-Westfalen',
                                   'Nordrhein-Westfalen'],
                   'name': ['San Francisco', 'Los Angeles', 'Columbus',
                            'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
                   'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 64
Project: Computable   Author: ktraunmueller   File: test_json_norm.py    MIT License 5 votes vote down vote up
def test_shallow_nested(self):
        data = [{'state': 'Florida',
                 'shortname': 'FL',
                 'info': {
                      'governor': 'Rick Scott'
                 },
                 'counties': [{'name': 'Dade', 'population': 12345},
                             {'name': 'Broward', 'population': 40000},
                             {'name': 'Palm Beach', 'population': 60000}]},
                {'state': 'Ohio',
                 'shortname': 'OH',
                 'info': {
                      'governor': 'John Kasich'
                 },
                 'counties': [{'name': 'Summit', 'population': 1234},
                              {'name': 'Cuyahoga', 'population': 1337}]}]

        result = json_normalize(data, 'counties',
                                ['state', 'shortname',
                                 ['info', 'governor']])
        ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
                            'Cuyahoga'],
                   'state': ['Florida'] * 3 + ['Ohio'] * 2,
                   'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
                   'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
                   'population': [12345, 40000, 60000, 1234, 1337]}
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
Example 65
Project: Computable   Author: ktraunmueller   File: test_json_norm.py    MIT License 5 votes vote down vote up
def test_meta_name_conflict(self):
        """Meta keys that clash with record keys must raise unless a
        meta_prefix disambiguates the resulting columns."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        # same names in records and meta -> ambiguous, must raise
        self.assertRaises(ValueError, json_normalize, data,
                          'data', meta=['foo', 'bar'])

        # prefixed meta columns coexist with the record columns
        result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')

        for expected_col in ['metafoo', 'metabar', 'foo', 'bar']:
            self.assertTrue(expected_col in result)
Example 66
Project: pyStatsBomb   Author: ElSaico   File: helpers.py    Mozilla Public License 2.0 5 votes vote down vote up
def goalkeeper_info(df):
    """Attach opposing-goalkeeper columns derived from each shot's freeze
    frame.

    Args:
        df: event DataFrame with a 'shot.freeze_frame' column holding lists of
            freeze-frame player dicts (NaN for rows that are not shots).

    Returns:
        Copy of ``df`` with 'player.id.GK', 'player.name.GK',
        'location.x.GK' and 'location.y.GK' columns added (NaN where no
        goalkeeper was found in the frame).
    """
    def get_goalkeeper(ff):
        # one row per player present in the freeze frame
        dff = json_normalize(ff)
        # the opposing goalkeeper: not a teammate and position 'Goalkeeper'
        filtered = dff[~dff.teammate & (dff['position.name'] == 'Goalkeeper')]
        if filtered.empty:
            # BUG FIX: `DataFrame.append` was deprecated in pandas 1.4 and
            # removed in 2.0; reindexing to a single index produces the same
            # all-NaN placeholder row the old `append({}, ...)` created.
            filtered = filtered.reindex([0])
        return filtered.iloc[0]

    goalkeepers = df['shot.freeze_frame'].dropna().apply(get_goalkeeper)
    return df.assign(**{
        'player.id.GK': goalkeepers['player.id'],
        'player.name.GK': goalkeepers['player.name'],
        'location.x.GK': goalkeepers.location.map(lambda gk: gk[0], na_action='ignore'),
        'location.y.GK': goalkeepers.location.map(lambda gk: gk[1], na_action='ignore'),
    })
Example 67
Project: pyStatsBomb   Author: ElSaico   File: free.py    Mozilla Public License 2.0 5 votes vote down vote up
def get_matches(competition_ids):
    """Download and concatenate match records for the given competitions.

    Args:
        competition_ids: iterable of StatsBomb competition ids.

    Returns:
        DataFrame of all matches with a fresh RangeIndex; empty DataFrame if
        no competition ids are given.
    """
    # BUG FIX: `DataFrame.append` in a loop was O(n^2) and the method was
    # removed in pandas 2.0 — collect the per-competition frames and
    # concatenate once at the end instead.
    frames = []
    for competition_id in competition_ids:
        response = requests.get(MATCHES_URL.format(competition_id))
        frames.append(json_normalize(response.json()))
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
Example 68
Project: pyStatsBomb   Author: ElSaico   File: free.py    Mozilla Public License 2.0 5 votes vote down vote up
def get_match_lineups(match):
    """Fetch and flatten the lineups for one match, tagging each row with the
    match's identifying ids so frames from several matches can be combined.

    Args:
        match: match record exposing ``match_id`` as an attribute and
            'match_id' / 'competition.competition_id' / 'season.season_id'
            as keys.
    """
    payload = requests.get(LINEUPS_URL.format(match.match_id)).json()
    lineups = json_normalize(payload)
    return lineups.assign(
        match_id=match['match_id'],
        competition_id=match['competition.competition_id'],
        season_id=match['season.season_id']
    )
Example 69
Project: pyStatsBomb   Author: ElSaico   File: free.py    Mozilla Public License 2.0 5 votes vote down vote up
def get_match_events(match):
    """Fetch and flatten the event stream for one match, tagging each row with
    the match's identifying ids so frames from several matches can be combined.

    Args:
        match: match record exposing ``match_id`` as an attribute and
            'match_id' / 'competition.competition_id' / 'season.season_id'
            as keys.
    """
    payload = requests.get(EVENTS_URL.format(match.match_id)).json()
    events = json_normalize(payload)
    return events.assign(
        match_id=match['match_id'],
        competition_id=match['competition.competition_id'],
        season_id=match['season.season_id']
    )
Example 70
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected) 
Example 71
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_normalize.py    Apache License 2.0 5 votes vote down vote up
def test_empty_array(self):
        result = json_normalize([])
        expected = DataFrame()
        tm.assert_frame_equal(result, expected) 
Example 72
Project: airqdata   Author: dr-1   File: luftdaten.py    GNU General Public License v3.0 4 votes vote down vote up
def get_metadata(self, **retrieval_kwargs):
        """Get sensor metadata and current measurements from cache or
        luftdaten.info API.

        Args:
            retrieval_kwargs: keyword arguments to pass to retrieve
                function

        Warns:
            UserWarning if sensor does not appear to be online
        """

        # Get and cache metadata and measurements of past five minutes
        filename = os.path.basename(self.metadata_url.rstrip("/")) + ".json"
        filepath = os.path.join(cache_dir, filename)
        parsed = retrieve(cache_file=filepath,
                          url=self.metadata_url,
                          label=("sensor {} metadata from luftdaten.info"
                                 .format(self.sensor_id)),
                          call_rate_limiter=call_rate_limiter,
                          **retrieval_kwargs)

        try:
            # first row carries the sensor-level fields; the per-measurement
            # columns are dropped here and handled separately below
            metadata = (parsed
                        .drop(columns=["sensordatavalues", "timestamp"])
                        .iloc[0])
        except (ValueError, AttributeError):
            # retrieval returned nothing usable — sensor likely offline
            warnings.warn("Sensor metadata could not be retrieved")
        else:
            metadata.name = "metadata"
            self.metadata = metadata

            # Extract metadata into corresponding properties
            self.sensor_type = metadata["sensor.sensor_type.name"]
            self.lat = float(metadata["location.latitude"])
            self.lon = float(metadata["location.longitude"])
            self.label = "at " + utils.label_coordinates(self.lat, self.lon)

            # Extract most current measurements
            current = parsed["sensordatavalues"].iloc[-1]
            current = (json_normalize(current)
                       .replace({"P1": "pm10", "P2": "pm2.5"})
                       .set_index("value_type")["value"])
            # BUG FIX: `pd.np` (the pandas numpy alias) was deprecated in
            # pandas 1.0 and later removed; a plain float NaN behaves the
            # same in `replace`. 999.9/1999.9 appear to be the sensors'
            # error sentinel readings — TODO confirm against sensor docs.
            current = (pd.to_numeric(current)
                       .replace([999.9, 1999.9], float("nan")))
            self.current_measurements = dict(current)
            self.phenomena = list(current.index)
            self.units = {phenomenon: UNITS[phenomenon]
                          for phenomenon in UNITS
                          if phenomenon in self.phenomena}
Example 73
Project: airqdata   Author: dr-1   File: luftdaten.py    GNU General Public License v3.0 4 votes vote down vote up
def search_proximity(lat=50.848, lon=4.351, radius=8):
    """Find sensors within given radius from a location.

    Args:
        lat: latitude of the center of search, in decimal degrees
        lon: longitude of the center of search, in decimal degrees
        radius: maximum distance from center, in kilometers

    Default values are the approximate center and radius of Brussels.

    Returns:
        Dataframe of matching sensors, listing sensor types, locations
        and distances in kilometers from the search center, indexed by
        sensor ID

    Raises:
        requests.HTTPError if request failed
    """
    url = (API_ENDPOINTS["proximity search pattern"]
           .format(lat=lat, lon=lon, radius=radius))
    # throttle before hitting the public API
    call_rate_limiter()
    response = requests.get(url)
    response.raise_for_status()
    # flatten the nested JSON; one record per reported measurement
    sensors = json_normalize(response.json())
    if len(sensors) == 0:
        # no matches: return an empty frame with the documented schema
        sensors = pd.DataFrame(columns=["sensor_type", "latitude", "longitude",
                                        "distance"])
        sensors.index.name = "sensor_id"
        return sensors
    # keep only the identifying columns and give them friendly names
    sensors = (sensors[["sensor.id", "sensor.sensor_type.name",
                        "location.latitude", "location.longitude"]]
               .rename(columns={"sensor.id": "sensor_id",
                                "sensor.sensor_type.name": "sensor_type",
                                "location.latitude": "latitude",
                                "location.longitude": "longitude"}))
    # coerce coordinate columns to a compact numeric dtype
    for col in "latitude", "longitude":
        sensors[col] = pd.to_numeric(sensors[col], downcast="float")
    sensors.set_index("sensor_id", inplace=True)

    # Drop duplicates - sensors appear once for each measurement in past 5 mins
    sensors = sensors[~sensors.index.duplicated()]

    # Calculate distances from search center and sort by those distances
    sensors["distance"] = sensors.apply(lambda x:
                                        utils.haversine(lat, lon,
                                                        float(x["latitude"]),
                                                        float(x["longitude"])),
                                        axis=1)
    sensors.sort_values("distance", inplace=True)

    return sensors
Example 74
Project: recruit   Author: Frank-qlu   File: test_normalize.py    Apache License 2.0 4 votes vote down vote up
def test_json_normalize_errors(self):
        """GH 14583: meta keys missing from some records produce empty values
        under errors='ignore' and a KeyError naming the missing key under
        errors='raise'."""
        # GH14583: If meta keys are not always present
        # a new option to set errors='ignore' has been implemented
        # Fixture: two trades; only the first carries 'general.trade_version'.
        i = {
            "Trades": [{
                "general": {
                    "tradeid": 100,
                    "trade_version": 1,
                    "stocks": [{

                        "symbol": "AAPL",
                        "name": "Apple",
                        "price": "0"
                    }, {
                        "symbol": "GOOG",
                        "name": "Google",
                        "price": "0"
                    }
                    ]
                }
            }, {
                "general": {
                    "tradeid": 100,
                    "stocks": [{
                        "symbol": "AAPL",
                        "name": "Apple",
                        "price": "0"
                    }, {
                        "symbol": "GOOG",
                        "name": "Google",
                        "price": "0"
                    }
                    ]
                }
            }
            ]
        }
        # errors='ignore': rows from the second trade get NaN (here compared
        # as '' after fillna) for the missing trade_version meta column
        j = json_normalize(data=i['Trades'],
                           record_path=[['general', 'stocks']],
                           meta=[['general', 'tradeid'],
                                 ['general', 'trade_version']],
                           errors='ignore')
        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}

        assert j.fillna('').to_dict() == expected

        # errors='raise': the same call must fail loudly on the missing key
        msg = ("Try running with errors='ignore' as key 'trade_version'"
               " is not always present")
        with pytest.raises(KeyError, match=msg):
            json_normalize(
                data=i['Trades'],
                record_path=[['general', 'stocks']],
                meta=[['general', 'tradeid'],
                      ['general', 'trade_version']],
                errors='raise')
Example 75
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_normalize.py    MIT License 4 votes vote down vote up
def test_json_normalize_errors(self):
        # GH14583: If meta keys are not always present
        # a new option to set errors='ignore' has been implemented
        i = {
            "Trades": [{
                "general": {
                    "tradeid": 100,
                    "trade_version": 1,
                    "stocks": [{

                        "symbol": "AAPL",
                        "name": "Apple",
                        "price": "0"
                    }, {
                        "symbol": "GOOG",
                        "name": "Google",
                        "price": "0"
                    }
                    ]
                }
            }, {
                "general": {
                    "tradeid": 100,
                    "stocks": [{
                        "symbol": "AAPL",
                        "name": "Apple",
                        "price": "0"
                    }, {
                        "symbol": "GOOG",
                        "name": "Google",
                        "price": "0"
                    }
                    ]
                }
            }
            ]
        }
        j = json_normalize(data=i['Trades'],
                           record_path=[['general', 'stocks']],
                           meta=[['general', 'tradeid'],
                                 ['general', 'trade_version']],
                           errors='ignore')
        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}

        assert j.fillna('').to_dict() == expected

        pytest.raises(KeyError,
                      json_normalize, data=i['Trades'],
                      record_path=[['general', 'stocks']],
                      meta=[['general', 'tradeid'],
                            ['general', 'trade_version']],
                      errors='raise'
                      ) 
Example 76
Project: vnpy_crypto   Author: birforce   File: test_normalize.py    MIT License 4 votes vote down vote up
def test_json_normalize_errors(self):
        # GH14583: when a meta key is not present in every record,
        # errors='ignore' should fill the missing values with NaN while
        # errors='raise' should surface a KeyError.
        stocks = [
            {"symbol": "AAPL", "name": "Apple", "price": "0"},
            {"symbol": "GOOG", "name": "Google", "price": "0"},
        ]
        trades = {
            "Trades": [
                # first record carries both meta keys
                {"general": {"tradeid": 100,
                             "trade_version": 1,
                             "stocks": [dict(s) for s in stocks]}},
                # second record deliberately lacks 'trade_version'
                {"general": {"tradeid": 100,
                             "stocks": [dict(s) for s in stocks]}},
            ]
        }

        result = json_normalize(data=trades['Trades'],
                                record_path=[['general', 'stocks']],
                                meta=[['general', 'tradeid'],
                                      ['general', 'trade_version']],
                                errors='ignore')
        expected = {
            'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
            'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
            'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
            'price': {0: '0', 1: '0', 2: '0', 3: '0'},
            'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'},
        }
        assert result.fillna('').to_dict() == expected

        pytest.raises(KeyError,
                      json_normalize, data=trades['Trades'],
                      record_path=[['general', 'stocks']],
                      meta=[['general', 'tradeid'],
                            ['general', 'trade_version']],
                      errors='raise'
                      )
Example 77
Project: vcoclient   Author: iddocohen   File: vcoclient.py    MIT License 4 votes vote down vote up
def format_by_name(self, j, name=None, search=None, filters=None, output=None, rows=None, stats=None, **args):
        """
        Convert JSON into a pandas DataFrame and filter/search/format it.

        Parameters
        ----------
        j : list of dict
            JSON records; each record is expected to carry a "name" key.
        name : str, optional
            Pattern; keep only rows whose "name" matches (str.contains).
        search : optional
            Search expression handed to ``self.__search_value`` — presumably a
            key/value query over ``j`` (TODO confirm against that helper).
        filters : str, optional
            Pattern; keep only columns whose label matches (str.contains).
        output : {"json", "csv"}, optional
            Serialize the result instead of returning a DataFrame.
        rows : optional
            Truthy: return only the (transposed) index labels as a list.
        stats : optional
            Truthy: replace the data with ``describe()`` summary statistics.

        Returns
        -------
        pandas.DataFrame, list or str, depending on ``rows`` / ``output``.
        """
        df = pd.DataFrame.from_dict(json_normalize(j, sep='_'), orient='columns')
        # Re-index the rows by each record's "name" so lookups read naturally.
        df.rename(index=df.name.to_dict(), inplace=True)

        found = True
        if search:
            expand = {}
            for k, v in self.__search_value(j, search):
                # keys come back as "<record-index>_<flattened-key>"
                i, *_ = k.split("_")
                n = j[int(i)]["name"]
                k = k[len(i) + 1:]
                expand.setdefault(n, {})
                expand[n].setdefault(k, {})
                expand[n]["name"] = n
                expand[n][k] = v

            # TODO: Not sure what is more efficient, ...(found).T or
            # ...from_dict(found, orient='index'). Fact is, from_dict does not
            # preserve order, hence using .T for now.
            found = bool(expand)
            df = pd.DataFrame(expand).T

        if name and found:
            df = df[df['name'].str.contains(name)]

        if filters and found:
            df = df[df.columns[df.columns.str.contains(filters)]]

        if stats and found:
            df = df.describe()

        if "name" in df:
            df.drop("name", axis=1, inplace=True)

        df = df.T
        # BUG FIX: `pd.np` was removed in pandas 2.0; a plain float NaN keeps
        # the original behavior (normalize None -> NaN so dropna() works).
        df.fillna(value=float("nan"), inplace=True)
        df.dropna(axis='columns', how='all', inplace=True)

        if rows:
            df = list(df.index)

        if output == "json":
            df = df.to_json()
        elif output == "csv":
            df = df.to_csv()

        return df
Example 78
Project: marketing-performance-report   Author: nicolasmelo1   File: apicalls.py    MIT License 4 votes vote down vote up
def recursiveextractor(self, datestart, dateend, campaignslist, placement, twitterdataframe=None):
        """
        Recursively pull daily Twitter Ads stats from `datestart` to `dateend`.

        Twitter's stats API does not return data broken down by date, so each
        call requests a single 24h window and the function recurses, advancing
        one day at a time, until ``datestart`` equals ``dateend``.

        Parameters
        ----------
        datestart, dateend : str
            Dates in "%Y-%m-%d" format (inclusive range).
        campaignslist : str
            Comma-separated campaign entity ids for the API call.
        placement : str
            Twitter placement identifier.
        twitterdataframe : pandas.DataFrame, optional
            Accumulator carrying results from previous recursion steps.

        Returns
        -------
        pandas.DataFrame with one row per campaign per day.
        """
        # BUG FIX: avoid a mutable default argument; build the accumulator
        # lazily instead (callers that passed a DataFrame are unaffected).
        if twitterdataframe is None:
            twitterdataframe = pandas.DataFrame()

        lastdate = datetime.datetime.strptime(datestart, "%Y-%m-%d") + datetime.timedelta(days=1)
        lastdate = str(lastdate).partition(" ")[0]
        # make the call (NOTE(review): timezone offset -0300 is hard-coded)
        content = requests.get('https://ads-api.twitter.com/2/stats/accounts/18ce54np2w4/', auth=twitterinit(),
                               params={
                                   'start_time': datestart+'T00:00:00-0300',
                                   'end_time': str(lastdate)+'T00:00:00-0300',
                                   'entity': 'CAMPAIGN',
                                   'granularity': 'TOTAL',
                                   'metric_groups': 'ENGAGEMENT,BILLING',
                                   'placement': placement,
                                   'entity_ids': campaignslist
                               }).json()

        # some json partition and normalization to convert it to dataframe
        campaignid = json_normalize(content, ['data'])
        campaignid.drop(['id_data'], inplace=True, axis=1)
        twitterreport = json_normalize(content['data'], 'id_data')
        twitterreport = pandas.concat(
            [twitterreport.drop('metrics', axis=1), pandas.DataFrame(twitterreport['metrics'].tolist())], axis=1)

        # drop what we don't need
        twitterreport.drop(['card_engagements', 'carousel_swipes', 'engagements', 'follows', 'likes', 'poll_card_vote',
                            'qualified_impressions', 'billed_engagements',
                            'replies', 'app_clicks', 'segment', 'tweets_send', 'url_clicks', 'retweets'], inplace=True, axis=1)

        # the date is implied by the requested window, so stamp it on the rows
        twitterreport['date'] = datestart

        # ETL on the metric fields; `is None` instead of `== None` (PEP 8)
        twitterreport['billed_charge_local_micro'] = twitterreport['billed_charge_local_micro'].apply(
            lambda x: 0 if x is None else int(round(sum(x)) / 1000000))
        twitterreport['impressions'] = twitterreport['impressions'].apply(lambda x: 0 if x is None else sum(x))
        twitterreport['clicks'] = twitterreport['clicks'].apply(lambda x: 0 if x is None else sum(x))

        twitterreport = pandas.concat([campaignid, twitterreport], axis=1)

        # BUG FIX: DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to extend the accumulator.
        twitterdataframe = pandas.concat([twitterdataframe, twitterreport], ignore_index=True)

        if dateend == datestart:
            return twitterdataframe
        else:
            return self.recursiveextractor(str(lastdate), dateend, campaignslist, placement, twitterdataframe)
Example 79
Project: jf   Author: alhoo   File: output.py    MIT License 4 votes vote down vote up
def profile(*args, **kwargs):
    """
    Make a profiling report from data

    This function tries to convert strings to numeric values or datetime
    objects and makes a html profiling report as the only result to be yielded.
    Notice! This fails if used with ordered_dict output.

    >>> list(map(lambda x: len(x) > 100, profile([{'a': 1}, {'a': 3}, {'a': 4}])))
    [True]
    >>> list(profile(lambda x: "/tmp/excel.html", [{'a': 1}, {'a': 3}, {'a': 4}], nan='NA'))
    []
    """
    import pandas as pd
    try:
        from pandas import json_normalize  # pandas >= 1.0
    except ImportError:
        from pandas.io.json import json_normalize  # pandas < 1.0
    import pandas_profiling

    def is_numeric(df_):
        """Return True when the column `df_` looks convertible to numbers."""
        try:
            counts = df_.value_counts()
            if len(counts) > 100:
                # Only look at some of the values if we have a large input
                # dataset. BUG FIX: previously probed the outer `df` instead
                # of the column under test `df_`.
                pd.to_numeric(counts[4:24].keys())
            else:
                pd.to_numeric(counts.keys())
            return True
        except (ValueError, AttributeError):
            pass
        return False

    arr = args[-1]
    if len(args) > 1:
        # first positional arg is a callable producing an output path
        args = [open(args[0](0), 'w')]
    else:
        args = []
    data = list(map(result_cleaner, arr))
    df = pd.DataFrame(json_normalize(data))
    na_value = kwargs.get('nan')
    for col in df.columns:
        try:
            if is_numeric(df[col]):
                if na_value:
                    # BUG FIX: Series.str.replace(pat, None) raises TypeError
                    # (repl must be str or callable); map the NA marker to a
                    # missing value element-wise instead.
                    df[col] = df[col].replace(na_value, None)
                # decimal commas -> dots before the numeric conversion
                df[col] = pd.to_numeric(df[col].str.replace(",", '.'), errors='coerce')
            else:
                df[col] = pd.to_datetime(df[col].str.replace(",", '.'))
        except (AttributeError, KeyError, ValueError, OverflowError):
            pass
    profile_data = pandas_profiling.ProfileReport(df)
    html_report = pandas_profiling.templates.template('wrapper').render(content=profile_data.html)
    if len(args):
        # BUG FIX: close the report file instead of leaking the handle
        args[0].write(html_report + "\n")
        args[0].close()
    else:
        yield html_report
Example 80
Project: MLT   Author: Maddosaurus   File: find_top_n.py    Apache License 2.0 4 votes vote down vote up
def find_top_n(path, model, n):
    """
    Print LaTeX tables of the top-n experiment runs found under `path`.

    Each sub-folder of `path` is expected to contain `<model>_metrics.json`
    (aggregated metrics) and optionally `call_parameters.txt` (the CLI call
    that produced the run). Runs are ranked by F1 score, precision, recall
    and AUC, and each ranking is printed via `_print_as_ltx_table`.

    Parameters
    ----------
    path : str
        Directory whose sub-folders hold the experiment results.
    model : str
        Model name prefix of the metrics JSON file.
    n : int or str
        Number of top entries to report.
    """
    n = int(n)
    folderlist = [f for f in os.listdir(path) if not os.path.isfile(os.path.join(path, f))]
    frames = []

    print('Reading data')
    for folder in folderlist:
        # Get the data itself
        with open(os.path.join(path, folder, model + "_metrics.json")) as fmetric:
            jdata = json.load(fmetric)
            ndata = json_normalize(jdata)
            ndata = ndata.filter([
                'precision.mean', 'precision.sd',
                'recall.mean', 'recall.sd',
                'f1_score.mean', 'f1_score.sd',
                'auc.mean', 'auc.sd',
                'training_time_mean'
            ])

        # as well as the call params
        try:
            with open(os.path.join(path, folder, 'call_parameters.txt')) as cparms:
                for line in cparms:
                    try:
                        call_vals = line.split('[')[1].split(']')[0].replace("'", "")
                        call_vals = call_vals.split(', ')
                        call_df = pandas.DataFrame(data=[call_vals])
                        call_df['folder_name'] = folder
                        ndata = pandas.concat([ndata, call_df], axis=1)
                        frames.append(ndata)
                    except IndexError:
                        pass  # TODO: Find a more efficient alternative
        except FileNotFoundError:
            pass

    # BUG FIX: DataFrame.append was removed in pandas 2.0 (and appending in a
    # loop is quadratic); collect the frames and concatenate once instead.
    datadf = pandas.concat(frames, ignore_index=True, sort=False) if frames else pandas.DataFrame()

    print("Top {} by F1 score:".format(n))
    _print_as_ltx_table(datadf.nlargest(n, 'f1_score.mean'))

    print("Top {} by Precision:".format(n))
    _print_as_ltx_table(datadf.nlargest(n, 'precision.mean'))

    print("Top {} by Recall:".format(n))
    _print_as_ltx_table(datadf.nlargest(n, 'recall.mean'))

    print("Top {} by AUC:".format(n))
    _print_as_ltx_table(datadf.nlargest(n, 'auc.mean'))