Python demjson.decode() Examples

The following code examples show how to use demjson.decode(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: backtrader-cn   Author: pandalibin   File: sina.py    GNU General Public License v3.0 6 votes vote down vote up
def jsonp2dict(jsonp):
    """
    Parse a JSONP-style response string into a Python value.

    The payload is the text between the first "(" and the last ")", with
    one additional character dropped on each side.

    :param jsonp: raw JSONP response text
    :return: the decoded payload; ``True`` when the payload is
        ``new Boolean(true)``; ``None`` when the payload is ``null`` or when
        the input has no parentheses (an error is logged in that case)
    :raises demjson.JSONDecodeError: when the payload cannot be decoded and
        is not one of the special cases above
    """
    try:
        start = jsonp.index("(") + 2
        stop = jsonp.rindex(")") - 1
    except ValueError:
        logger.error("Input is not in a jsonp format. %s" % jsonp)
        return
    payload = jsonp[start:stop]

    try:
        return demjson.decode(payload)
    except demjson.JSONDecodeError as e:
        # A couple of non-JSON payloads are mapped to Python values by hand.
        if payload == "new Boolean(true)":
            return True
        if payload == "null":
            return None
        logger.error("解析jsonp返回失败")
        logger.error(jsonp)
        raise e
Example 2
Project: fabric8-analytics-rudra   Author: fabric8-analytics   File: npm_bigquery.py    Apache License 2.0 6 votes vote down vote up
def handle_corrupt_packagejson(content):
    """Find dependencies from corrupted/invalid package.json.

    The text of the first ``dependencies`` object is located with a regular
    expression, split into lines and then on commas, and every
    ``"name": "version"`` looking fragment is collected. The collected pairs
    are re-assembled into a small JSON document and decoded.

    Returns the decoded ``{"dependencies": {...}}`` dict, or ``{}`` when
    anything goes wrong (the error is logged).
    """
    section_re = re.compile(
        r'dependencies[\'"](?:|.|\s+):(?:|.|\s+)\{(.*?)\}', flags=re.DOTALL)
    entry_pattern = (r"(?:\"|\')(?P<pkg>[^\"]*)(?:\"|\')(?=:)"
                     r"(?:\:\s*)(?:\"|\')?(?P<ver>.*)(?:\"|\')")
    pairs = []
    try:
        section = section_re.search(content)
        # When there is no match, subscripting None raises and the
        # handler below returns {}.
        for row in section[1].splitlines():
            for fragment in row.split(','):
                hit = re.search(entry_pattern, fragment.strip(),
                                re.MULTILINE | re.DOTALL)
                if not hit:
                    continue
                pairs.append('"{}": "{}"'.format(hit['pkg'], hit['ver']))

        return demjson.decode('{"dependencies": {%s}}' % ', '.join(pairs))
    except Exception as _exc:
        logger.error("IGNORE {}".format(str(_exc)))
        return {}
Example 3
Project: zvt   Author: zvtvz   File: china_etf_list_spider.py    MIT License 6 votes vote down vote up
def run(self):
    """Fetch and persist the ETF lists and their constituents.

    The Shanghai list comes from a JSON endpoint and the Shenzhen list from
    an Excel download. Each list is persisted and then handed to the
    matching constituent downloader.
    """
    # Shanghai ETF list
    sh_url = 'http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW'
    sh_response = requests.get(sh_url, headers=DEFAULT_SH_ETF_LIST_HEADER)
    sh_payload = demjson.decode(sh_response.text)
    sh_df = pd.DataFrame(sh_payload.get('result', []))
    self.persist_etf_list(sh_df, exchange='sh')
    self.logger.info('沪市 ETF 列表抓取完成...')

    # Shanghai ETF constituents
    self.download_sh_etf_component(sh_df)
    self.logger.info('沪市 ETF 成分股抓取完成...')

    # Shenzhen ETF list (served as an Excel file, every column read as str)
    sz_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945'
    sz_response = requests.get(sz_url)
    sz_df = pd.read_excel(io.BytesIO(sz_response.content), dtype=str)
    self.persist_etf_list(sz_df, exchange='sz')
    self.logger.info('深市 ETF 列表抓取完成...')

    # Shenzhen ETF constituents
    self.download_sz_etf_component(sz_df)
    self.logger.info('深市 ETF 成分股抓取完成...')
Example 4
Project: OpenData   Author: PKUJohnson   File: fund_agent.py    Apache License 2.0 6 votes vote down vote up
def _get_fund_list_onepage(self, company='', page_no = 1, page_size = 100):
        url    = 'http://fund.eastmoney.com/Data/Fund_JJJZ_Data.aspx?page=%d,%d&gsid=%s' % (page_no, page_size, company)
        prefix = 'var db='
        response = self.do_request(url)
        response = self._get_and_parse_js(url, prefix)
        if response is None:
            return None, '获取数据失败'

        jsonobj = demjson.decode(response)
        rsp = jsonobj['datas']
        datestr = jsonobj['showday']
        df = pd.DataFrame(rsp)
        if len(df) > 0:
            df.drop(df.columns[5:], axis=1, inplace=True)
            df.columns = ['fundcode', 'fundname', 'pingyin', 'nav', 'accu_nav']
            df['date'] = datestr[0]
            return df, ''
        else:
            return None, '' 
Example 5
Project: OpenData   Author: PKUJohnson   File: fund_agent.py    Apache License 2.0 6 votes vote down vote up
def _get_fundlist_by_type_page(self, type, page_no=1, page_size=100):
    """Fetch one page of the fund list for a given fund type.

    :param type: key into the module-level ``fund_type`` mapping; the mapped
        value is passed on as the request parameter
    :param page_no: page number for the ``page`` query parameter
    :param page_size: page size for the ``page`` query parameter
    :return: ``(df, '')`` on success, otherwise ``(None, '获取数据失败')``
    """
    url = 'http://fund.eastmoney.com/Data/Fund_JJJZ_Data.aspx?page=%d,%d' % (page_no, page_size)
    prefix = 'var db='
    type_param = fund_type[type]

    response = self._get_and_parse_js(url, prefix, param=type_param)
    # Sibling methods treat a None response as a failed download; do the
    # same here instead of passing None to the decoder.
    if response is None:
        return None, '获取数据失败'

    jsonobj = demjson.decode(response)
    rsp = jsonobj['datas']
    datestr = jsonobj['showday']
    df = pd.DataFrame(rsp)
    if len(df) == 0:
        return None, '获取数据失败'

    # Keep only the first five columns and give them stable names.
    df.drop(df.columns[5:], axis=1, inplace=True)
    df.columns = ['fundcode', 'fundname', 'pingyin', 'nav', 'accu_nav']
    df['date'] = datestr[0]
    return df, ''
Example 6
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: fileserver.py    GNU General Public License v3.0 6 votes vote down vote up
def makeService(self, options):
    """Build the service tree for the file server component.

    Reads the JSON configuration (comments allowed) from the path in
    ``options['config']``, initialises logging with it, and returns a
    ``MultiService`` that parents the ``Fileserver`` component and the
    service returned by its ``setup()``.
    """
    from kontalk.fileserver.fileserver import Fileserver
    from kontalk.xmppserver import log

    # load configuration; the context manager closes the file even when
    # reading or decoding raises
    with open(options['config'], 'r') as fp:
        config = demjson.decode(fp.read(), allow_comments=True)

    log.init(config)

    appl = MultiService()
    comp = Fileserver(config)
    comp.setServiceParent(appl)
    comp.setup().setServiceParent(appl)

    return appl
Example 7
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: xmppnet.py    GNU General Public License v3.0 6 votes vote down vote up
def makeService(self, options):
    """Build the service tree for the net component.

    Reads the JSON configuration (comments allowed) from the path in
    ``options['config']``, initialises logging with it, and returns a
    ``MultiService`` that parents the ``NetComponent`` and the service
    returned by its ``setup()``.
    """
    from kontalk.xmppserver.component.net import NetComponent
    from kontalk.xmppserver import log

    # load configuration; the context manager closes the file even when
    # reading or decoding raises
    with open(options['config'], 'r') as fp:
        config = demjson.decode(fp.read(), allow_comments=True)

    log.init(config)

    appl = MultiService()
    comp = NetComponent(config)
    comp.setServiceParent(appl)
    comp.setup().setServiceParent(appl)
    return appl
Example 8
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: xmppc2s.py    GNU General Public License v3.0 6 votes vote down vote up
def makeService(self, options):
    """Build the service tree for the client-to-server component.

    Reads the JSON configuration (comments allowed) from the path in
    ``options['config']``, initialises logging with it, and returns a
    ``MultiService`` that parents the ``C2SComponent`` and every service
    yielded by its ``setup()``.
    """
    from kontalk.xmppserver.component.c2s.component import C2SComponent
    from kontalk.xmppserver import log

    # load configuration; the context manager closes the file even when
    # reading or decoding raises
    with open(options['config'], 'r') as fp:
        config = demjson.decode(fp.read(), allow_comments=True)

    log.init(config)

    appl = MultiService()
    comp = C2SComponent(config)
    comp.setServiceParent(appl)
    # setup() yields several services; attach each one to the parent
    for service in comp.setup():
        service.setServiceParent(appl)

    return appl
Example 9
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: xmpprouter.py    GNU General Public License v3.0 6 votes vote down vote up
def makeService(self, options):
    """Build the listening service for the XMPP router.

    Reads the JSON configuration (comments allowed) from the path in
    ``options['config']``, initialises logging with it, and returns a
    strports service bound to ``config['bind']`` that serves a router
    factory created with ``config['secret']``. Traffic logging on both the
    router and the factory follows ``config['debug']``.
    """
    from kontalk.xmppserver.component import router
    from kontalk.xmppserver import log

    # load configuration; the context manager closes the file even when
    # reading or decoding raises
    with open(options['config'], 'r') as fp:
        config = demjson.decode(fp.read(), allow_comments=True)

    log.init(config)

    engine = router.Router()
    engine.logTraffic = config['debug']

    factory = router.XMPPRouterFactory(engine, config['secret'])
    factory.logTraffic = config['debug']

    return strports.service(str(config['bind']), factory)
Example 10
Project: fooltrader   Author: foolcage   File: sina_category_spider.py    MIT License 6 votes vote down vote up
def download_sina_category_detail(self, response):
    """Merge the category named in ``response.meta`` into the stock tables.

    ``response.text`` is decoded as a list of entries with a ``code`` key.
    For each code present in the matching exchange table (``self.sh_df`` or
    ``self.sz_df``), the cell in column ``self.category_type`` is updated
    with ``response.meta['ind_name']``:

    * a list cell gets the name appended if it is not already present;
    * a string cell becomes a two-item list when the name differs;
    * any other cell value is replaced by the name.

    Nothing happens when the body is ``None`` or the literal ``'null'``.
    """
    if response.text is None or response.text == 'null':
        return

    category_jsons = demjson.decode(response.text)
    for category in category_jsons:
        code = category['code']
        self.file_lock.acquire()
        try:
            exchange = get_exchange(code)
            if exchange == 'sh':
                df = self.sh_df
            elif exchange == 'sz':
                df = self.sz_df
            else:
                # Unknown exchange: there is no table to update. Previously
                # this hit an unbound (or stale) ``df``.
                continue

            if code not in df.index:
                continue

            ind_name = response.meta['ind_name']
            current_ind = df.at[code, self.category_type]

            if isinstance(current_ind, list):
                # Keep the existing list; it used to be overwritten by a
                # bare string when the name was already present.
                if ind_name not in current_ind:
                    current_ind.append(ind_name)
            elif isinstance(current_ind, str):
                if current_ind != ind_name:
                    current_ind = [current_ind, ind_name]
            else:
                current_ind = ind_name

            df.at[code, self.category_type] = current_ind
        finally:
            # Always release, even when the update raises.
            self.file_lock.release()
Example 11
Project: jsonlike   Author: shaunvxc   File: api.py    MIT License 6 votes vote down vote up
def loads(content, try_yaml=False):
    """Decode JSON-like text, falling back to increasingly lenient parsers.

    The attempts, in order, are: strict ``json.loads`` on the raw text,
    ``json.loads`` on the text cleaned by ``clean_json``, ``demjson.decode``
    on the cleaned text, and, only when ``try_yaml`` is true, YAML.

    :param content: the text to decode
    :param try_yaml: when true, try YAML as the last resort
    :return: the decoded value
    :raises Exception: the demjson error when every attempt fails
    """
    try:
        # Return the strict result; it used to be discarded, so valid JSON
        # input produced None.
        return json.loads(content)
    except Exception:
        cleaned = clean_json(content)
        try:
            # strip out HTML content and unescaped chars
            return json.loads(cleaned)
        except Exception:
            # try using demjson to decode a non-strict json string
            try:
                return demjson.decode(cleaned)
            except Exception:
                if try_yaml:
                    # YAML is a superset of JSON.
                    # NOTE(review): yaml.load on untrusted input can build
                    # arbitrary objects; consider yaml.safe_load.
                    return yaml.load(cleaned)
                raise
Example 12
Project: akshare   Author: jindaxiang   File: zh_stock_ah_tx.py    MIT License 6 votes vote down vote up
def stock_zh_ah_daily(symbol="02318", start_year="2000", end_year="2019"):
    """Download daily bars for one symbol, one request per year.

    Years run from ``start_year`` up to, but not including, ``end_year``.
    Years whose response lacks the expected data are skipped.

    :param symbol: stock code; requests use it with an ``hk`` prefix
    :param start_year: first year to request (anything ``int()`` accepts)
    :param end_year: year at which to stop, exclusive
    :return: one DataFrame with the columns 日期, 开盘, 收盘, 最高, 最低 and
        成交量; empty when no year produced data
    """
    frames = []
    for year in range(int(start_year), int(end_year)):
        hk_stock_payload_copy = hk_stock_payload.copy()
        hk_stock_payload_copy.update({"_var": f"kline_dayhfq{year}"})
        hk_stock_payload_copy.update({"param": f"hk{symbol},day,{year}-01-01,{int(year) + 1}-12-31,640,hfq"})
        hk_stock_payload_copy.update({"r": random.random()})
        res = requests.get(hk_stock_url, params=hk_stock_payload_copy, headers=hk_stock_headers)
        # The body wraps a JSON object; keep only the outermost braces.
        data_json = demjson.decode(res.text[res.text.find("{"): res.text.rfind("}") + 1])
        try:
            temp_df = pd.DataFrame(data_json["data"][f"hk{symbol}"]["hfqday"])
        except Exception:
            # No usable data for this year. A bare ``except`` would also
            # swallow KeyboardInterrupt.
            continue
        temp_df.columns = ["日期", "开盘", "收盘", "最高", "最低", "成交量", "_", "_", "_"]
        temp_df = temp_df[["日期", "开盘", "收盘", "最高", "最低", "成交量"]]
        print("正在采集{}第{}年的数据".format(symbol, year))
        frames.append(temp_df)

    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames, ignore_index=True)
Example 13
Project: akshare   Author: jindaxiang   File: sina_futures_index.py    MIT License 6 votes vote down vote up
def zh_subscribe_exchange_symbol(exchange="dce"):
    """Return the symbol table of one exchange as a DataFrame.

    The response body is sliced from the first "{" to the first "};" and
    decoded. For a supported exchange key, the entry equal to that key's
    fixed label string is removed from its list before the list is wrapped
    in a DataFrame.

    :param exchange: one of "czce", "dce", "shfe", "cffex"
    :return: the DataFrame, or ``None`` for any other key
    """
    body = requests.get(zh_subscribe_exchange_symbol_url).text
    data_json = demjson.decode(body[body.find("{"): body.find("};") + 1])

    label_by_exchange = {
        "czce": "郑州商品交易所",
        "dce": "大连商品交易所",
        "shfe": "上海期货交易所",
        "cffex": "中国金融期货交易所",
    }
    if exchange not in label_by_exchange:
        return None

    entries = data_json[exchange]
    entries.remove(label_by_exchange[exchange])
    return pd.DataFrame(entries)
Example 14
Project: akshare   Author: jindaxiang   File: sina_futures_index.py    MIT License 6 votes vote down vote up
def match_main_contract(exchange="dce"):
    """Collect the first listed contract for every symbol of an exchange.

    For each value in the second column of the exchange's symbol table, the
    contract list is requested and the first three fields of its first row
    are kept. Symbols with no rows are reported and skipped.

    :param exchange: exchange key passed to ``zh_subscribe_exchange_symbol``
    :return: a DataFrame with one row per symbol that had data
    """
    subscribe_cffex_list = []
    exchange_symbol_list = zh_subscribe_exchange_symbol(
        exchange).iloc[:, 1].tolist()
    for item in exchange_symbol_list:
        # NOTE: this updates the module-level payload dict in place.
        zh_match_main_contract_payload.update({"node": item})
        res = requests.get(
            zh_match_main_contract_url,
            params=zh_match_main_contract_payload)
        data_df = pd.DataFrame(demjson.decode(res.text))
        try:
            main_contract = data_df.iloc[0, :3]
        except Exception:
            # An empty table has no first row. A bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit.
            print(item, "无主力连续合约")
            continue
        subscribe_cffex_list.append(main_contract)
    print("主力连续合约获取成功")
    return pd.DataFrame(subscribe_cffex_list)
Example 15
Project: akshare   Author: jindaxiang   File: zh_futures_sina.py    MIT License 6 votes vote down vote up
def zh_subscribe_exchange_symbol(exchange="dce"):
    """Return the symbol table of one exchange as a DataFrame.

    The response body is sliced from the first "{" to the first "};" and
    decoded. For a supported exchange key, the entry equal to that key's
    fixed label string is removed from its list before the list is wrapped
    in a DataFrame.

    :param exchange: one of "czce", "dce", "shfe", "cffex"
    :return: the DataFrame, or ``None`` for any other key
    """
    text = requests.get(zh_subscribe_exchange_symbol_url).text
    start = text.find("{")
    stop = text.find("};") + 1
    data_json = demjson.decode(text[start: stop])

    labels = (
        ("czce", "郑州商品交易所"),
        ("dce", "大连商品交易所"),
        ("shfe", "上海期货交易所"),
        ("cffex", "中国金融期货交易所"),
    )
    for key, label in labels:
        if exchange == key:
            data_json[key].remove(label)
            return pd.DataFrame(data_json[key])
    return None
Example 16
Project: akshare   Author: jindaxiang   File: zh_futures_sina.py    MIT License 6 votes vote down vote up
def match_main_contract(exchange="dce"):
    """Build a comma-separated subscription string for an exchange.

    For each value in the second column of the exchange's symbol table, the
    contract list is requested. The rows flagged as duplicates on the
    columns from the fourth onward are selected, and the ``symbol`` of the
    first such row is kept. Symbols with no such row are reported and
    skipped.

    :param exchange: exchange key passed to ``zh_subscribe_exchange_symbol``
    :return: the kept symbols, each prefixed with ``nf_``, joined by commas
    """
    subscribe_cffex_list = []
    exchange_symbol_list = zh_subscribe_exchange_symbol(
        exchange).iloc[:, 1].tolist()
    for item in exchange_symbol_list:
        # NOTE: this updates the module-level payload dict in place.
        zh_match_main_contract_payload.update({"node": item})
        res = requests.get(
            zh_match_main_contract_url,
            params=zh_match_main_contract_payload)
        data_df = pd.DataFrame(demjson.decode(res.text))
        try:
            main_contract = data_df[data_df.iloc[:, 3:].duplicated()]
            symbol = main_contract["symbol"].values[0]
        except Exception:
            # No matching row for this symbol. A bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit.
            print(item, "无主力合约")
            continue
        print(symbol)
        subscribe_cffex_list.append(symbol)
    print("主力合约获取成功")
    return ','.join(["nf_" + item for item in subscribe_cffex_list])
Example 17
Project: parsechain   Author: Suor   File: chains.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def ld(node):
    """Return the decoded ``application/ld+json`` script content of *node*.

    Strict ``json.loads`` is tried first. If it fails, demjson is imported
    lazily and tried as a lenient fallback. When the fallback fails too
    (including when demjson is not installed), the original strict-parser
    error is raised.
    """
    text = C.css('script[type="application/ld+json"]').inner_text(node)
    try:
        return json.loads(text)
    except ValueError as e:
        try:
            # Try parsing non-strict
            import demjson
            return demjson.decode(text)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            raise e  # reraise first one

    # Select 
Example 18
Project: Starx_Pixiv_Collector   Author: SuzukiHonoka   File: start.py    MIT License 5 votes vote down vote up
def get_illust_infos_from_illust_url(url):
    """Extract illustration metadata from an illustration page.

    The page's ``<meta name="preload-data">`` content is decoded, and the
    first entry under its ``illust`` key supplies the values. A fixed set of
    fields is copied verbatim, the tag names are flattened into a list under
    ``tags``, and part of the result is passed to ``update_database``.

    :param url: address of the illustration page
    :return: dict with the copied fields plus ``tags``
    """
    page_text = get_text_from_url(url)
    soup = BeautifulSoup(page_text, 'html.parser')
    preload = soup.find(name='meta', attrs={'name': 'preload-data'}).attrs['content']
    decoded = demjson.decode(preload)
    first_id = list(decoded['illust'].keys())[0]
    illust_info = decoded['illust'][first_id]

    # Fields copied one-to-one, in this order.
    copied_keys = (
        'illustId', 'illustTitle', 'illustComment', 'createDate',
        'illustType', 'urls', 'userId', 'userName', 'userAccount',
        'likeData', 'width', 'height', 'pageCount', 'bookmarkCount',
        'likeCount', 'commentCount', 'viewCount', 'isOriginal',
    )
    data_dict = {}
    for key in copied_keys:
        data_dict[key] = illust_info[key]

    # Only the tag names are kept from the nested tag records.
    data_dict['tags'] = [entry['tag'] for entry in illust_info['tags']['tags']]

    update_database(data_dict['illustId'], data_dict['illustTitle'], data_dict['illustType'], data_dict['userId'],
                    data_dict['userName'], data_dict['tags'], data_dict['urls'])
    return data_dict
Example 19
Project: cms-grafana-builder   Author: sunny0826   File: aliyun_base.py    Apache License 2.0 5 votes vote down vote up
def line_template(self, template, line_name, line_id, metric, project, ycol, period=300):
    """Render *template* with the line parameters and decode the output.

    ``line_name`` is passed as a string under ``name`` and ``line_id`` under
    ``id``; the other arguments are passed under their own names.
    """
    rendered = template.render(
        name=str(line_name),
        id=line_id,
        metric=metric,
        project=project,
        period=period,
        ycol=ycol,
    )
    return demjson.decode(rendered)
Example 20
Project: cms-grafana-builder   Author: sunny0826   File: aliyun_base.py    Apache License 2.0 5 votes vote down vote up
def panels_template(self, index, template, title, targets, format, redline=80):
    """Render a panel *template* for position *index* and decode the output.

    The panel is 12 wide and 8 high. Its id is ``index + 3``, ``x``
    alternates between 0 and 12, and ``y`` is ``(index % 8) * 8``.
    """
    context = dict(
        id=(index + 3),
        h=8,
        w=12,
        x=(index % 2) * 12,
        y=(index % 8) * 8,
        title=str(title),
        format=format,
        targets=targets,
        redline=redline,
    )
    return demjson.decode(template.render(**context))
Example 21
Project: cms-grafana-builder   Author: sunny0826   File: aliyun_base.py    Apache License 2.0 5 votes vote down vote up
def read_metric_config_map(self, metric):
    """Return the ``metric_list`` stored for *metric* in a config map.

    The config map named ``grafana-cms-metric`` is read from the namespace
    given by the ``INIT_POD_NAMESPACE`` environment variable (``default``
    when unset). Its data entry for *metric* is decoded and the
    ``metric_list`` value returned.
    """
    config.load_incluster_config()
    # config.load_kube_config()
    core_api = client.CoreV1Api()
    target_namespace = getenv('INIT_POD_NAMESPACE', 'default')
    config_map = core_api.read_namespaced_config_map(
        name="grafana-cms-metric", namespace=target_namespace)
    decoded = demjson.decode(config_map.data.get(metric))
    return decoded['metric_list']
Example 22
Project: cms-grafana-builder   Author: sunny0826   File: aliyun_slb.py    Apache License 2.0 5 votes vote down vote up
def _card_template(self, index, template, slb_id, slb_name, metric, project, thresholds):
    """Render a card *template* for position *index* and decode the output.

    The card is 4 wide and 8 high. Its id is ``index + 2``, ``x`` is
    ``(index % 6) * 4`` and ``y`` is ``(index % 8) * 8``.
    """
    context = dict(
        id=(index + 2),
        h=8,
        w=4,
        x=(index % 6) * 4,
        y=(index % 8) * 8,
        LoadBalancerId=slb_id,
        metric=metric,
        project=project,
        name=slb_name,
        thresholds=thresholds,
    )
    return demjson.decode(template.render(**context))
Example 23
Project: cms-grafana-builder   Author: sunny0826   File: aliyun_eip.py    Apache License 2.0 5 votes vote down vote up
def panels_template(self, index, template, title, targets, format, redline=80):
    """Render a full-width panel *template* and decode the output.

    The panel is 24 wide and 8 high at ``x = 0``. Its id is ``index + 3``
    and ``y`` is ``(index % 8) * 8``.
    """
    context = dict(
        id=(index + 3),
        h=8,
        w=24,
        x=0,
        y=(index % 8) * 8,
        title=str(title),
        format=format,
        targets=targets,
        redline=redline,
    )
    return demjson.decode(template.render(**context))
Example 24
Project: fabric8-analytics-rudra   Author: fabric8-analytics   File: npm_bigquery.py    Apache License 2.0 5 votes vote down vote up
def construct_packages(self, content):
    """Return the dependency names declared in a package.json payload.

    *content* may be ``str`` or a bytes-like object with ``decode()``. When
    it cannot be decoded as JSON, ``handle_corrupt_packagejson`` is used as
    a fallback. Empty content, or a ``dependencies`` entry that is not a
    dict, yields an empty list.
    """
    if not content:
        return []

    if not isinstance(content, str):
        content = content.decode()

    try:
        decoded_json = demjson.decode(content)
    except Exception as _exc:
        logger.error("IGNORE {}".format(str(_exc)))
        decoded_json = self.handle_corrupt_packagejson(content)

    dependencies = {}
    if decoded_json and isinstance(decoded_json, dict):
        dependencies = decoded_json.get('dependencies', {})

    if isinstance(dependencies, dict):
        return list(dependencies.keys())
    return list([])
Example 25
Project: LibCloud   Author: stewnorriss   File: scrape-ec2-prices.py    Apache License 2.0 5 votes vote down vote up
def scrape_ec2_pricing():
    """Download the pricing documents and collect the USD price per size.

    Every URL in ``LINUX_PRICING_URLS`` is fetched. ``.json`` documents are
    decoded directly. ``.js`` documents are expected to wrap their data in a
    ``callback(...)`` call, whose argument is decoded with demjson. URLs
    with any other suffix are skipped.

    :return: mapping of region name (translated via ``REGION_NAME_MAP``) to
        an ordered mapping of size name to price
    :raises ValueError: when a ``.js`` document has no ``callback(...)``
    """
    result = defaultdict(OrderedDict)

    for url in LINUX_PRICING_URLS:
        response = requests.get(url)

        if url.endswith('.json'):
            data = response.json()
        elif url.endswith('.js'):
            # Match against decoded text: a str pattern cannot be applied
            # to the raw bytes in ``response.content``.
            match = re.match(r'^.*callback\((.*?)\);?$', response.text,
                             re.MULTILINE | re.DOTALL)
            if match is None:
                raise ValueError(
                    'No callback(...) wrapper found in %s' % url)
            # demjson supports non-strict mode and can parse unquoted objects
            data = demjson.decode(match.group(1))
        else:
            # Unknown document type. Previously this reused the data of the
            # previous URL, or failed on the first one.
            continue

        for region_data in data['config']['regions']:
            region_name = region_data['region']
            libcloud_region_name = REGION_NAME_MAP[region_name]

            for instance_type in region_data['instanceTypes']:
                for size in instance_type['sizes']:
                    price = size['valueColumns'][0]['prices']['USD']
                    result[libcloud_region_name][size['size']] = price

    return result
Example 26
Project: zvt   Author: zvtvz   File: sh_stock_summary_recorder.py    MIT License 5 votes vote down vote up
def record(self, entity, start, end, size, timestamps):
    """Build summary records for the given timestamps.

    One request is made per timestamp. A record is produced only when the
    decoded ``result`` list contains exactly one entry whose
    ``productType`` is ``'1'``. Collection stops early once more than
    ``self.batch_size`` records have been gathered.

    :return: list of record dicts (possibly empty)
    """
    collected = []
    for timestamp in timestamps:
        url = self.url.format(to_time_str(timestamp))
        body = requests.get(url=url, headers=DEFAULT_SH_SUMMARY_HEADER).text

        # The payload sits between the first "(" and the first ")".
        payload = body[body.index("(") + 1:body.index(")")]
        results = demjson.decode(payload)['result']
        matching = [row for row in results if row['productType'] == '1']
        if len(matching) != 1:
            continue

        result_json = matching[0]
        # Some older data is missing; such values default to 0.0.
        collected.append({
            'provider': 'exchange',
            'timestamp': timestamp,
            'name': '上证指数',
            'pe': to_float(result_json['profitRate'], 0.0),
            'total_value': to_float(result_json['marketValue1'] + '亿', 0.0),
            'total_tradable_vaule': to_float(result_json['negotiableValue1'] + '亿', 0.0),
            'volume': to_float(result_json['trdVol1'] + '万', 0.0),
            'turnover': to_float(result_json['trdAmt1'] + '亿', 0.0),
            'turnover_rate': to_float(result_json['exchangeRate'], 0.0),
        })

        if len(collected) > self.batch_size:
            return collected

    return collected
Example 27
Project: zvt   Author: zvtvz   File: china_index_list_spider.py    MIT License 5 votes vote down vote up
def fetch_csi_index(self) -> None:
    """
    Fetch the SSE/CSI index list, persist it, then fetch its constituents.

    Pages of 50 entries are requested until a page comes back with an empty
    ``list``. Only rows whose code is exactly six digits are kept.
    """
    url = 'http://www.csindex.com.cn/zh-CN/indices/index' \
          '?page={}&page_size={}&data_type=json&class_1=1&class_2=2&class_7=7&class_10=10'

    page_size = 50
    page = 1
    collected = []
    while True:
        page_response = requests.get(url.format(page, page_size))
        page_items = demjson.decode(page_response.text).get('list', [])

        if len(page_items) == 0:
            break

        collected.extend(page_items)

        self.logger.info(f'上证、中证指数第 {page} 页抓取完成...')
        page += 1
        self.sleep()

    source_columns = ['base_date', 'base_point', 'index_code', 'indx_sname', 'online_date', 'class_eseries']
    df = pd.DataFrame(collected)[source_columns]
    df.columns = ['timestamp', 'base_point', 'code', 'name', 'list_date', 'class_eseries']
    # The category is the lower-cased first word of the series name.
    df['category'] = df['class_eseries'].apply(lambda x: x.split(' ')[0].lower())
    df = df.drop('class_eseries', axis=1)
    df = df.loc[df['code'].str.contains(r'^\d{6}$')]

    self.persist_index(df)
    self.logger.info('上证、中证指数列表抓取完成...')

    # Fetch the constituents of the SSE/CSI indices
    self.fetch_csi_index_component(df)
    self.logger.info('上证、中证指数成分股抓取完成...')
Example 28
Project: zvt   Author: zvtvz   File: china_etf_list_spider.py    MIT License 5 votes vote down vote up
def download_sh_etf_component(self, df: pd.DataFrame):
    """
    Download and store the constituents of the Shanghai ETFs in *df*.

    ETF_CLASS => 1. single-market ETF 2. cross-market ETF 3. cross-border ETF
                    5. bond ETF 6. gold ETF

    Only classes 1 and 2 are processed.

    :param df: ETF list data
    :return: None
    """
    query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                'isPagination=false&type={}&etfClass={}'

    etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
    etf_df = self.populate_sh_etf_type(etf_df)

    for _, etf in etf_df.iterrows():
        url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        response_dict = demjson.decode(response.text)
        response_df = pd.DataFrame(response_dict.get('result', []))

        etf_code = etf['FUND_ID']
        index_id = f'index_sh_{etf_code}'
        # Copy so that the column assignments below do not write into a
        # slice of the original frame.
        response_df = response_df[['instrumentId']].copy()
        response_df['id'] = response_df['instrumentId'].apply(
            lambda code: f'{index_id}_{china_stock_code_to_id(code)}')
        # NOTE(review): this line used to write ``df['entity_id'] = df['id']``
        # on the input ETF list, while every neighbouring line fills the
        # per-ETF frame that is stored below. It is assumed the per-ETF
        # frame was meant; confirm against the data schema.
        response_df['entity_id'] = response_df['id']
        response_df['stock_id'] = response_df['instrumentId'].apply(lambda code: china_stock_code_to_id(code))
        response_df['index_id'] = index_id
        response_df.drop('instrumentId', axis=1, inplace=True)

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
Example 29
Project: zvt   Author: zvtvz   File: china_etf_list_spider.py    MIT License 5 votes vote down vote up
def populate_sh_etf_type(df: pd.DataFrame):
    """
    Add an ``ETF_TYPE`` column to the Shanghai ETF list.

    The type table is downloaded for classes 1 and 2. Both tables are
    sorted by fund id and re-indexed, and the types are assigned by
    position.

    :param df: ETF list data
    :return: a copy of the list data with the ``ETF_TYPE`` column added
    """
    query_url = 'http://query.sse.com.cn/infodisplay/queryETFNewAllInfo.do?' \
                'isPagination=false&type={}&pageHelp.pageSize=25'

    type_df = pd.DataFrame()
    for etf_class in [1, 2]:
        class_response = requests.get(query_url.format(etf_class), headers=DEFAULT_SH_ETF_LIST_HEADER)
        class_payload = demjson.decode(class_response.text)
        class_df = pd.DataFrame(class_payload.get('result', []))[['fundid1', 'etftype']]
        type_df = pd.concat([type_df, class_df])

    # NOTE(review): the positional assignment assumes both frames list the
    # same funds, so that equal positions mean equal funds after sorting.
    type_df = type_df.sort_values(by='fundid1').reset_index(drop=True)
    result_df = df.copy().sort_values(by='FUND_ID').reset_index(drop=True)
    result_df['ETF_TYPE'] = type_df['etftype']

    return result_df
Example 30
Project: zvt   Author: zvtvz   File: china_etf_day_kdata_recorder.py    MIT License 5 votes vote down vote up
def fetch_cumulative_net_value(self, security_item, start, end) -> pd.DataFrame:
    """Download the paged net-value history of a fund.

    Pages of 200 rows are requested until an empty page is returned. On
    each page ``FSRQ`` is parsed as a datetime and becomes the index,
    ``JZZZL`` and ``LJJZ`` are coerced to numbers, and missing values are
    filled with 0.

    :param security_item: object whose ``code`` identifies the fund
    :param start: start of the range, formatted with ``to_time_str``
    :param end: end of the range, formatted with ``to_time_str``
    :return: all pages concatenated in request order
    """
    query_url = 'http://api.fund.eastmoney.com/f10/lsjz?' \
                'fundCode={}&pageIndex={}&pageSize=200&startDate={}&endDate={}'

    history = pd.DataFrame()
    page = 1
    while True:
        url = query_url.format(security_item.code, page, to_time_str(start), to_time_str(end))
        payload = demjson.decode(requests.get(url, headers=EASTMONEY_ETF_NET_VALUE_HEADER).text)
        page_df = pd.DataFrame(payload['Data']['LSJZList'])

        # An empty page marks the end of the data.
        if page_df.empty:
            break

        page_df['FSRQ'] = pd.to_datetime(page_df['FSRQ'])
        for column in ('JZZZL', 'LJJZ'):
            page_df[column] = pd.to_numeric(page_df[column], errors='coerce')
        page_df = page_df.fillna(0)
        page_df.set_index('FSRQ', inplace=True, drop=True)

        history = pd.concat([history, page_df])
        page += 1

        self.sleep()

    return history
Example 31
Project: zvt   Author: zvtvz   File: china_etf_day_kdata_recorder.py    MIT License 5 votes vote down vote up
def record(self, entity, start, end, size, timestamps):
        # 此 url 不支持分页,如果超过我们想取的条数,则只能取最大条数
        if start is None or size > self.default_size:
            size = 8000

        return {
            'security_item': entity,
            'level': self.level.value,
            'size': size
        }

        security_item = param['security_item']
        size = param['size']

        url = url.format(security_item.exchange, security_item.code, size)

        response = requests.get(url)
        response_json = demjson.decode(response.text)

        if response_json is None or len(response_json) == 0:
            return []

        df = pd.DataFrame(response_json)
        df.rename(columns={'day': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['name'] = security_item.name
        df['provider'] = 'sina'
        df['level'] = param['level']

        return df.to_dict(orient='records') 
Example 32
Project: OpenData   Author: PKUJohnson   File: fund_agent.py    Apache License 2.0 5 votes vote down vote up
def get_fund_company(self):
    """Fetch the list of fund companies.

    :return: ``(df, '')`` with the columns ``companyid`` and
        ``companyname``, or ``(None, '获取数据失败')`` when the download fails
    """
    response = self._get_and_parse_js(
        'http://fund.eastmoney.com/Data/Fund_JJJZ_Data.aspx?t=3', 'var gs=')
    if response is None:
        return None, '获取数据失败'

    companies = pd.DataFrame(demjson.decode(response)['op'])
    companies.columns = ['companyid', 'companyname']
    return companies, ''
Example 33
Project: KStock   Author: aseylys   File: gfc.py    GNU General Public License v3.0 5 votes vote down vote up
def request(symbols):
    """Download the quote payload for *symbols* and return it as text.

    The body is decoded as ASCII with undecodable bytes dropped (this
    removes special symbols such as the pound sign), stripped of
    surrounding whitespace, and returned without its first three
    characters.
    """
    response = urlopen(Request(buildUrl(symbols)))
    text = response.read().decode('ascii', 'ignore').strip()
    # NOTE(review): the first three characters are presumably a fixed
    # prefix in front of the payload; confirm against the endpoint.
    return text[3:]
Example 34
Project: KStock   Author: aseylys   File: gfc.py    GNU General Public License v3.0 5 votes vote down vote up
def getNews(symbol):
    """Return ``[title, url]`` pairs for the news articles of *symbol*.

    The decoded response holds ``clusters``; the articles are the items
    stored under each cluster's ``'a'`` key. Titles are unescaped and
    stripped.
    """
    raw = urlopen(buildNewsUrl(symbol)).read().decode('utf-8')
    clusters = demjson.decode(raw)['clusters']

    articles = [
        item
        for cluster in clusters
        for key in cluster
        if key == 'a'
        for item in cluster[key]
    ]

    return [[unescape(art['t']).strip(), art['u']] for art in articles]
Example 35
Project: PixivSpider   Author: Kerisa   File: __main__.py    MIT License 5 votes vote down vote up
def ParsePage(opener, img):
    """Download an illustration page, parse it and dispatch on its type.

    The page is fetched through *opener* and its ``preload-data`` meta
    content is decoded into ``img.jsonData``. The title, page type and tags
    are then filled in on *img*, and the matching handler is called.

    :return: the handler's result for ``manga``, ``gif`` and ``single``;
        ``None`` (after a warning) for any other type; ``False`` when the
        download raises ``URLError``
    """
    log.debug('`ParsePage` open ' + img.webUrl)
    try:
        page = opener.open(img.webUrl)
        img.webContent = utils.Gzip(page.read())

        found = re.findall('<meta name="preload-data".+content=\'([{].+[}])\'>', img.webContent, re.S)
        img.jsonStr = found[0]
        img.jsonData = demjson.decode(img.jsonStr)

        illust = img.jsonData['illust'][str(img.illustId)]
        img.title = illust['illustTitle']
        DetermineIllustPageType(img)
        DetermineIllustTags(img)

        if img.type == 'manga':
            return HandleManga(opener, img)
        if img.type == 'gif':
            return HandleGif(opener, img)
        if img.type == 'single':
            return SaveSingleImage(opener, img)
        log.warn('parse page error - %s', img.webUrl)

    except urllib.error.URLError as e:
        PrintUrlErrorMsg(e)
        return False


################################################################################ 
Example 36
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: test_router.py    GNU General Public License v3.0 5 votes vote down vote up
def load_configuration(self, filename):
        """Load the configuration file ``filename`` into ``self.config``.

        The file content is parsed with ``demjson`` with comments allowed.

        :param filename: path of the configuration file to read
        """
        # the context manager closes the file even when decoding fails
        with open(filename, 'r') as fp:
            self.config = demjson.decode(fp.read(), allow_comments=True)
Example 37
Project: kontalk-legacy-xmppserver   Author: daniele-athome   File: test_resolver_subscriptions.py    GNU General Public License v3.0 5 votes vote down vote up
def loadConfiguration(self, filename):
        """Load the configuration file ``filename`` into ``self.config``.

        The file content is parsed with ``demjson`` with comments allowed.

        :param filename: path of the configuration file to read
        """
        # the context manager closes the file even when decoding fails
        with open(filename, 'r') as fp:
            self.config = demjson.decode(fp.read(), allow_comments=True)
Example 38
Project: pysyncthru   Author: nielstron   File: __init__.py    MIT License 5 votes vote down vote up
async def update(self) -> None:
        """
        Retrieve the data from the printer and store it in ``self.data``.

        The body uses ``async with`` and ``await``, so this must be a
        coroutine function (a plain ``def`` does not compile).

        On an ``aiohttp.ClientError`` the data is set to an offline status
        dictionary instead of raising.

        Throws ValueError if host does not support SyncThru
        (i.e. the response cannot be decoded).
        """
        url = '{}{}'.format(self.url, ENDPOINT)

        try:
            async with self._session.get(url) as response:
                # strict=False: accept the non-strict JSON the device returns
                json_dict = demjson.decode(await response.text(), strict=False)
        except aiohttp.ClientError:
            json_dict = {'status': {'status1': SyncThru.OFFLINE}}
        except demjson.JSONDecodeError as err:
            # keep the decode error as the cause for easier debugging
            raise ValueError("Invalid host, does not support SyncThru.") from err
        self.data = json_dict
Example 39
Project: simple-spiders   Author: duiliuliu   File: commons.py    GNU General Public License v3.0 5 votes vote down vote up
def jsonp(self):
        '''
        Parse a JSONP-formatted response into a JSON object.

        The outermost ``{...}`` section of ``self.text`` is extracted with a
        regular expression and decoded with ``demjson``.
        '''
        matched = re.match(".*?({.*}).*", self.text, re.S)
        body = matched.group(1)
        return demjson.decode(body)
Example 40
Project: sesh-dash-beta   Author: GreatLakesEnergy   File: views.py    MIT License 5 votes vote down vote up
def add_report(request, site_id):
    """
    View used to create a report job for a site.

    Responds with ``HttpResponseForbidden`` unless the user belongs to the
    site's organisation and is an organisation admin. On POST, every form
    key whose value is ``'on'`` is decoded with ``demjson`` and stored as a
    report attribute of a new ``Report_Job``; the user is then redirected to
    the ``manage_reports`` page. Otherwise the add-report page is rendered.
    """
    site = Sesh_Site.objects.filter(id=site_id).first()
    context_dict = {'report_attributes': get_report_table_attributes(site)}

    # only an admin of the organisation owning the site may continue
    user = request.user
    if not (user.organisation == site.organisation and user.is_org_admin):
        return HttpResponseForbidden()

    if request.method == "POST":
        # checked checkboxes arrive as '<json-encoded attribute>': 'on'
        attributes = [
            demjson.decode(key)
            for key, value in request.POST.items()
            if value == 'on'
        ]

        Report_Job.objects.create(site=site,
                                  attributes=attributes,
                                  duration=request.POST.get('duration', 'daily'),
                                  day_to_report=0)
        return redirect(reverse('manage_reports', args=[site.id]))

    user_sites = _get_user_sites(request)
    context_dict['site'] = site
    context_dict['permitted'] = get_org_edit_permissions(request.user)
    context_dict['sites_stats'] = get_quick_status(user_sites)
    return render(request, 'seshdash/settings/add_report.html', context_dict)
Example 41
Project: sesh-dash-beta   Author: GreatLakesEnergy   File: views.py    MIT License 5 votes vote down vote up
def edit_report(request, report_id):
    """
    View to edit the report identified by ``report_id``.

    On POST, every form key whose value is ``'on'`` is decoded with
    ``demjson`` and becomes the report's new attribute list; the duration is
    taken from the form, the report is saved and the user is redirected to
    the ``manage_reports`` page of the report's site. Otherwise the edit
    page is rendered.
    """
    report = Report_Job.objects.filter(id=report_id).first()

    if request.method == 'POST':
        # checked checkboxes arrive as '<json-encoded attribute>': 'on'
        report.attributes = [
            demjson.decode(key)
            for key, value in request.POST.items()
            if value == 'on'
        ]
        report.duration = request.POST['duration']
        report.save()
        return redirect(reverse('manage_reports', args=[report.site.id]))

    user_sites = _get_user_sites(request)
    context_dict = {
        'attributes': get_edit_report_list(report),
        'report': report,
        'duration_choices': report.get_duration_choices(),
        'permitted': get_org_edit_permissions(request.user),
        'sites_stats': get_quick_status(user_sites),
    }
    return render(request, 'seshdash/settings/edit_report.html', context_dict)
Example 42
Project: vxTrader   Author: vex1023   File: xqTrader.py    MIT License 5 votes vote down vote up
def to_text(value, encoding="utf-8"):
    """Return ``value`` as a text string.

    Text is returned unchanged, binary data is decoded with ``encoding``,
    and anything else is converted with ``six.text_type``.
    """
    if isinstance(value, six.text_type):
        return value
    is_binary = isinstance(value, six.binary_type)
    return value.decode(encoding) if is_binary else six.text_type(value)
Example 43
Project: vxTrader   Author: vex1023   File: xqTrader.py    MIT License 5 votes vote down vote up
def portfolio(self):
        """Build a positions DataFrame for the portfolio ``self.portfolio_code``.

        The portfolio page is downloaded, the JavaScript value assigned to
        ``SNB.cubeInfo`` is cut out of the HTML and decoded, and the holdings
        of ``last_success_rebalancing`` are turned into a DataFrame indexed by
        lower-cased symbol. A ``cash`` row is added, amounts are scaled by
        ``_BASE_MULTIPE`` and market value / weight columns are derived.

        :return: DataFrame with the columns ``symbol_name``,
            ``current_amount``, ``enable_amount``, ``lasttrade``,
            ``market_value`` and ``weight``.
        :raises: whatever ``r.raise_for_status()`` raises on an HTTP error.
        """

        url = 'https://xueqiu.com/p/' + self.portfolio_code
        r = self.client.get(url)
        r.raise_for_status()

        # Locate the substring that holds the position data
        html = r.text
        pos_start = html.find('SNB.cubeInfo = ') + len('SNB.cubeInfo = ')
        pos_end = html.find('SNB.cubePieData')
        # the slice ends 2 characters before the next assignment
        # (presumably to drop the trailing ';' and newline; verify against the page)
        json_data = to_text(html[pos_start:pos_end - 2])
        logger.debug(json_data)
        # NOTE(review): `json` must expose `decode` here (the stdlib json module
        # does not); presumably demjson imported under that name. Confirm.
        p_info = json.decode(json_data, encoding='utf-8')

        # Work around Xueqiu position errors: use the last successful rebalancing
        positions = p_info['last_success_rebalancing']['holdings']
        logger.debug(p_info)

        df = pd.DataFrame(positions)
        df.rename(columns=_RENAME_DICT, inplace=True)
        df['symbol'] = df['symbol'].str.lower()
        df = df.set_index('symbol')
        # latest trade prices for all held symbols
        hq = self.hq(df.index)
        df['lasttrade'] = hq['lasttrade']

        # add the cash position as an extra row priced at 1.0
        df.loc['cash', 'symbol_name'] = '人民币'
        df.loc['cash', 'current_amount'] = p_info['view_rebalancing']['cash_value']
        df.loc['cash', 'lasttrade'] = 1.0

        df['current_amount'] = df['current_amount'] * _BASE_MULTIPE
        df['enable_amount'] = df['current_amount']
        df['market_value'] = df['current_amount'] * df['lasttrade']
        net_value = df['market_value'].sum()
        # weight of each position relative to the total market value
        df['weight'] = (df['market_value'] / net_value).round(4)

        return df[['symbol_name', 'current_amount', 'enable_amount', 'lasttrade', 'market_value', 'weight']] 
Example 44
Project: vxTrader   Author: vex1023   File: yjbTrader.py    MIT License 5 votes vote down vote up
def _trade_api(self, **kwargs):
        '''
        Low-level trading interface.

        Sends ``kwargs`` as query parameters to the exchange endpoint and
        converts the ``returnJson`` payload into a DataFrame.

        :return: ``None`` when ``returnJson`` is ``None``; otherwise a
            DataFrame built from the rows after the first entry of the
            ``Func<function_id>`` section, with renamed columns.
        :raises TraderAPIError: when ``msg_no`` in the payload is not ``'0'``.
        '''

        logger.debug('call params: %s' % kwargs)
        r = self.client.get(url='https://jy.yongjinbao.com.cn/winner_gj/gjzq/stock/exchange.action',
                            params=kwargs)
        logger.debug('return: %s' % r.text)

        # Parse the returned result data
        payload = r.json()['returnJson']
        if payload is None:
            return None

        data = demjson.decode(payload)
        if data['msg_no'] != '0':
            error_msg = data[data['error_grids']][1]
            error_info = error_msg.get('error_info', '')
            logger.error(
                'error no: %s,error info: %s' % (error_msg.get('error_no', ''), error_info))
            raise TraderAPIError(error_info)

        rows = data['Func%s' % data['function_id']]
        df = pd.DataFrame(rows[1:])

        # Replace the header names
        df.rename(columns=RENAME_DICT, inplace=True)
        # Generate the symbol column
        if 'symbol' in df.columns:
            df['symbol'] = df['symbol'].apply(code_to_symbols)

        # Only convert columns present in both FLOAT_COLUMNS and df.columns
        # to keep the number of conversions down
        for col in set(FLOAT_COLUMNS).intersection(df.columns):
            df[col] = pd.to_numeric(df[col], errors='ignore')

        return df
Example 45
Project: CF-Cannon   Author: LevyHsu   File: attack.py    GNU General Public License v3.0 5 votes vote down vote up
def attack():
    # Request handler: reads the 'attackinfo' form field, decodes it with
    # demjson, and, if the current status is 'STOP', starts Bypass.goAttack
    # on a background thread with the decoded parameters.
    # Returns a tuple of (encoded status object, `json`).
    attackinfo = demjson.decode(request.form['attackinfo'])
    status = Bypass.getstatus()
    # NOTE(review): the key is spelled 'Satus'; presumably it matches the key
    # produced by Bypass.getstatus(). Confirm before "fixing" the spelling.
    if status['Satus'] == 'STOP':
        threading.Thread(target=Bypass.goAttack, args=(attackinfo['T'],attackinfo['url'],attackinfo['path'],attackinfo['charset'],attackinfo['is_protected_by_cf'],attackinfo['threadCount'],attackinfo['peerCount'],attackinfo['keywords'])).start()
        # NOTE(review): `json` is whatever that name is bound to in this
        # module (not visible here); verify what the second tuple element is
        # meant to be.
        return demjson.encode({'status': 'success'}),json
    else:
        return demjson.encode({'status': 'failed'}),json 
Example 46
Project: Lyrixal   Author: La-Volpe   File: utilities.py    MIT License 5 votes vote down vote up
def fetch_lyrics(artist, song):
    """Download the document at ``url_builder(artist, song)``.

    :return: the response body decoded as UTF-8 text
    """
    target = url_builder(artist, song)
    raw = ur.urlopen(target).read()
    return raw.decode("utf-8")
#Parses the xml and then returns the 
Example 47
Project: Lyrixal   Author: La-Volpe   File: utilities.py    MIT License 5 votes vote down vote up
def fetch_from_wiki(artist, song):
    """Fetch and decode the response of ``wikia_url_builder(artist, song)``.

    The first 7 characters of the UTF-8 decoded body are dropped before the
    remainder is passed to ``json.decode``.

    :return: the decoded object
    """
    body = ur.urlopen(wikia_url_builder(artist, song)).read().decode('utf-8')
    # skip the 7-character prefix that precedes the JSON payload
    return json.decode(body[7:])
#Prepares the proper response: 
Example 48
Project: fooltrader   Author: foolcage   File: sina_category_spider.py    MIT License 5 votes vote down vote up
def download_sina_category(self, response):
        """Yield detail-page requests for every category in ``response``.

        The body is decoded as GBK and the text from the first ``{`` up to
        and including the first ``}`` is parsed as JSON. For each key of the
        resulting mapping, pages 1 to 3 of the node data are requested; the
        category code and the second comma-separated field of its value are
        passed along in ``meta``.
        """
        decoded = response.body.decode('GBK')
        start = decoded.index('{')
        stop = decoded.index('}') + 1
        categories = json.loads(decoded[start:stop])

        url_template = 'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page={}&num=1024&sort=symbol&asc=1&node={}&symbol=&_s_r_a=page'
        for ind_code in categories:
            ind_name = categories[ind_code].split(',')[1]
            for page in range(1, 4):
                yield Request(
                    url=url_template.format(page, ind_code),
                    meta={'ind_code': ind_code,
                          'ind_name': ind_name},
                    callback=self.download_sina_category_detail)
Example 49
Project: fooltrader   Author: foolcage   File: stock_summary_spider.py    MIT License 5 votes vote down vote up
def download_sh_summary(self, response):
        """Store the summary values of ``response`` in ``self.current_df``.

        The JSONP payload between the first ``(`` and the first ``)`` of the
        response text is decoded, and its ``result`` list is filtered for
        entries whose ``productType`` is ``'1'``. When exactly one such
        entry exists, its values are written to the row
        ``response.meta['search_date']`` while holding ``self.file_lock``.
        """
        search_date = response.meta['search_date']

        text = response.text
        results = demjson.decode(text[text.index("(") + 1:text.index(")")])['result']
        result = [item for item in results if item['productType'] == '1']
        if len(result) == 1:
            result_json = result[0]
            self.file_lock.acquire()
            try:
                # Some older data is missing; default those values to 0.0
                self.current_df.at[search_date, 'pe'] = to_float(result_json['profitRate'], 0.0)
                self.current_df.at[search_date, 'tCap'] = to_float(result_json['marketValue1'], 0.0) * 100000000
                self.current_df.at[search_date, 'mCap'] = to_float(result_json['negotiableValue1'], 0.0) * 100000000
                self.current_df.at[search_date, 'turnoverRate'] = to_float(result_json['exchangeRate'], 0.0)
            finally:
                # always release, otherwise one failing row would block every
                # later writer waiting on the lock
                self.file_lock.release()
Example 50
Project: PyStacks   Author: KablamoOSS   File: template.py    MIT License 5 votes vote down vote up
def templateCF(resources, path):
    """Render and merge the templates of all ``resources``.

    For each resource name the template ``path + "/" + resource`` is
    rendered. The keyword arguments accumulate: each render receives the
    configuration of the current resource and of every resource processed
    before it. The rendered text is decoded with ``demjson`` and merged into
    the result.

    On a decode error the rendered text, the error and its position are
    printed and the process exits with status 1.

    :param resources: mapping of resource name to its configuration
    :param path: directory prefix of the templates
    :return: dictionary with the merged decoded templates
    """
    compiled = {}
    resconf = {}
    for resource in resources:
        resconf[resource] = resources[resource]
        cfres = template(path + "/" + resource, **resconf)
        try:
            compiled.update(demjson.decode(cfres))
        except demjson.JSONError as err:
            # parenthesised single-argument print works on Python 2 and 3
            print(output.boxwrap(text=cfres))
            print(output.writecolour(str(err)))
            print(err.position)
            sys.exit(1)

    return compiled
Example 51
Project: jh-kaggle-util   Author: jeffheaton   File: tune_xgboost.py    Apache License 2.0 5 votes vote down vote up
def grid_search(params,grid,num):
    """Run a cross-validated grid search and return the best job.

    Every combination of the value lists in ``grid`` becomes a job. For each
    job a copy of ``params`` is updated with the job's values and handed to
    ``train``; the two leading values returned by ``train.run_cv()`` are
    recorded as score and step. The results are sorted by ascending score,
    printed, and written to ``tune-<num>.csv`` below ``PATH``.

    :return: the job with the lowest score, decoded back from its string
        representation with ``demjson``.
    """
    keys = set(grid)
    value_lists = [grid[name] for name in keys]
    jobs = [dict(zip(keys, combo)) for combo in itertools.product(*value_lists)]

    total = len(jobs)
    print("Total number of jobs: {}".format(total))
    steps = []
    scores = []
    job_labels = []

    for position, job in enumerate(jobs, start=1):
        print("** Starting job: {}:{}/{}".format(num, position, total))
        merged = dict(params)
        update(merged, job)
        train.params = merged
        train.rounds = MAX_ROUNDS
        train.early_stop = EARLY_STOP
        outcome = train.run_cv()
        print("Result: {}".format(outcome))
        job_labels.append(str(job))
        scores.append(outcome[0])
        steps.append(outcome[1])

    df = pd.DataFrame({'job': job_labels, 'step': steps, 'score': scores},
                      columns=['job', 'score', 'step'])
    df.sort_values(by=['score'], ascending=[True], inplace=True)
    print(df)
    df.to_csv(os.path.join(PATH, "tune-{}.csv".format(num)), index=False)
    # the job was stored as a string; decode the best one back into a dict
    return demjson.decode(df.iloc[0]['job'])
Example 52
Project: fb-crawler   Author: Ardenhong   File: fb_crawler.py    GNU General Public License v3.0 5 votes vote down vote up
def get_my_friends(page_source):
    '''Print every friend listed on the "my friends" page.

    For each ``li`` element with class ``_698`` the name and the id found in
    the ``data-gt`` attribute of its link are printed. A summary with the
    number of printed entries and of ``KeyError`` failures is printed last.

    Args:
        page_source: source of the web page

    '''
    soup = bs(page_source, 'html.parser')
    found = 0
    key_errors = 0

    for entry in soup.findAll('li', class_="_698"):
        try:
            block = entry.find('div', class_="fsl fwb fcb")

            name = block.text
            tracking = demjson.decode(block.a["data-gt"])
            fbid = tracking["engagement"]["eng_tid"]
            print(name, fbid)
            found += 1

        except KeyError:
            exc_type, exc_value = sys.exc_info()[:2]
            print(exc_type, exc_value)
            key_errors += 1
        except UnicodeEncodeError:
            print("except UnicodeEncodeError")

    print("共{0}筆!\n Error數:{1}".format(found, key_errors))
Example 53
Project: cinder-driver   Author: Datera   File: dat_sreq.py    Apache License 2.0 5 votes vote down vote up
def gen_entries(logfiles, neg_filter, pos_filter):
    """Yield the decoded log entries of ``logfiles`` that pass the filters.

    The files are read in sorted order. Lines are right-stripped and
    accumulated until a line ends with ``}``; the accumulated text is then
    decoded as one entry. Entries matched by ``neg_filters`` are dropped,
    and the remaining ones are yielded when ``pos_filters`` accepts them.
    """
    for log in sorted(logfiles):
        with io.open(log) as f:
            if is_gzip(f):
                f = gzip.open(log)
            pending = ''
            for raw in f:
                stripped = raw.rstrip()
                pending += stripped
                # an entry is complete once a line closes with '}'
                if not stripped.endswith('}'):
                    continue

                try:
                    data = json.loads(pending)
                except json.decoder.JSONDecodeError:
                    # Some JSON lines in the logfiles were never run through
                    # javascript's Stringify method, so fall back to the
                    # lenient decoder.
                    data = demjson.decode(pending)
                pending = ''

                if neg_filters(data, neg_filter):
                    continue
                if pos_filters(data, pos_filter):
                    yield data
Example 54
Project: Siarobo   Author: siyanew   File: bot.py    MIT License 5 votes vote down vote up
def get_config():
    """Load ``config.json`` from ``WD`` into the module-level ``config``.

    The file content is decoded with ``demjson``.
    """
    global config
    # the context manager closes the file even when decoding fails
    with open(join(WD, "config.json"), "r") as config_file:
        config = demjson.decode(config_file.read())
Example 55
Project: Siarobo   Author: siyanew   File: soundcloud.py    MIT License 5 votes vote down vote up
async def search(query):
    """Search SoundCloud for ``query`` and return the matching tracks.

    The body awaits ``get``, so this must be a coroutine function (a plain
    ``def`` does not compile).

    :param query: search text, inserted into the URL as given
    :return: list of ``[title, permalink_url]`` pairs for every entry of the
        response's ``collection`` whose ``kind`` is ``'track'``
    """
    # substitute the query before appending client_id, so a '%' inside the
    # id can never be misread as a format specifier
    url = ('https://api.soundcloud.com/search?q=%s&facet=model&limit=30&offset=0&linked_partitioning=1&client_id=' % query) + client_id

    response = await get(url)
    r = demjson.decode(response)
    return [
        [entity['title'], entity['permalink_url']]
        for entity in r['collection']
        if entity['kind'] == 'track'
    ]
Example 56
Project: Siarobo   Author: siyanew   File: soundcloud.py    MIT License 5 votes vote down vote up
async def getfile(url):
    """Resolve a SoundCloud page ``url`` to its stream URL.

    The body awaits ``get``, so this must be a coroutine function (a plain
    ``def`` does not compile).

    :param url: URL to resolve, inserted into the request as given
    :return: the ``stream_url`` of the response with the client id appended
    """
    # format the template itself; formatting `client_id` (as a bare
    # `"..." + client_id.format(url)` would) leaves the '{}' placeholder
    # in the request unreplaced
    resolve_url = "https://api.soundcloud.com/resolve?url={}&client_id=".format(url) + client_id
    response = await get(resolve_url)
    r = demjson.decode(response)
    return r['stream_url'] + "?client_id=" + client_id
Example 57
Project: akshare   Author: jindaxiang   File: zh_stock_ah_tx.py    MIT License 5 votes vote down vote up
def get_zh_stock_ah_page_count() -> int:
    """Return the ``page_count`` reported for page 1 of the listing.

    A copy of ``hk_payload`` with ``reqPage`` set to 1 is requested; the
    text between the first ``{`` and the last ``}`` of the response is
    decoded with ``demjson``.
    """
    payload = hk_payload.copy()
    payload["reqPage"] = 1
    res = requests.get(hk_url, params=payload, headers=hk_headers)
    body = res.text
    start = body.find("{")
    stop = body.rfind("}") + 1
    decoded = demjson.decode(body[start:stop])
    return decoded["data"]["page_count"]
Example 58
Project: akshare   Author: jindaxiang   File: zh_stock_ah_tx.py    MIT License 5 votes vote down vote up
def stock_zh_ah_spot():
    """Download all pages of the listing and return them as one DataFrame.

    Each page's ``page_data`` entries are ``~``-separated strings; they are
    split into columns and the pages are concatenated.

    :return: DataFrame with the 13 named columns assigned below
    """
    # work on a copy so the module-level payload is not left pointing at the
    # last requested page
    payload = hk_payload.copy()
    frames = []
    for page in range(1, get_zh_stock_ah_page_count() + 1):
        payload["reqPage"] = page
        res = requests.get(hk_url, params=payload, headers=hk_headers)
        text = res.text
        data_json = demjson.decode(text[text.find("{"): text.rfind("}") + 1])
        page_df = pd.DataFrame(data_json["data"]["page_data"])
        frames.append(page_df.iloc[:, 0].str.split("~", expand=True))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    big_df.columns = ["代码", "名称", "最新价", "涨跌幅", "涨跌额", "买入", "卖出", "成交量", "成交额", "今开", "昨收", "最高", "最低"]
    return big_df
Example 59
Project: akshare   Author: jindaxiang   File: zh_stock_ah_tx.py    MIT License 5 votes vote down vote up
def stock_zh_ah_name():
    """Download all pages of the listing and map each code to its name.

    Each page's ``page_data`` entries are ``~``-separated strings; they are
    split into columns and the pages are concatenated.

    :return: dict mapping the first column (code) to the second (name)
    """
    # work on a copy so the module-level payload is not left pointing at the
    # last requested page
    payload = hk_payload.copy()
    frames = []
    for page in range(1, get_zh_stock_ah_page_count() + 1):
        payload["reqPage"] = page
        res = requests.get(hk_url, params=payload, headers=hk_headers)
        text = res.text
        data_json = demjson.decode(text[text.find("{"): text.rfind("}") + 1])
        page_df = pd.DataFrame(data_json["data"]["page_data"])
        frames.append(page_df.iloc[:, 0].str.split("~", expand=True))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    big_df.columns = ["代码", "名称", "最新价", "涨跌幅", "涨跌额", "买入", "卖出", "成交量", "成交额", "今开", "昨收", "最高", "最低"]
    return dict(zip(big_df["代码"], big_df["名称"]))
Example 60
Project: akshare   Author: jindaxiang   File: hf_futures_sina.py    MIT License 5 votes vote down vote up
def _get_real_name_list():
    """
    Get the list of names as displayed on the front-end page, e.g.
    ['NYBOT-棉花', 'LME镍3个月', 'LME铅3个月', 'LME锡3个月', 'LME锌3个月', 'LME铝3个月', 'LME铜3个月', 'CBOT-黄豆', 'CBOT-小麦', 'CBOT-玉米', 'CBOT-黄豆油', 'CBOT-黄豆粉', '日本橡胶', 'COMEX铜', 'NYMEX天然气', 'NYMEX原油', 'COMEX白银', 'COMEX黄金', 'CME-瘦肉猪', '布伦特原油', '伦敦金', '伦敦银', '伦敦铂金', '伦敦钯金']

    The value assigned to ``var oHF_1`` in the page is cut out and decoded
    with ``demjson``; the stripped first item of each of its values is
    returned.
    """
    url = "http://finance.sina.com.cn/money/future/hf.html"
    res = requests.get(url)
    res.encoding = "gb2312"
    page = res.text
    # 12 == len("var oHF_1 = "); the slice stops 2 characters before "var oHF_2"
    start = page.find("var oHF_1 = ") + 12
    stop = page.find("var oHF_2") - 2
    dem_text = page[start:stop].replace("\n\t", "")
    decoded = demjson.decode(dem_text)
    return [entry[0].strip() for entry in decoded.values()]
Example 61
Project: CTPOrderService   Author: lllzzz   File: Service.py    Mozilla Public License 2.0 5 votes vote down vote up
def run(self):
        """Dispatch the messages received from ``self.srv.listen()``.

        Only entries of type ``'message'`` are handled. The payload
        ``'stop'`` calls ``self.stop()``; any other payload is decoded with
        ``JSON.decode`` and passed to ``self.callback`` together with the
        channel it arrived on.
        """
        for msg in self.srv.listen():
            if msg['type'] != 'message':
                continue
            payload = msg['data']
            if payload == 'stop':  # stop the service from outside
                self.stop()
                continue
            decoded = JSON.decode(payload)
            self.callback(msg['channel'], decoded)
Example 62
Project: backtrader-cn   Author: pandalibin   File: sina.py    GNU General Public License v3.0 4 votes vote down vote up
def login(self, username, password):
        """
        Log in to Sina Weibo.

        :param username: Weibo phone number
        :param password: Weibo password
        :return: tuple of the ``requests.Session`` used for the login and
            the ``uid`` field of the response
        :raises StockMatchError: when the username or password is empty
        :raises LoginFailedError: when the response's ``retcode`` is not "0"
        """
        if username == "" or password == "":
            raise StockMatchError("用户名或密码不能为空")

        encoded_user = base64.b64encode(username.encode("utf-8")).decode("utf-8")
        post_data = {
            "entry": "finance",
            "gateway": "1",
            "from": None,
            "savestate": "30",
            "qrcode_flag": True,
            "useticket": "0",
            "pagerefer": "http://jiaoyi.sina.com.cn/jy/index.php",
            "vsnf": "1",
            "su": encoded_user,
            "service": "sso",
            "servertime": get_unix_timestamp(False),
            "nonce": "RA12UM",
            # "pwencode": "rsa2",  # rsa2 password encryption is switched off
            "sp": password,
            "sr": "1280*800",
            "encoding": "UTF-8",
            "cdult": "3",
            "domain": "sina.com.cn",
            "prelt": "56",
            "returntype": "TEXT",
        }

        session = requests.Session()
        session.headers.update(conf.SINA_CONFIG["request_headers"])
        login_url = conf.SINA_CONFIG["login_url"]
        query = {
            "client": "ssologin.js(v1.4.19)",
            "_": get_unix_timestamp(),
        }
        res = session.post(login_url, data=post_data, params=query)
        res.encoding = "gb2312"

        info = json.loads(res.content)
        if info["retcode"] != "0":
            reason = info["reason"]
            logger.error(reason)
            raise LoginFailedError(reason)

        logger.info("用户%s登录成功" % username)
        return session, info['uid']
Example 63
Project: simple-spiders   Author: duiliuliu   File: commons.py    GNU General Public License v3.0 4 votes vote down vote up
def html(self, encoding=None, **kwargs):
        '''
        Parse the response into an HtmlElement object; data can then be
        retrieved with CSS selectors or XPath expressions.

        When the response has no known encoding, has more than 3 bytes of
        content and ``encoding`` is given, the raw content is decoded with
        ``encoding`` first; if that fails with ``UnicodeDecodeError`` (or the
        conditions do not hold) ``self.text`` is parsed instead. ``kwargs``
        are forwarded to ``html.fromstring``.

        Example:
            >>> doc = response.html()
            >>> # get the href of <a> elements via xpath
            >>> links = doc.xpath('//a/@href')
            >>> # get the text of <span> elements via xpath
            >>> spans = doc.xpath('//span/text()')
            >>> # for more, consult CSS selector and XPath syntax references

            Commonly used methods:
            find, findall, findtext, get, getchildren, getiterator, getnext, getparent, getprevious, getroottree, index, insert, items, iter, iterancestors, iterchildren, iterdescendants, iterfind, itersiblings, itertext, keys, makeelement, remove, replace, values, xpath

            >>> .drop_tree():
            Drops the element and all its children. Unlike el.getparent().remove(el) this does not remove the tail text; with drop_tree the tail text is merged with the previous element.
            >>> .drop_tag():
            Drops the tag, but keeps its children and text.
            >>> .find_class(class_name):
            Returns a list of all the elements with the given CSS class name. Note that class names are space separated in HTML, so doc.find_class_name('highlight') will find an element like <div class="sidebar highlight">. Class names are case sensitive.
            >>> .find_rel_links(rel):
            Returns a list of all the <a rel="{rel}"> elements. E.g., doc.find_rel_links('tag') returns all the links marked as tags.
            >>> .get_element_by_id(id, default=None):
            Return the element with the given id, or the default if none is found. If there are multiple elements with the same id (which there shouldn't be, but there often is), this returns only the first.
            >>> .text_content():
            Returns the text content of the element, including the text content of its children, with no markup.
            >>> .cssselect(expr):
            Select elements from this element and its children, using a CSS selector expression. (Note that .xpath(expr) is also available as on all lxml elements.)
            >>> .label:
            Returns the corresponding <label> element for this element, if any exists (None if there is none). Label elements have a label.for_element attribute that points back to the element.
            >>> .base_url:
            The base URL for this element, if one was saved from the parsing. This attribute is not settable. Is None when no base URL was saved.
            >>> .classes:
            Returns a set-like object that allows accessing and modifying the names in the 'class' attribute of the element. (New in lxml 3.5).
            >>> .set(key, value=None):
            Sets an HTML attribute. If no value is given, or if the value is None, it creates a boolean attribute like <form novalidate></form> or <div custom-attribute></div>. In XML, attributes must have at least the empty string as their value like <form novalidate=""></form>, but HTML boolean attributes can also be just present or absent from an element without having a value.
        '''
        try_explicit_encoding = (
            not self.encoding
            and self.content
            and len(self.content) > 3
            and encoding is not None
        )
        if try_explicit_encoding:
            try:
                decoded = self.content.decode(encoding)
                return html.fromstring(decoded, **kwargs)
            except UnicodeDecodeError:
                # fall back to the text as decoded by the response itself
                pass
        return html.fromstring(self.text, **kwargs)
Example 64
Project: vxTrader   Author: vex1023   File: yjbTrader.py    MIT License 4 votes vote down vote up
def login(self):
        """Log in to the trading endpoint with the stored credentials.

        ``self.pre_login()`` is called first; the login parameters are then
        posted and the ``returnJson`` field of the response is decoded with
        ``demjson``. A ``msg_no`` of ``'0'`` means success.

        :raises VerifyCodeError: when the error text mentions the
            verification code
        :raises LoginFailedError: for every other login error
        """

        self.pre_login()

        login_params = {
            "function_id": 200,
            "login_type": "stock",
            "version": 200,
            "identity_type": "",
            "remember_me": "",
            "input_content": 1,
            "content_type": 0,
            "loginPasswordType": "B64",
            "disk_serial_id": self.disk_serial_id,
            "cpuid": self.cpuid,
            "machinecode": self.machinecode,
            "mac_addr": self.mac_address,
            "account_content": self._account,
            "password": urllib.parse.unquote(self._password),
            "validateCode": self.vcode
        }
        logger.debug('login_params is: %s' % login_params)

        r = self._session.post(
            'https://jy.yongjinbao.com.cn/winner_gj/gjzq/exchange.action',
            params=login_params)
        r.raise_for_status()

        logger.debug('Login respone: %s' % r.text)

        data = demjson.decode(r.json()['returnJson'])
        if data['msg_no'] == '0':
            return

        # prefer the top-level message; otherwise use the error grid entry
        if 'msg_info' in data.keys() and data['msg_info'] != '':
            error_msg = {'error_info': data['msg_info']}
        else:
            error_msg = data[data['error_grids']][1]

        reason = error_msg['error_info']
        if reason.find('验证码') != -1:
            logger.warning('vcode error : %s' % reason)
            raise VerifyCodeError(reason)

        logger.error('login Failed :%s' % reason)
        raise LoginFailedError(reason)
Example 65
Project: electricitymap-contrib   Author: tmrowco   File: US_PJM.py    GNU General Public License v3.0 4 votes vote down vote up
def extract_data(session=None):
    """
    Makes a request to the PJM data url.
    Finds timestamp of current data and converts into a useful form.
    Finds generation data inside script tag.
    Returns a tuple of generation data and datetime.

    :param session: optional ``requests.Session`` used for the request;
        a new one is created when omitted.
    :raises LookupError: when the page has no ``asOfDate`` element.
    """

    s = session or requests.Session()
    # go through the session so a caller-supplied one is actually used
    req = s.get(url)
    soup = BeautifulSoup(req.content, 'html.parser')

    try:
        time_div = soup.find("div", id="asOfDate").text
    except AttributeError:
        raise LookupError('No data is available for US-PJM.')

    time_pattern = re.compile(r"""(\d{1,2}     #Hour can be 1/2 digits.
                                   :           #Separator.
                                   \d{2})\s    #Minutes must be 2 digits with a space after.
                                   (a.m.|p.m.) #Either am or pm allowed.""", re.X)

    latest_time = re.search(time_pattern, time_div)

    time_data = latest_time.group(1).split(":")
    am_or_pm = latest_time.group(2)
    hour = int(time_data[0])
    minute = int(time_data[1])

    # Time format used by PJM is slightly unusual and needs to be converted so arrow can use it.
    if am_or_pm == "p.m." and hour != 12:
        # Time needs to be in 24hr format
        hour += 12
    elif am_or_pm == "a.m." and hour == 12:
        # Midnight is 12 a.m.
        hour = 0

    arr_dt = arrow.now('America/New_York').replace(hour=hour, minute=minute)
    future_check = arrow.now('America/New_York')

    if arr_dt > future_check:
        # Generation mix lags 1-2hrs behind present.
        # This check prevents data near midnight being given the wrong date.
        # Arrow objects are immutable: shift() returns a new object, so the
        # result has to be assigned for the correction to take effect.
        arr_dt = arr_dt.shift(days=-1)

    dt = arr_dt.floor('minute').datetime

    generation_mix_div = soup.find("div", id="rtschartallfuelspjmGenFuelM_container")
    generation_mix_script = generation_mix_div.next_sibling

    pattern = r'series: \[(.*)\]'
    script_data = re.search(pattern, str(generation_mix_script)).group(1)

    # demjson is required because script data is javascript not valid json.
    raw_data = demjson.decode(script_data)
    data = raw_data["data"]

    return data, dt
Example 66
Project: electricitymap-contrib   Author: tmrowco   File: US_PJM.py    GNU General Public License v3.0 4 votes vote down vote up
def get_exchange_data(interface, session=None):
    """
    Fetch the 5min data of a PJM interface for the current day.

    The load values and the timestamps are extracted from the javascript in
    the html source and joined together.

    :param interface: key into ``exchange_mapping`` selecting the chart
    :param session: optional ``requests.Session``; a new one is created when
        omitted
    :return: list of ``(load, datetime)`` tuples
    """

    base_url = 'http://www.pjm.com/Charts/InterfaceChart.aspx?open='
    url = base_url + exchange_mapping[interface]

    s = session or requests.Session()
    req = s.get(url)
    soup = BeautifulSoup(req.content, 'html.parser')

    # the chart data lives in the script that follows the highcharts include
    highcharts_tag = soup.find("script", {"type": "text/javascript",
                                          "src": "/assets/js/Highcharts/HighCharts/highcharts.js"})
    exchange_script = highcharts_tag.find_next_sibling("script")
    script_text = str(exchange_script)

    load_pattern = r'var load = (\[(.*)\])'
    load_text = re.search(load_pattern, script_text).group(1)
    load_vals = demjson.decode(load_text)[0]

    # Occasionally load_vals contains a null at the end of the list which must be caught.
    actual_load = [float(val) for val in load_vals if val is not None]

    time_pattern = r'var timeArray = (\[(.*)\])'
    time_text = re.search(time_pattern, script_text).group(1)
    time_vals = demjson.decode(time_text)

    arr_date = arrow.now('America/New_York').floor('day')

    converted_flows = []
    for load_val, time_str in zip(actual_load, time_vals):
        parsed = arrow.get(time_str, 'h:mm A')
        stamp = arr_date.replace(hour=parsed.hour, minute=parsed.minute).datetime
        converted_flows.append((load_val, stamp))

    return converted_flows
Example 67
Project: akshare   Author: jindaxiang   File: zh_stock_a_sina.py    MIT License 4 votes vote down vote up
def stock_zh_a_spot():
    """
    Fetch real-time quotes for all A-shares from Sina Finance; heavy
    scraping can easily get the IP banned.
    http://vip.stock.finance.sina.com.cn/mkt/#qbgg_hk

    Every page reported by ``get_zh_a_page_count()`` is requested, decoded
    with ``demjson`` and the pages are concatenated into one frame.

    :return: pandas.DataFrame
                symbol    code  name   trade pricechange changepercent     buy  \
    0     sh600000  600000  浦发银行  12.920      -0.030        -0.232  12.920
    1     sh600004  600004  白云机场  18.110      -0.370        -2.002  18.110
    2     sh600006  600006  东风汽车   4.410      -0.030        -0.676   4.410
    3     sh600007  600007  中国国贸  17.240      -0.360        -2.045  17.240
    4     sh600008  600008  首创股份   3.320      -0.030        -0.896   3.310
            ...     ...   ...     ...         ...           ...     ...
    3755  sh600096  600096   云天化   5.270      -0.220        -4.007   5.270
    3756  sh600097  600097  开创国际  10.180      -0.120        -1.165  10.180
    3757  sh600098  600098  广州发展   6.550      -0.040        -0.607   6.540
    3758  sh600099  600099  林海股份   6.540      -0.150        -2.242   6.540
    3759  sh600100  600100  同方股份   8.200      -0.100        -1.205   8.200
            sell settlement    open    high     low    volume     amount  \
    0     12.930     12.950  12.950  13.100  12.860  46023920  597016896
    1     18.120     18.480  18.510  18.510  17.880  24175071  437419344
    2      4.420      4.440   4.490   4.490   4.410   4304900   19130233
    3     17.280     17.600  17.670  17.670  17.220    684801   11879731
    4      3.320      3.350   3.360   3.360   3.300   8284294   27579688
          ...        ...     ...     ...     ...       ...        ...
    3755   5.280      5.490   5.490   5.500   5.220  16964636   90595172
    3756  10.190     10.300  10.220  10.340  10.090   1001676   10231669
    3757   6.550      6.590   6.560   6.620   6.500   1996449   13098901
    3758   6.580      6.690   6.650   6.680   6.530   1866180   12314997
    3759   8.210      8.300   8.300   8.310   8.120  12087236   99281447
          ticktime      per     pb        mktcap           nmc  turnoverratio
    0     15:00:00    6.984  0.790  3.792289e+07  3.631006e+07        0.16376
    1     15:00:07   32.927  2.365  3.747539e+06  3.747539e+06        1.16826
    2     15:00:02   15.926  1.207  8.820000e+05  8.820000e+05        0.21525
    3     15:00:02   22.390  2.367  1.736555e+06  1.736555e+06        0.06798
    4     15:00:07   22.912  1.730  1.887569e+06  1.600444e+06        0.17185
            ...      ...    ...           ...           ...            ...
    3755  15:00:00   56.728  1.566  7.523847e+05  6.963668e+05        1.28386
    3756  15:00:00   17.552  1.434  2.452734e+05  2.303459e+05        0.44268
    3757  15:00:00   25.476  1.059  1.785659e+06  1.785659e+06        0.07323
    3758  15:00:00  540.496  3.023  1.433045e+05  1.433045e+05        0.85167
    3759  15:00:07   -6.264  1.465  2.430397e+06  2.430397e+06        0.40782
    """
    page_count = get_zh_a_page_count()
    zh_sina_stock_payload_copy = zh_sina_a_stock_payload.copy()
    frames = []
    for page in range(1, page_count+1):
        print(page)
        zh_sina_stock_payload_copy.update({"page": page})
        res = requests.get(
            zh_sina_a_stock_url,
            params=zh_sina_stock_payload_copy)
        data_json = demjson.decode(res.text)
        frames.append(pd.DataFrame(data_json))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # An empty frame is returned when there are no pages, as before.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
Example 68
Project: akshare   Author: jindaxiang   File: zh_stock_kcb_sina.py    MIT License 4 votes vote down vote up
def stock_zh_kcb_daily(symbol="sh688008", factor=""):
    """
    Fetch daily quotes, or an adjustment-factor table, for one stock from
    Sina Finance. Heavy scraping may get the caller's IP blocked by the site.

    Only the request that is actually needed for the requested table is sent.

    :param symbol: str e.g., sh600000
    :param factor: str, empty by default, which returns the unadjusted daily
        quotes; "qfq" returns the forward-adjustment factors; "hfq" returns
        the backward-adjustment factors. Any other non-empty value returns
        None.
    :return: pandas.DataFrame (None for an unsupported ``factor``)

    Unadjusted data (columns: date, open, high, low, close, volume,
    after-hours volume, after-hours amount)
                日期     开盘价     最高价     最低价     收盘价        成交    盘后量      盘后额
    0   2019-07-22  91.300  97.200  66.300  74.920  58330685  40778  3055088
    1   2019-07-23  70.020  78.880  70.000  74.130  23906020  43909  3254974
    2   2019-07-24  74.130  76.550  72.500  75.880  21608530  23149  1756546
    3   2019-07-25  75.000  79.980  74.600  78.000  24626920  66921  5219838
    4   2019-07-26  76.780  76.780  70.300  71.680  16831530  49106  3519918
    ..         ...     ...     ...     ...     ...       ...    ...      ...
    67  2019-10-31  59.790  60.500  57.800  58.290   2886407   3846   224183
    68  2019-11-01  57.900  59.960  57.600  59.250   2246059      0        0
    69  2019-11-04  60.040  61.880  60.040  61.740   3945106   1782   110021
    70  2019-11-05  61.100  62.780  60.850  62.160   4187105    400    24864
    71  2019-11-06  62.320  62.620  60.900  61.130   2331354   1300    79469

    Backward-adjustment factors
             date          hfq_factor
    0  2019-07-22  1.0000000000000000
    1  1900-01-01  1.0000000000000000

    Forward-adjustment factors
                 date          qfq_factor
    0  2019-07-22  1.0000000000000000
    1  1900-01-01  1.0000000000000000
    """
    # Local import: only the factor branch needs it, and it keeps this
    # function self-contained with respect to the module's import block.
    import ast

    if not factor:
        res = requests.get(
            zh_sina_kcb_stock_hist_url.format(
                symbol, datetime.datetime.now().strftime("%Y_%m_%d"), symbol
            )
        )
        # The response wraps a JSON-like array in other text; keep only the
        # outermost "[ ... ]" span before decoding.
        text = res.text
        data_json = demjson.decode(text[text.find("["):text.rfind("]") + 1])
        data_df = pd.DataFrame(data_json)
        data_df.columns = ["日期", "开盘价", "最高价", "最低价", "收盘价", "成交", "盘后量", "盘后额"]
        return data_df

    if factor == "hfq":
        factor_url = zh_sina_kcb_stock_hfq_url
        factor_column = "hfq_factor"
    elif factor == "qfq":
        factor_url = zh_sina_kcb_stock_qfq_url
        factor_column = "qfq_factor"
    else:
        # Unsupported factor: the original fell through and returned None;
        # keep that contract, but without issuing any request.
        return None

    res = requests.get(factor_url.format(symbol))
    # The payload is the text between the first and second "=" on the first
    # line of the response.
    payload = res.text.split("=")[1].split("\n")[0]
    # SECURITY: the original called eval() on this remote text, which would
    # execute arbitrary code if the response were tampered with.
    # ast.literal_eval accepts only Python literals and raises otherwise.
    factor_df = pd.DataFrame(ast.literal_eval(payload)["data"])
    factor_df.columns = ["date", factor_column]
    return factor_df
Example 69
Project: akshare   Author: jindaxiang   File: zh_stock_zrbg_hx.py    MIT License 4 votes vote down vote up
def stock_zh_a_scr_report(report_year=2018, page=1):
    """
    Fetch one page of the Hexun Finance listed-company social responsibility
    report (annual data, available from 2010 onwards).
    Because there are many stocks, getting a whole year means iterating over
    every page.

    :param report_year: int, the report year; the request date is sent as
        "<report_year>-12-31"
    :param page: int, the page to fetch
    :return: pandas.DataFrame with one row per entry of the response's "list"
        (columns: stock name, shareholder responsibility, total score, grade,
        employee responsibility, environmental responsibility, social
        responsibility, supplier/customer/consumer rights responsibility)
            股票名称   股东责任    总得分 等级  员工责任  环境责任   社会责任 供应商、客户和消费者权益责任
    0    陆家嘴(600663)  23.97  42.97  C  4.00  0.00  15.00           0.00
    1   世荣兆业(002016)  24.61  42.44  C  2.83  0.00  15.00           0.00
    2    万科A(000002)  23.18  42.18  C  4.00  0.00  15.00           0.00
    3   华夏幸福(600340)  22.76  41.76  C  4.00  0.00  15.00           0.00
    4   万业企业(600641)  22.03  41.03  C  4.00  0.00  15.00           0.00
    5   华联控股(000036)  24.96  40.98  C  1.02  0.00  15.00           0.00
    6   中国国贸(600007)  22.43  40.98  C  3.55  0.00  15.00           0.00
    7    新黄浦(600638)  22.25  40.92  C  3.67  0.00  15.00           0.00
    8   金科股份(000656)  22.47  40.62  C  4.00  0.00  14.15           0.00
    9   中华企业(600675)  21.44  40.44  C  4.00  0.00  15.00           0.00
    10  京投发展(600683)  21.39  40.39  C  4.00  0.00  15.00           0.00
    11  浦东金桥(600639)  21.32  40.32  C  4.00  0.00  15.00           0.00
    12  中航善达(000043)  22.80  40.06  C  2.26  0.00  15.00           0.00
    13   新华联(000620)  21.00  40.00  D  4.00  0.00  15.00           0.00
    14  首开股份(600376)  20.77  39.77  D  4.00  0.00  15.00           0.00
    15  深物业A(000011)  23.49  39.73  D  1.50  0.00  14.74           0.00
    16  江苏租赁(600901)  20.62  39.62  D  4.00  0.00  15.00           0.00
    17  中国太保(601601)  20.61  39.61  D  4.00  0.00  15.00           0.00
    18  中国平安(601318)  20.59  39.59  D  4.00  0.00  15.00           0.00
    19  深深房A(000029)  22.45  39.47  D  2.02  0.00  15.00           0.00
    """
    # (response field, output column label) pairs in output column order.
    # The response field names do not describe the values they carry; the
    # labels are what each field is published as.
    field_labels = (
        ("industry", "股票名称"),
        ("stockNumber", "股东责任"),
        ("industryrate", "总得分"),
        ("Pricelimit", "等级"),
        ("lootingchips", "员工责任"),
        ("rscramble", "环境责任"),
        ("Strongstock", "社会责任"),
        ("Scramble", "供应商、客户和消费者权益责任"),
    )

    # Work on a copy so the shared module-level parameters are not mutated.
    query = hx_params.copy()
    query["date"] = "{}-12-31".format(str(report_year))
    query["page"] = page

    response = requests.get(hx_url, headers=hx_headers, params=query)
    body = response.text
    # Strip the JSONP-style wrapper: keep what lies between the first "("
    # and the last ")".
    wrapped_payload = body[body.find("(") + 1:body.rfind(")")]
    decoded = demjson.decode(wrapped_payload)

    # One row per field, then transpose so each field becomes a column.
    rows = [
        [record[field] for record in decoded["list"]]
        for field, _ in field_labels
    ]
    labels = [label for _, label in field_labels]
    return pd.DataFrame(rows, index=labels).T
Example 70
Project: akshare   Author: jindaxiang   File: zh_stock_index_sina.py    MIT License 4 votes vote down vote up
def stock_zh_index_spot():
    """
    Fetch real-time quotes for all indices from Sina Finance. Heavy scraping
    may get the caller's IP blocked by the site.
    http://vip.stock.finance.sina.com.cn/mkt/#hs_s

    Pages 1..get_zh_index_page_count() are requested one by one and their
    rows are concatenated with a fresh 0..n-1 index. When the page count is
    zero an empty DataFrame is returned.

    :return: pandas.DataFrame
           symbol   name      trade pricechange changepercent buy sell settlement  \
    0    sh000001   上证指数  2891.3431     -18.527        -0.637   0    0  2909.8697
    1    sh000002   A股指数  3029.2626     -19.372        -0.635   0    0  3048.6349
    2    sh000003   B股指数   255.2202      -2.955        -1.145   0    0   258.1752
    3    sh000004   工业指数  2265.4238     -19.259        -0.843   0    0  2284.6832
    4    sh000005   商业指数  2625.3647     -31.024        -1.168   0    0  2656.3890
    ..        ...    ...        ...         ...           ...  ..  ...        ...
    635  sh000079   公用等权  1855.8680     -26.981        -1.433   0    0  1882.8489
    636  sh000090   上证流通  1085.5027      -7.803        -0.714   0    0  1093.3057
    637  sh000091   沪财中小  7327.8375     -95.106        -1.281   0    0  7422.9430
    638  sh000092   资源50  2068.8152     -21.561        -1.031   0    0  2090.3766
    639  sh000093  180分层  9200.0115     -89.053        -0.959   0    0  9289.0642
              open       high        low     volume        amount    code  \
    0    2911.3500  2917.8293  2891.2043  135519463  152576392736  000001
    1    3050.1878  3057.0351  3029.1168  135417123  152529532272  000002
    2     258.2375   258.2375   255.2202     102341      46860464  000003
    3    2284.4955  2288.8948  2265.4238   81989859  100573430149  000004
    4    2654.1212  2661.1885  2624.7153    9706663   10959125999  000005
    ..         ...        ...        ...        ...           ...     ...
    635  1880.9163  1881.3908  1855.6935    3771090    2602021459  000079
    636  1093.7894  1095.7378  1085.5027  132459529  141897444366  000090
    637  7416.8197  7417.4444  7326.1919   14811864   17551488362  000091
    638  2089.1099  2089.8730  2068.8152    9466656    8101366245  000092
    639  9287.6393  9302.3461  9200.0115   49017964   59840452615  000093
         ticktime
    0    15:02:03
    1    15:02:03
    2    15:02:03
    3    15:02:03
    4    15:02:03
    ..        ...
    635  15:02:03
    636  15:02:03
    637  15:02:03
    638  15:02:03
    639  15:02:03
    """
    page_count = get_zh_index_page_count()
    # Work on a copy so the shared module-level payload is not mutated.
    payload = zh_sina_index_stock_payload.copy()

    # Collect one frame per page and concatenate once at the end.
    # DataFrame.append (used originally) re-copies the accumulated frame on
    # every iteration and no longer exists in pandas >= 2.0.
    page_frames = []
    for page in range(1, page_count + 1):
        print(page)  # progress output, kept from the original behaviour
        payload["page"] = page
        res = requests.get(zh_sina_index_stock_url, params=payload)
        page_frames.append(pd.DataFrame(demjson.decode(res.text)))

    if not page_frames:
        # pd.concat raises on an empty list; preserve the original
        # "no pages -> empty DataFrame" result.
        return pd.DataFrame()
    return pd.concat(page_frames, ignore_index=True)