#! /usr/bin/env python # -*- coding:utf-8 -*- """ @author : MG @Time : 2018/8/27 9:48 @File : coin.py @contact : mmmaaaggg@163.com @desc : v1 接口将于2018年12月关闭,此后将只能试用pro接口 """ import datetime from cryptocmd import CmcScraper import pandas as pd from tasks import app import requests from cryptocmd.utils import InvalidCoinCode, get_url_data, extract_data, download_coin_data from sqlalchemy.types import String, Date, Integer from sqlalchemy.dialects.mysql import DOUBLE, DATETIME from tasks.backend import engine_md from ibats_utils.db import with_db_session, alter_table_2_myisam, bunch_insert_on_duplicate_update, execute_sql from ibats_utils.mess import str_2_date, date_2_str, str_2_datetime from tasks.config import config import logging DEBUG = False logger = logging.getLogger() DATE_FORMAT_STR_CMC = '%d-%m-%Y' DATE_FORMAT_STR = '%Y-%m-%d' DATETIME_FORMAT_STR = '%Y-%m-%dT%H:%M:%S.%fZ' def get_coin_ids(coin_code): """ This method fetches the name(id) of currency from the given code :param coin_code: coin code of a cryptocurrency e.g. btc :return: coin-id for the a cryptocurrency on the coinmarketcap.com """ ids = [] try: url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0' json_resp = get_url_data(url).json() coin_code = coin_code.upper() for coin in json_resp: if coin['symbol'] == coin_code: ids.append(coin['id']) if len(ids) == 0: raise InvalidCoinCode('This coin code is unavailable on "coinmarketcap.com"') except Exception as e: raise e return ids def download_coin_data_by_id(coin_id, start_date, end_date): """ Download HTML price history for the specified cryptocurrency and time range from CoinMarketCap. :param coin_id: coin_id of a cryptocurrency e.g. btc :param start_date: date since when to scrape data (in the format of dd-mm-yyyy) :param end_date: date to which scrape the data (in the format of dd-mm-yyyy) :return: returns html of the webpage having historical data of cryptocurrency for certain duration """ if start_date is None: # default start date on coinmarketcap.com start_date = '28-4-2013' if end_date is None: yesterday = datetime.date.today() - datetime.timedelta(1) end_date = yesterday.strftime('%d-%m-%Y') # coin_id = get_coin_id(coin_code) # Format the dates as required for the url. start_date = datetime.datetime.strptime(start_date, '%d-%m-%Y').strftime('%Y%m%d') end_date = datetime.datetime.strptime(end_date, '%d-%m-%Y').strftime('%Y%m%d') url = 'https://coinmarketcap.com/currencies/{0}/historical-data/?start={1}&end={2}'.format(coin_id, start_date, end_date) try: html = get_url_data(url).text return html except Exception as e: print("Error fetching price data for {} for interval '{}' and '{}'", coin_id, start_date, end_date) if hasattr(e, 'message'): print('Error message (download_data) :', e.message) else: print('Error message (download_data) :', e) class CmcScraperV1(CmcScraper): """扩展原有类,支持根据id进行数据下载""" def __init__(self, coin_code, coin_id=None, start_date=None, end_date=None, all_time=False): """ :param coin_code: coin code of cryptocurrency e.g. btc :param coin_id: default None, somtimes has more than one coin, like: ACC has three coins: adcoin, accelerator-network, acchain, with different ids :param start_date: date since when to scrape data (in the format of dd-mm-yyyy) :param end_date: date to which scrape the data (in the format of dd-mm-yyyy) :param all_time: 'True' if need data of all time for respective cryptocurrency """ CmcScraper.__init__(self, coin_code, start_date, end_date, all_time) self.coin_id = coin_id def _download_data(self, **kwargs): """ This method downloads the data. :param forced: (optional) if ``True``, data will be re-downloaded. :return: """ forced = kwargs.get('forced') if self.headers and self.rows and not forced: return if self.all_time: self.start_date, self.end_date = None, None if self.coin_id is None: table = download_coin_data(self.coin_code, self.start_date, self.end_date) else: table = download_coin_data_by_id(self.coin_id, self.start_date, self.end_date) # self.headers, self.rows, self.start_date, self.end_date = extract_data(table) self.end_date, self.start_date, self.headers, self.rows = extract_data(table) @app.task def import_coin_info(chain_param=None, ): """插入基础信息数据到 cmc_coin_v1_info""" table_name = "cmc_coin_v1_info" logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # url = 'https://api.coinmarketcap.com/v2/listings/' # dtype = { # 'id': String(60), # 'name': String(60), # 'symbol': String(20), # 'website_slug': String(60), # } url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0' dtype = { 'id': String(60), 'name': String(60), 'symbol': String(20), 'rank': Integer, 'price_usd': DOUBLE, 'price_btc': DOUBLE, '24h_volume_usd': DOUBLE, 'market_cap_usd': DOUBLE, 'available_supply': DOUBLE, 'total_supply': DOUBLE, 'max_supply': DOUBLE, 'percent_change_1h': DOUBLE, 'percent_change_24h': DOUBLE, 'percent_change_7d': DOUBLE, 'last_updated': DATETIME, } rsp = requests.get(url) if rsp.status_code != 200: raise ValueError('请求 listings 相应失败') json = rsp.json() data_df = pd.DataFrame(json) data_df['last_updated'] = data_df['last_updated'].apply( lambda x: None if x is None else datetime.datetime.fromtimestamp(float(x))) data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST , ADD PRIMARY KEY (`id`)""".format(table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str) # 更新 code_mapping 表 # update_from_info_table(table_name) def rename_by_dic(name, names): """模糊匹配名称,如果找到,则重命名,否则保留原名""" name = name.lower() for candidate in names: if name.find(candidate) != -1: return candidate return name @app.task def import_coin_daily(chain_param=None, id_set=None, begin_time=None): """插入历史数据到 cmc_coin_v1_daily 试用 v1 接口,该接口可能在2018年12月底到期""" table_name = "cmc_coin_v1_daily" info_table_name = "cmc_coin_v1_info" logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) if has_table: sql_str = """ SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} info LEFT OUTER JOIN (SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily ON info.id = daily.id ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY id""".format(table_name=table_name, info_table_name=info_table_name) else: logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name) sql_str = """ SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT id, symbol, null date_frm, null delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} info ) tt ORDER BY id""".format(info_table_name=info_table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 获取每只股票需要获取日线数据的日期区间 stock_date_dic = { (coin_id, symbol): (date_from if begin_time is None else min([date_from, begin_time]), date_to) for coin_id, symbol, date_from, date_to in table.fetchall() if id_set is None or coin_id in id_set} # 设置 dtype dtype = { 'id': String(60), 'date': Date, 'open': DOUBLE, 'high': DOUBLE, 'low': DOUBLE, 'close': DOUBLE, 'volume': DOUBLE, 'market_cap': DOUBLE, } col_names = dtype.keys() data_df_list = [] dic_count = len(stock_date_dic) data_count = 0 # 获取接口数据 logger.info('%d coins will been import into %s', dic_count, table_name) try: for data_num, ((coin_id, symbol), (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1): logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count, coin_id, symbol, date_from, date_to) date_from_str = None try: if date_from is None: scraper = CmcScraperV1(symbol, coin_id) else: date_from_str = date_2_str(str_2_date(date_from, DATE_FORMAT_STR), DATE_FORMAT_STR_CMC) scraper = CmcScraperV1(symbol, coin_id, start_date=date_from_str) data_df = scraper.get_dataframe() except Exception as exp: logger.exception("scraper('%s', '%s', start_date='%s')", symbol, coin_id, date_from_str) continue if data_df is None or data_df.shape[0] == 0: logger.warning('%d/%d) %s has no data during %s %s', data_num, dic_count, coin_id, date_from, date_to) continue data_df.rename(columns={col_name: rename_by_dic(col_name, col_names) for col_name in data_df.columns}, inplace=True) data_df.rename(columns={'market cap': 'market_cap'}, inplace=True) data_df['market_cap'] = data_df['market_cap'].apply(lambda x: 0 if isinstance(x, str) else x) data_df['volume'] = data_df['volume'].apply(lambda x: 0 if isinstance(x, str) else x) logger.info('%d/%d) %d data of %s between %s and %s', data_num, dic_count, data_df.shape[0], coin_id, data_df['date'].min(), data_df['date'].max()) data_df['id'] = coin_id data_df_list.append(data_df) data_count += data_df.shape[0] # 仅供调试使用 if DEBUG and len(data_df_list) > 10: break if data_count > 10000: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) logging.info("%s %d 条信息被更新", table_name, data_count) data_df_list, data_count = [], 0 finally: # 导入数据库 创建 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST , CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`, ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str) @app.task def import_coin_latest(chain_param=None, ): """插入最新价格数据到 cmc_coin_pro_latest """ table_name = 'cmc_coin_pro_latest' has_table = engine_md.has_table(table_name) # 设置 dtype dtype = { 'id': Integer, 'name': String(60), 'slug': String(60), 'symbol': String(20), 'date_added': DATETIME, 'last_updated': DATETIME, 'market_cap': DOUBLE, 'circulating_supply': DOUBLE, 'max_supply': DOUBLE, 'num_market_pairs': DOUBLE, 'percent_change_1h': DOUBLE, 'percent_change_24h': DOUBLE, 'percent_change_7d': DOUBLE, 'price': DOUBLE, 'total_supply': DOUBLE, 'volume_24h': DOUBLE, 'cmc_rank': DOUBLE, } header = { 'Content-Type': 'application/json', 'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY } params = { # 'CMC_PRO_API_KEY': config.CMC_PRO_API_KEY, 'limit': 5000, 'start': 1 } # https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?sort=market_cap&start=0&limit=10&cryptocurrency_type=tokens&convert=USD,BTC url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest" rsp = requests.get(url=url, params=params, headers=header) if rsp.status_code != 200: logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content) return ret_dic = rsp.json() data_list = ret_dic['data'] data_dic_list = [] for dic in data_list: data_dic = {} for key, val in dic.items(): if key == 'quote': for sub_key, sub_val in val['USD'].items(): data_dic[sub_key] = sub_val else: data_dic[key] = val data_dic_list.append(data_dic) data_df = pd.DataFrame(data_dic_list) # 数据整理 data_df['date_added'] = data_df['date_added'].apply(lambda x: str_2_datetime(x, DATETIME_FORMAT_STR)) data_df['last_updated'] = data_df['last_updated'].apply(lambda x: str_2_datetime(x, DATETIME_FORMAT_STR)) data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST , CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`, ADD PRIMARY KEY (`id`, `last_updated`)""".format(table_name=table_name) execute_sql(create_pk_str, engine_md) @app.task def merge_latest(chain_param=None, ): """ 将 cmc_coin_v1_daily 历史数据 以及 cmc_coin_pro_latest 最新价格数据 合并到 cmc_coin_merged_latest :return: """ table_name = 'cmc_coin_merged_latest' logger.info("开始合并数据到 %s 表", table_name) has_table = engine_md.has_table(table_name) create_sql_str = """CREATE TABLE {table_name} ( `id` VARCHAR(60) NOT NULL, `date` DATE NOT NULL, `datetime` DATETIME NULL, `name` VARCHAR(60) NULL, `symbol` VARCHAR(20) NULL, `close` DOUBLE NULL, `volume` DOUBLE NULL, `market_cap` DOUBLE NULL, PRIMARY KEY (`id`, `date`)) ENGINE = MyISAM""".format(table_name=table_name) with with_db_session(engine_md) as session: if not has_table: session.execute(create_sql_str) logger.info("创建 %s 表", table_name) session.execute('truncate table {table_name}'.format(table_name=table_name)) insert_sql_str = """INSERT INTO `{table_name}` (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`) select daily.id, `date`, `date`, `name`, `symbol`, `close`, `volume`, `market_cap` from cmc_coin_v1_daily daily left join cmc_coin_v1_info info on daily.id = info.id""".format(table_name=table_name) session.execute(insert_sql_str) session.commit() insert_latest_sql_str = """INSERT INTO `{table_name}` (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`) select info.id, date(latest.last_updated), latest.last_updated, latest.name, latest.symbol, price, volume_24h, market_cap from cmc_coin_pro_latest latest left join ( select latest.name, latest.symbol, max(latest.last_updated) last_updated from cmc_coin_pro_latest latest group by latest.name, latest.symbol ) g on latest.name = g.name and latest.symbol = g.symbol and latest.last_updated = g.last_updated left outer join cmc_coin_v1_info info on latest.name = info.name and latest.symbol = info.symbol on duplicate key update `datetime`=values(`datetime`), `name`=values(`name`), `symbol`=values(`symbol`), `close`=values(`close`), `volume`=values(`volume`), `market_cap`=values(`market_cap`)""".format(table_name=table_name) session.execute(insert_latest_sql_str) session.commit() data_count = session.execute("select count(*) from {table_name}".format(table_name=table_name)).scalar() logger.info("%d 条记录插入到 %s", data_count, table_name) if __name__ == "__main__": DEBUG = True # import_coin_info() import_coin_daily() # import_coin_latest() # merge_latest()