"""Helpers for fetching price data and company news from Google Finance."""

import json
import time
from datetime import datetime
from html import unescape
from urllib.request import Request, urlopen

import demjson
import pandas as pd
import requests


def get_price_data(query):
    """Fetch price rows from the Google Finance ``getprices`` endpoint.

    Args:
        query: Mapping of query-string parameters passed to the endpoint.
            The key ``'i'`` is read (and converted with ``int``) whenever a
            row carries a numeric offset instead of an absolute timestamp.

    Returns:
        A ``pandas.DataFrame`` indexed by ``datetime`` with the columns
        ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.
    """
    r = requests.get("https://finance.google.com/finance/getprices",
                     params=query)
    data = []
    index = []
    basetime = 0
    for price in r.text.splitlines():
        cols = price.split(",")
        marker = cols[0]
        # An empty line has no first character to inspect; skip it instead
        # of raising IndexError.
        if not marker:
            continue
        if marker[0] == 'a':
            # 'a<epoch>' rows carry an absolute timestamp that becomes the
            # base for the offset rows that follow.
            basetime = int(marker[1:])
            timestamp = basetime
        elif marker[0].isdigit():
            # Numeric rows are offsets from the last base, counted in units
            # of query['i'].
            timestamp = basetime + int(marker) * int(query['i'])
        else:
            # Any other line (e.g. a header) is not a price row.
            continue
        index.append(datetime.fromtimestamp(timestamp))
        # Reorder the response fields into Open, High, Low, Close, Volume.
        data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                     float(cols[1]), int(cols[5])])
    return pd.DataFrame(data, index=index,
                        columns=['Open', 'High', 'Low', 'Close', 'Volume'])


def buildNewsUrl(symbol, qs='&start=0&num=5'):
    """Return the company-news JSON URL for *symbol*.

    Args:
        symbol: Text appended verbatim after ``q=``.
        qs: Extra query-string suffix appended verbatim after the symbol.
    """
    return 'http://www.google.com/finance/company_news?output=json&q=' \
        + symbol + qs


def request(symbols):
    """Download the resource for *symbols* and return its text payload.

    The body is decoded as ASCII with undecodable bytes dropped, stripped of
    surrounding whitespace, and returned without its first three characters.
    """
    # NOTE(review): buildUrl is not defined in the visible part of this
    # file -- confirm it is provided elsewhere before relying on request().
    url = buildUrl(symbols)
    req = Request(url)
    # Close the HTTP response deterministically once the body is read.
    with urlopen(req) as resp:
        # remove special symbols such as the pound symbol
        content = resp.read().decode('ascii', 'ignore').strip()
    # Drop the three-character prefix that precedes the payload.
    content = content[3:]
    return content


def getNews(symbol):
    """Return ``[title, url]`` pairs for the news articles about *symbol*.

    The response is parsed with ``demjson``. Articles are collected from the
    ``'a'`` entry of every element of ``clusters``; each title (``'t'``) is
    HTML-unescaped and stripped, and paired with the article's ``'u'`` value.
    """
    url = buildNewsUrl(symbol)
    # Close the HTTP response deterministically once the body is read.
    with urlopen(url) as resp:
        content = resp.read().decode('utf-8')
    content_json = demjson.decode(content)

    article_json = []
    for cluster in content_json['clusters']:
        # Only clusters that carry an 'a' entry contribute articles.
        if 'a' in cluster:
            article_json.extend(cluster['a'])

    return [[unescape(art['t']).strip(), art['u']] for art in article_json]