# -*- coding: utf-8 -*-
# author:zhengk
"""Build a word cloud from the titles of Toutiao's "hot news" feed.

Pipeline: download titles -> count noun-like words -> render the cloud.
"""
import json
import os
import re
from collections import Counter

import jieba.posseg as pseg
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image as Image
import requests
from wordcloud import WordCloud, ImageColorGenerator

_FEED_URL = "https://www.toutiao.com/api/pc/feed/"
_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}

# Part-of-speech filter applied to jieba flags.  It is used with ``search``,
# so any flag that contains an "n" passes, not only flags starting with it.
_NOUN_FLAG = re.compile(r"n[a-z0-9]{0,2}")


def get_news_hot(loop=5, timeout=10):
    """Download hot-news titles and write them to ``hot_news.txt``.

    The feed is requested ``loop`` times.  After each successful response the
    ``max_behot_time`` cursor is replaced by the value the response reports
    under ``next``, and that cursor is sent with the following request.

    Args:
        loop: Number of requests to issue.
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises, so a stalled connection cannot hang the script.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError: If a 200 response lacks the ``data``/``next`` fields.
    """
    max_behot_time = 0
    with open('hot_news.txt', 'w', encoding='utf-8') as f:
        for _ in range(loop):
            params = {
                'category': 'news_hot',
                'utm_source': 'toutiao',
                'widen': 1,
                'max_behot_time': max_behot_time,
                'max_behot_time_tmp': max_behot_time,
            }
            res = requests.get(_FEED_URL, params=params, headers=_HEADERS,
                               timeout=timeout)
            # A non-200 response is skipped; the cursor is left untouched, so
            # the next iteration asks for the same page again.
            if res.status_code != 200:
                continue
            data = json.loads(res.text)
            for news in data['data']:
                # One title per line: without the separator, adjacent titles
                # run together and get segmented as a single sentence.
                f.write(news['title'] + '\n')
            max_behot_time = data['next']['max_behot_time']


def extract_words():
    """Count the noun-like words found in ``hot_news.txt``.

    Each non-blank line is segmented with jieba's part-of-speech tagger.  A
    word is counted when it is not listed in ``stopwords.txt`` and its flag
    matches ``_NOUN_FLAG``.

    Returns:
        dict mapping each kept word to its number of occurrences.
    """
    counts = Counter()
    with open('hot_news.txt', 'r', encoding='utf-8') as news_file, \
            open('stopwords.txt', encoding='utf-8') as stop_file:
        stop_words = {line.strip() for line in stop_file}
        for subject in news_file:
            if subject.isspace():
                continue
            for word, flag in pseg.cut(subject):
                if word not in stop_words and _NOUN_FLAG.search(flag) is not None:
                    counts[word] += 1
    return dict(counts)


def draw_word_cloud(content):
    """Render ``content`` as a word cloud shaped and colored by ``toutiao.jpg``.

    The mask image and the font are resolved relative to this file's
    directory.  The figure is saved as ``toutiao_wordcloud.png`` in the
    current working directory and then displayed.

    Args:
        content: Mapping of word -> frequency.
    """
    d = os.path.dirname(__file__)
    with Image.open(os.path.join(d, "toutiao.jpg")) as img:
        # Figure size (inches) is derived from the mask's pixel dimensions.
        width = img.width / 80
        height = img.height / 80
        mask = np.array(img)

    my_wordcloud = WordCloud(background_color="white",
                             max_words=500,
                             mask=mask,
                             max_font_size=200,
                             random_state=42,
                             font_path=os.path.join(d, "../common/font/PingFang.ttc"))
    my_wordcloud.generate_from_frequencies(content)
    # Take word colors from the mask image.  recolor() updates the cloud in
    # place, so a single imshow() of it is enough.
    my_wordcloud.recolor(color_func=ImageColorGenerator(mask))

    plt.figure(figsize=(width, height))
    plt.imshow(my_wordcloud)
    plt.axis("off")
    # Use subplots_adjust to control the outer margins of the figure.
    plt.subplots_adjust(bottom=.01, top=.99, left=.01, right=.99)
    plt.savefig("toutiao_wordcloud.png")
    plt.show()


def main():
    """Run the full pipeline: fetch titles, count words, draw the cloud."""
    get_news_hot(5)
    content = extract_words()
    draw_word_cloud(content)


if __name__ == '__main__':
    main()