#!/usr/bin/python
# -*- coding: UTF-8 -*-
import jieba
import re
from scipy.misc import imread # 这是一个处理图像的函数
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
def stop_words(texts, stopwords_path='./stopwords.txt', encoding='utf-8'):
    """Segment ``texts`` with jieba and remove stop words.

    Args:
        texts: Raw text to segment.
        stopwords_path: Path of the stop-word list, one word per line.
        encoding: Text encoding used to read the stop-word file.

    Returns:
        The tokens that are not stop words, joined by single spaces.
        Whitespace-only tokens are dropped.
    """
    # Register a custom word so jieba keeps it as one token.
    jieba.add_word('测试')

    # Load the list into a set: exact-match lookups instead of a substring
    # search over the whole file, which wrongly dropped any token that merely
    # appeared inside a longer stop word.
    with open(stopwords_path, encoding=encoding) as f:
        stopwords = {line.strip() for line in f}
    stopwords.discard('')

    kept = []
    # jieba.cut in precise mode (cut_all=False) yields tokens lazily.
    for word in jieba.cut(texts, cut_all=False):
        token = word.strip()
        if token and token not in stopwords:
            kept.append(word)
    return ' '.join(kept)  # separator is a space, not an empty string
def start(text, output_path='result.png', font_path='PingFang.ttc'):
    """Build a word cloud from the Chinese words in ``text`` and save it.

    Only characters in the range U+4E00..U+9FA5 are kept; the result is
    segmented and stop-word filtered by ``stop_words`` before rendering.

    Args:
        text: Source text.
        output_path: Where the rendered image is written.
        font_path: Font file used to draw the words; it should be a font
            that can render Chinese characters.

    Raises:
        ValueError: If no words are left after filtering.
    """
    wc = WordCloud(
        background_color='white',  # canvas background colour
        max_words=1000,            # upper bound on the number of words drawn
        max_font_size=200,         # largest font size used
        font_path=font_path,
        width=1920,                # image width in pixels
        height=1080,               # image height in pixels
    )

    # Keep only Chinese characters, then segment and drop stop words.
    chinese_only = ''.join(re.findall(r'[\u4e00-\u9fa5]', text))
    words = stop_words(chinese_only)
    if not words.strip():
        # Fail early with a clear message instead of deep inside the renderer.
        raise ValueError('no Chinese words left to render after filtering')

    wc.generate(words)
    # The image is written directly by WordCloud; no matplotlib figure is
    # needed, so none is created (the old one was never drawn or closed).
    wc.to_file(output_path)
if __name__ == '__main__':
    # Script entry point: render a word cloud for a short sample text.
    sample_text = "这是一段很长的文本"
    start(sample_text)