Python3使用jieba分词并生成WordCloud词云图

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import jieba
import re
from scipy.misc import imread  # 这是一个处理图像的函数
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt

def stop_words(texts):
    """Segment *texts* with jieba and remove stop words.

    Stop words are loaded from ``./stopwords.txt`` (relative to the current
    working directory), which is expected to contain one word per line.

    Args:
        texts: The text to segment.

    Returns:
        The surviving tokens joined by single spaces, which is the
        whitespace-delimited form the word-cloud generator consumes.

    Raises:
        OSError: If ``./stopwords.txt`` cannot be opened.
    """
    # Register a custom dictionary entry before segmenting.
    jieba.add_word('测试')

    # Decode explicitly so the result does not depend on the platform's
    # default encoding; "utf-8-sig" also tolerates a leading BOM, which would
    # otherwise corrupt the first stop word.
    with open('./stopwords.txt', encoding='utf-8-sig') as f:
        # A set of whole lines gives exact-word matching.  A substring test
        # against the raw file contents would also reject any token that
        # merely appears inside a longer stop word.
        stopwords = {line.strip() for line in f}
    stopwords.discard('')

    words_list = []
    # Precise mode (cut_all=False); jieba.cut yields tokens lazily.
    for word in jieba.cut(texts, cut_all=False):
        token = word.strip()
        # Skip whitespace-only tokens and exact stop-word matches.
        if token and token not in stopwords:
            words_list.append(word)
    return ' '.join(words_list)


def start(text, output_path='result.png', font_path="PingFang.ttc"):
    """Render a word cloud from the Chinese words in *text* and save it.

    Only characters in the range U+4E00..U+9FA5 are kept; the result is
    segmented and stop-word filtered by ``stop_words`` before being handed
    to WordCloud.

    Args:
        text: Raw input text.
        output_path: Destination image file. Defaults to ``'result.png'``.
        font_path: Font file used to draw the words. Defaults to
            ``"PingFang.ttc"``; presumably it must contain CJK glyphs for
            the words to render correctly.

    Raises:
        ValueError: If nothing is left of *text* after keeping only Chinese
            characters and removing stop words.
    """
    # Keep only Chinese characters, then segment and drop stop words.
    text = ''.join(re.findall(r'[\u4e00-\u9fa5]', text))
    text = stop_words(text)
    if not text.strip():
        raise ValueError(
            'no Chinese words left to plot after stop-word filtering'
        )

    # NOTE: a mask image array can be supplied through WordCloud's `mask`
    # argument to give the cloud a custom shape.
    wc = WordCloud(
        background_color='white',  # canvas background colour
        max_words=1000,  # upper bound on the number of words drawn
        max_font_size=200,  # largest font size used
        font_path=font_path,
        width=1920,  # image width in pixels
        height=1080,  # image height in pixels
    )
    wc.generate(text)

    # The image is written straight to disk; no matplotlib figure is created,
    # so repeated calls do not accumulate open figures.
    wc.to_file(output_path)


if __name__ == '__main__':
    # Demo entry point: build a word cloud from a short sample sentence.
    sample_text = "这是一段很长的文本"
    start(sample_text)
上一页：Python3实现Webhook | 下一页：Python3使用Pool进程池实现多进程并发