首页 > 技术文章 > 词云 wordcloud

wang-daydayup 2020-09-16 14:31 原文

import jieba  # 分词
import numpy as np  # 矩阵运算
import pymysql
from PIL import Image  # 图片处理
from matplotlib import pyplot as plt  # 绘图,数据可视化
from wordcloud import WordCloud  # 词云

# Read the `inq` column of every row in `t_movie` and merge the values into a
# single space-separated string that is segmented further down.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration or the environment.
conn = pymysql.connect(host="localhost", port=3306, user="pig", passwd="123456", db="runoob", charset='utf8')
try:
    # The cursor context manager closes the cursor even if the query raises.
    with conn.cursor() as cursor:
        cursor.execute('select `inq` from `t_movie`')
        data = cursor.fetchall()
finally:
    # Always release the connection, including when the query fails.
    conn.close()

# Each row is a 1-tuple; skip NULL values because str.join raises TypeError
# on None.
text = ' '.join(row[0] for row in data if row[0] is not None)

# Segment the combined text with jieba (cut_all=False, i.e. not full mode).
# jieba.cut returns a one-shot generator, so it can be consumed only once.
fenci_text = jieba.cut(text, cut_all=False)

# Echo the raw text followed by a 30-dash divider.
print(text, '-' * 30, sep='\n')

# Join the tokens with spaces so the word cloud can split on whitespace,
# then echo the segmented text as well.
show_text = ' '.join(fenci_text)
print(show_text)
# Load the mask image into an array. The context manager closes the
# underlying file handle once the pixel data has been copied out.
with Image.open(r'./static/img/123.jpg') as img:
    img_array = np.array(img)

# Configure the word cloud and build it from the segmented text.
# NOTE(review): font_path is a Windows-only absolute path; a font with CJK
# glyphs is needed to render Chinese words, so adjust it on other systems.
wc = WordCloud(
    background_color='white',  # canvas background colour
    mask=img_array,  # image array used as the mask for the cloud
    font_path=r'C:\Windows\Fonts\FZSTK.TTF',  # font file (Microsoft YaHei could not be located)
    collocations=False,  # skip two-word collocations to avoid repeated words
    max_words=1000,  # maximum number of words shown
    max_font_size=500,  # largest font size
    min_font_size=20,  # smallest font size
)
wc.generate_from_text(show_text)

# Save the image first: plt.show() normally blocks until the window is
# closed, so saving afterwards would delay the file, or lose it entirely if
# the display step fails or the run is interrupted.
wc.to_file('ttt.png')

# Render the word cloud in a matplotlib window.
fig = plt.figure(1)
plt.imshow(wc)
plt.axis('off')  # hide the axes

plt.show()

 

推荐阅读