# -*- coding:utf-8 -*-
"""
author:Mr Yang
data:2019/09/26
"""
import itchat
import matplotlib.pyplot as plt
import matplotlib
import os
import re
import numpy as np
from PIL import Image
from wordcloud.wordcloud import WordCloud
class WeiXinAnalyze():
def __init__(self):
self.sex_dict = {}
self.city_dict = {}
self.self_nickname = ''
self.wordList = []
self.save_path = 'result'
def wx_login(self):
"""微信登录及数据统计方法"""
itchat.login()
data = itchat.get_friends(update=True)
self.self_nickname = data[0]['NickName'] # 获取此微信昵称
for item in data:
sex = item.get('Sex') # 性别
province = item.get('Province') # 省份
city = item.get('City') # 城市
if sex or sex == 0:
if sex not in self.sex_dict:
self.sex_dict[sex] = 1
else:
self.sex_dict[sex] += 1
if province and city:
address = '-'.join([province, city])
if address not in self.city_dict:
self.city_dict[address] = 1
else:
self.city_dict[address] += 1
if item["Signature"]:
signature = re.sub(r'
self.wordList.append(signature)
if not os.path.exists(self.save_path):
os.mkdir(self.save_path)
def get_sex_info(self):
"""分析性别方法"""
sex_dict = {}
sex_dict['男'], sex_dict['女'], sex_dict['其它'] = self.sex_dict.pop(1), self.sex_dict.pop(2), self.sex_dict.pop(0)
# 数据准备
data = sex_dict.items()
labels = [i[0] for i in data]
nums = [i[1] for i in data]
# 使用Matplotlib画出饼图
matplotlib.rcParams['font.sans-serif'] = ['SimHei'] # 使用指定的汉字字体类型(此处为黑体)
plt.title('微信好友性别统计({})'.format(self.self_nickname))
plt.pie(x=nums, labels=labels, autopct='%1.2f%%')
plt.savefig(os.path.join(self.save_path,'wx_sex_data.png'))
def get_address_info(self):
"""分析地址方法"""
data = self.city_dict.items()
sort_data = sorted(data, key=lambda i: i[1], reverse=True)[:20]
x = [i[0] for i in sort_data]
y = [i[1] for i in sort_data]
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
fig, ax = plt.subplots()
rects = ax.barh(x, y, color='greenyellow', align="center")
ax.set_yticks(x) # 设置标度的位置
ax.set_yticklabels(x) # 设置纵坐标的每一个刻度的属性值
ax.invert_yaxis() # 反转标度值
ax.set_xlabel('人数(位)') # 设置横坐标的单位
ax.set_title('微信好友所在城市统计({})'.format(self.self_nickname)) # 设定图片的标题
for rect, y, num in zip(rects, x, y):
x = rect.get_width()
plt.text(x + 0.05, y, "%d" % int(num))
plt.savefig(os.path.join(self.save_path,'wx_address_data.png'))
def wx_ciyun(self):
"""微信词云方法"""
text = " ".join(self.wordList)
# 获取当前文件的执行路径
src_dir = os.getcwd()
# 生成词云形状的图片地址
imagePath = os.path.join(src_dir , "timg.jpg")
# 生成词云字体地址(防止中文乱码)
font = os.path.join(src_dir ,"SimHei.ttf")
# 生成的词云图地址
resultPath = os.path.join(self.save_path,'wx_ciyun.png')
# 开始生成图片
bg = np.array(Image.open(imagePath))
wc = WordCloud(
mode='RGBA', # 设置透明底色
mask=bg, # 造型遮盖
background_color="white", # 背景颜色
max_font_size=80, # 字体最大值
min_font_size=10, # 字体最小值
max_words=5000, # 词云显示的最大词数
random_state=100, # 设置有多少种随机生成状态,即有多少种配色方案
font_path=font, # 设置字体
).generate(text)
wc.to_file(resultPath)
if __name__ == '__main__':
weixin = WeiXinAnalyze()
weixin.wx_login()
weixin.get_sex_info()
weixin.get_address_info()
weixin.wx_ciyun()
最后看结果:
好累,第一次接触数据分析。