从书籍配套资源下载源代码,获得数据文件sitka_weather_07-2014.csv到project文件夹下
打开csv文件,获取csv文件首行的文件头数据,并将每个文件头及其对应的索引打印出来
# highs_lows.py
import csv

file_name = 'sitka_weather_07-2014.csv'

with open(file_name) as f:
    reader = csv.reader(f)
    # next() is called once, so it returns the first row of the file: the header row.
    header_row = next(reader)

    # enumerate() yields every header together with its index.
    for index, column_header in enumerate(header_row):
        print(index, column_header)
获取Max temperature列的数据
# highs_lows.py
# --snip--
with open(file_name) as f:
    highs = []
    # --snip--
    for row in reader:
        # Per the header row, the second column (index 1, "Max TemperatureF")
        # holds the daily high temperature.
        highs.append(int(row[1]))

    print(highs)
将所获取的highs利用matplotlib的pyplot模块,用plot()绘制出折线图
# highs_lows.py
# --snip--
# Draw the highs as a red line on a 10x6 inch figure.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')

# Title, axis labels and tick label size.
plt.title('Daily high temperatures, July 2014', fontsize=24)
plt.xlabel('Daily', fontsize=16)
plt.ylabel('Temperature (F)', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
使用datetime模块中的datetime类:其strptime()方法接收日期字符串和格式字符串两个实参,并根据格式字符串决定如何解读日期
>>> from datetime import datetime
>>> first_date = datetime.strptime('2014-7-1', '%Y-%m-%d')
>>> print(first_date)
2014-07-01 00:00:00
# highs_lows.py
import csv
from datetime import datetime

from matplotlib import pyplot as plt

file_name = 'sitka_weather_07-2014.csv'

with open(file_name) as f:
    reader = csv.reader(f)
    # next() is called once, so it returns the first row of the file: the header row.
    header_row = next(reader)

    # Record each column header under its index, using enumerate() to get both.
    head_file = {}
    for index, column_header in enumerate(header_row):
        head_file[index] = [column_header]

    # Pull the date (column 0) and the high temperature (column 1,
    # "Max TemperatureF" per the header row) out of every data row.
    dates, highs = [], []
    for row in reader:
        highs.append(int(row[1]))
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
        dates.append(current_date)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

plt.title('Daily high temperatures, July 2014', fontsize=24)
plt.xlabel('Daily', fontsize=16)
# Draw the date labels slanted so they do not overlap.
fig.autofmt_xdate()
plt.ylabel('Temperature (F)', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
新增lows列表变量,记录2014的最低温度值,用于后续plot()方法中绘制出来
# highs_lows.py
import csv
from datetime import datetime

from matplotlib import pyplot as plt

file_name = 'sitka_weather_2014.csv'

with open(file_name) as f:
    reader = csv.reader(f)
    # next() is called once, so it returns the first row of the file: the header row.
    header_row = next(reader)

    # Record each column header under its index, using enumerate() to get both.
    head_file = {}
    for index, column_header in enumerate(header_row):
        head_file[index] = [column_header]

    # Column 0 is parsed as the date, column 1 ("Max TemperatureF" per the
    # header row) is read as the high and column 3 as the low.
    dates, highs, lows = [], [], []
    for row in reader:
        highs.append(int(row[1]))
        lows.append(int(row[3]))
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
        dates.append(current_date)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.plot(dates, lows, c='blue')

plt.title('Daily high temperatures, 2014', fontsize=20)
plt.xlabel('Daily', fontsize=16)
# Draw the date labels slanted so they do not overlap.
fig.autofmt_xdate()
plt.ylabel('Temperature (F)', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=10)

plt.show()
之前的案例已经显示了最高、低温度的折线,使用fill_between()
方法,接收一个x值和两个y值,并填充两个y值系列之间的空间
# highs_lows.py
fig = plt.figure(figsize=(10, 6), dpi=128)
# alpha sets the opacity of a colour: 0 is fully transparent, 1 is fully opaque.
for temperatures, line_colour in ((highs, 'red'), (lows, 'blue')):
    plt.plot(dates, temperatures, c=line_colour, alpha=0.5)
# Shade the band between the high and low series.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)
本节将使用到JSON格式的交易收盘价数据,并使用json模块处理它们,对收盘价数据进行可视化,以探索价格变化的周期性
需下载btc_close_2017.json文件
这个文件里是python列表,每个元素是包含5个键值对的字典dic:日期,月份,周数,周几,收盘价
从Github上下载btc_close_2017.json数据
# btc_close_2017.py
import json
from urllib.request import urlopen

json_url = 'https://raw.githubusercontent.com/muxuezi/btc/master/btc_close_2017.json'
# Local path the downloaded copy is saved to.
json_path = 'btc_close_2017.json'

# Request btc_close_2017.json from the GitHub server and read the response
# body; the with-block closes the connection afterwards.
with urlopen(json_url) as response:
    req = response.read()

# The body is bytes, so the file is opened in binary mode.
with open(json_path, 'wb') as f:
    f.write(req)

# Parse the JSON payload into Python objects; the result has the same content
# as the downloaded file.
file_urllib = json.loads(req)
print(file_urllib)
另外,也可以使用requests模块的get()方法获取数据
import requests

json_url = 'https://raw.githubusercontent.com/muxuezi/btc/master/btc_close_2017.json'
# Local path the downloaded copy is saved to.
json_path = 'btc_close_2017.json'

req = requests.get(json_url)
# The text attribute returns the response body as a string, so the file is
# opened in text mode.
with open(json_path, 'w') as f:
    f.write(req.text)

# Decode the JSON body into a Python list; same content as file_urllib from
# the urllib-based version.
file_requests = req.json()
import json

filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Print the fields of every record, converting month/week to int and the
# closing price to float.
for btc_dict in btc_data:
    date = btc_dict['date']
    month = int(btc_dict['month'])
    week = int(btc_dict['week'])
    weekday = btc_dict['weekday']
    close = float(btc_dict['close'])
    print(' the date is {}, the month is {}, the week is {}, the weekday is {}, the close price is {} RMB'.format(date, month, week, weekday, close))
即可打印列表中每个字典的键值对
本例使用pygal来实现收盘价的折线图
# btc_close_2017.py
import json

import pygal

filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Split the records into one list per field.
dates = []
months = []
weeks = []
weekdays = []
close = []
for btc_dict in btc_data:
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    close.append(float(btc_dict['close']))
print(len(dates))

# x_label_rotation=20 rotates the x-axis labels 20 degrees clockwise;
# show_minor_x_labels=False means not every x-axis label is displayed.
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = "close price"
line_chart.x_labels = dates
N = 20
# Mark every N-th date as a major label so the x axis shows one label per N entries.
line_chart.x_labels_major = dates[::N]
line_chart.add('close price', close)
line_chart.render_to_file('images/close price picture.svg')
研究时间序列时,通常关注其趋势、周期性和噪声;为了消除非线性的趋势,一般先对数据进行对数变换(log transformation)
import math
# --snip--
print(len(dates))

# x_label_rotation=20 rotates the x-axis labels 20 degrees clockwise;
# show_minor_x_labels=False means not every x-axis label is displayed.
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = "close price logtransfomation"
line_chart.x_labels = dates
N = 20
# Mark every N-th date as a major label so the x axis shows one label per N entries.
line_chart.x_labels_major = dates[::N]
# Plot the base-10 logarithm of every closing price instead of the raw price.
close_log = [math.log10(n) for n in close]
line_chart.add('close price logtransformation', close_log)
line_chart.render_to_file('images/close price logtransformation picture.svg')
利用json文件中的数据,绘制月日均值、周日均值,以及每周各天的日均值。可以将之前的绘图代码封装成函数draw_line(x_data, y_data, title, y_legend),以便重复调用。
python中的groupby函数的主要作用是进行数据的分组以及分组后的组内运算,用法为 for key, group in groupby(列表, key=函数)。
实际上分组规则是通过key函数完成的:只要相邻元素经key函数计算出的key相同,就会被分到同一组,groupby依次返回每个key及其对应的group。注意groupby只合并相邻的元素,因此需要先按同一个key排序。
y_list = [v for _, v in y]
这一行代码起初不易理解:y是某个key对应的group,其中每个元素都是一个(x, y)元组;
`_` 是占位变量,接收元组中的x(也就是key);[ ]是一个列表推导式(不是生成器),用于取出该group中每个元素的y值
from itertools import groupby
# groupby() groups data so that a computation can be run on each group.


def draw_line(x_data, y_data, title, y_legend):
    """Plot the mean of ``y_data`` for each distinct value in ``x_data``.

    The (x, y) pairs are sorted and grouped by x, and the y values of each
    group are averaged. The means are drawn as a pygal line chart that is
    written to ``images/<title>.svg``.

    Args:
        x_data: Grouping keys, one per observation.
        y_data: Numeric values, paired positionally with ``x_data``.
        title: Chart title; also used as the SVG file name.
        y_legend: Legend label for the plotted series.

    Returns:
        The ``pygal.Line`` chart that was rendered.
    """
    xy_map = []
    # groupby() only merges adjacent items, so the pairs are sorted first.
    for x, y in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        # Each group item is an (x, y) pair; keep only the y values.
        y_list = [v for _, v in y]
        xy_map.append([x, sum(y_list) / len(y_list)])
    # Transpose [[x, mean], ...] into one tuple of keys and one of means.
    x_unique, y_mean = [*zip(*xy_map)]

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file('images/' + title + '.svg')
    return line_chart
查看月日均值
# btc_close_2017.py
# --snip--
# list.index() returns the index of the first item equal to the given value.
idx_month = dates.index('2017-12-01')
# Only the records before 2017-12-01 are averaged.
line_chart_month = draw_line(
    months[:idx_month],
    close[:idx_month],
    'close monthly average price',
    'monthly average price',
)
line_chart_month
查看周日均值
# btc_close_2017.py
# --snip--
# list.index() returns the index of the first item equal to the given value.
idx_month = dates.index('2017-12-01')
# The first record is skipped and only records before 2017-12-01 are averaged.
line_chart_month = draw_line(
    weeks[1:idx_month],
    close[1:idx_month],
    'close weekly average price',
    'weekly average price',
)
line_chart_month