0. Abstract
This article describes how to set up a basic RNN training framework: how to read data from CSV files and build a data pipeline, how to define the model, and how to launch training from the main function.
Unfinished; to be updated.
1. RNN basics
Consider the unrolled diagram of the simplest RNN structure: X(t) is the input to the recurrent network at time t, and H is the main body of the network; the recurrence is the process of H being executed over and over. O(t) is the network's output at time t.
A brief description of the process:
At time t, H reads the input x(t) from the input layer and produces an output O(t); at the same time, H's state is passed from the current step to the next step.
In other words, besides the input x(t) from the input layer, H's input also depends on the previous state H(t-1).
In general, the input x of the input layer has the shape shown in the figure below.
For convenience, the discussion below assumes INPUT_SIZE = 1.
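To make the recurrence concrete, here is a minimal NumPy sketch of one step of a vanilla (Elman) RNN. The tanh nonlinearity matches the default of PyTorch's nn.RNN, while the weight names and the single combined bias are illustrative simplifications:

import numpy as np

input_size, hidden_size = 1, 20

# Illustrative parameters; in nn.RNN these are learned.
W_ih = np.random.randn(hidden_size, input_size)   # input-to-hidden weights
W_hh = np.random.randn(hidden_size, hidden_size)  # hidden-to-hidden weights
b = np.zeros(hidden_size)                         # single combined bias, for brevity

def rnn_step(x_t, h_prev):
    # h(t) = tanh(W_ih @ x(t) + W_hh @ h(t-1) + b)
    return np.tanh(W_ih @ x_t + W_hh @ h_prev + b)

h = np.zeros(hidden_size)                          # initial state H(0)
for x_t in [np.array([0.1]), np.array([0.2])]:
    h = rnn_step(x_t, h)                           # the state is carried to the next step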
2. A small RNN experiment
2.1 Preparing the dataset
Link: https://pan.baidu.com/s/1HorFNE5mlDgAvFXU6_JXfQ
Extraction code: x521
train_data: contains 6 columns of data; each row is one training input, whose corresponding output sits in the matching row of train_label.
train_label: contains 1 column of data. (A quick way to inspect the files follows below.)
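As a sanity check, the two files can be inspected with pandas; the file names and the space separator below follow main.py further down, so adjust them to your local copy:

import pandas as pd

data_train = pd.read_csv('./Zhou_X.csv', sep=' ', header=None)  # 6 feature columns
data_label = pd.read_csv('./Zhou_Y.csv', header=None)           # 1 label column
print(data_train.shape, data_label.shape)  # expect (N, 6) and (N, 1)
print(data_train.head())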
2.2 Designing the PyTorch data pipeline
The dataloader, data_loader.py:
import torch
import numpy as np

class ImageFolder(torch.utils.data.Dataset):
    def __init__(self, train_data):
        # The first 6 columns are the input features, the 7th is the label.
        self.data_source = np.array(train_data.iloc[:, 0:6])
        self.data_label = np.array(train_data.iloc[:, 6:7])

    def __getitem__(self, index):
        id_data = self.data_source[index]
        id_label = self.data_label[index]
        # unsqueeze(1) turns the row of 6 values into shape [6, 1]:
        # 6 time steps, each carrying 1 feature (INPUT_SIZE = 1).
        data = torch.Tensor(id_data).unsqueeze(1)
        label = torch.Tensor(id_label)
        return data, label

    def __len__(self):
        return len(self.data_label)
A brief walkthrough of this code:
This is the basic skeleton of PyTorch's data pipeline; in general you need to override the following three methods (a short usage sketch follows the list):
__init__(self, train_data): receives the data container (here a pandas DataFrame) when the class is instantiated.
__getitem__(self, index): called automatically by the DataLoader while the program runs; it builds and returns one sample of the dataset.
__len__(self): as the name suggests, returns the length of the dataset.
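For instance, assuming data_get is the concatenated 7-column DataFrame built in main.py below, the dataset can be exercised directly:

dataset = ImageFolder(data_get)   # data_get comes from main.py below
print(len(dataset))               # number of rows, via __len__
data, label = dataset[0]          # triggers __getitem__
print(data.shape, label.shape)    # torch.Size([6, 1]) torch.Size([1])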
The model file, modelnet.py:
import torch
from torch import nn

class Rnn(nn.Module):
    def __init__(self, INPUT_SIZE):
        super(Rnn, self).__init__()
        self.hidden_size = 20
        self.num_layers = 2
        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True  # tensors are [batch, time_step, feature]
        )
        # Fully connected layer mapping each hidden vector to a scalar.
        self.out = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        # None lets the initial hidden state default to zeros.
        r_out, h_state = self.rnn(x, None)
        outs = []
        for time in range(r_out.size(1)):
            # Apply the output layer to the hidden vector of each time step.
            outs.append(self.out(r_out[:, time, :]))
        return torch.stack(outs, dim=1), h_state
The input parameters in modelnet.py are:
INPUT_SIZE (not a hyperparameter): must equal the feature dimension of the input sequence.
self.hidden_size (hyperparameter): the dimension of the feature vector output at each time step.
self.num_layers (hyperparameter): the number of stacked recurrent layers.
Since the input at each time step produces a corresponding output (a vector of dimension hidden_size, which is set greater than 1 in this article), a fully connected layer converts each output feature vector into a scalar as the final output; this can of course be adapted to the task at hand. The conversion is shown in the code below.
outs = []
for time in range(r_out.size(1)):
    outs.append(self.out(r_out[:, time, :]))
The for loop collects the output of every time step; the stacked result torch.stack(outs, dim=1) has size [batch, time_step, 1].
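A quick shape check of the model (the dummy batch size and sequence length below are arbitrary):

import torch
from modelnet import Rnn

model = Rnn(INPUT_SIZE=1)
x = torch.randn(4, 6, 1)    # [batch=4, time_step=6, feature=1]
prediction, h_state = model(x)
print(prediction.shape)     # torch.Size([4, 6, 1])
print(h_state.shape)        # torch.Size([2, 4, 20]): [num_layers, batch, hidden_size]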
Below is the code of the main function, main.py:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from modelnet import Rnn
from sklearn.model_selection import train_test_split
import pandas as pd
from data_loader import ImageFolder
from tqdm import trange
'''1. Data preparation
'''
# Training data files
data_path = './Zhou_X.csv'
label_path = './Zhou_Y.csv'
# Output file
txt_path = 'Zhou_X.txt'
# Hyperparameters
INPUT_SIZE = 1          # feature dimension of the input sequence
LR = 0.0001             # learning rate
BATCHSIZE_PER_CARD = 1  # batch size per card
total_epoch = 40        # total number of training epochs
# Read the csv files; sep=' ' means the values are space-separated,
# header=None means the file has no header row.
data_train = pd.read_csv(data_path, sep=' ', header=None)
data_label = pd.read_csv(label_path, header=None)
# Put the training data and the labels in one container; axis=1 concatenates column-wise.
data_get = pd.concat([data_train, data_label], axis=1)
# data.iloc[:, 0:13]  # select columns by position
# Split the data into a training set and a test set.
# random_state must be an integer (it is ignored anyway when shuffle=False).
train_data, test_data = train_test_split(data_get,
                                         train_size=0.7, random_state=0,
                                         shuffle=False)
#################################################################
'''2. Build the training framework'''
# 2.1 Build the pipeline
# max(1, ...) keeps the batch size valid on CPU-only machines,
# where torch.cuda.device_count() returns 0.
train_batchsize = max(1, torch.cuda.device_count()) * BATCHSIZE_PER_CARD
val_batchsize = max(1, torch.cuda.device_count()) * BATCHSIZE_PER_CARD
train_dataset = ImageFolder(train_data)
test_dataset = ImageFolder(test_data)
data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=train_batchsize,
    shuffle=True,
    num_workers=0)
val_data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=val_batchsize,
    shuffle=True,
    num_workers=0)
# 2.2 Choose the model
model = Rnn(INPUT_SIZE)
print(model)
# 2.3 Define the optimizer and the loss function
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
loss_func = nn.MSELoss()  # regression target, hence mean squared error
# loss_func = nn.CrossEntropyLoss()
# 2.4 Start training
global_step = 1
for epoch in range(1, total_epoch + 1):
    print('\n---------- Epoch:' + str(epoch) + ' ----------')
    data_loader_iter = iter(data_loader)
    train_epoch_loss = 0
    test_epoch_loss = 0
    model.train()  # a no-op for this model, but good practice
    with trange(len(data_loader_iter)) as t:
        for index in t:
            train_data_input, train_data_label = next(data_loader_iter)
            prediction, h_state = model(train_data_input)
            # Compare only the last time step's output with the label
            # (batch_size is 1 here, hence the [0]).
            loss = loss_func(prediction[:, -1, :][0], train_data_label[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1
            # .item() detaches the scalar so the graph is not kept alive across steps.
            train_epoch_loss = train_epoch_loss + loss.item()
            t.set_postfix(step=global_step, loss=loss.item(),
                          aver_loss=train_epoch_loss / (index + 1), learning_rate=LR)
    model.eval()
    data_loader_iter = iter(val_data_loader)
    with trange(len(data_loader_iter)) as t:
        for index in t:
            with torch.no_grad():
                train_data_input, train_data_label = next(data_loader_iter)
                prediction, h_state = model(train_data_input)
                loss = loss_func(prediction[:, -1, :][0], train_data_label[0])
                test_epoch_loss = test_epoch_loss + loss.item()
                t.set_postfix(step=global_step, loss=loss.item(),
                              aver_loss=test_epoch_loss / (index + 1), learning_rate=LR)
# Predict over the whole dataset
all_data = ImageFolder(data_get)
all_data_loader = torch.utils.data.DataLoader(
    all_data,
    batch_size=1,
    shuffle=False,
    num_workers=0)
data_loader_iter = iter(all_data_loader)
input_record = []
loss_record = []
result = []
total_loss = 0
model.eval()
with trange(len(data_loader_iter)) as t:
    for index in t:
        with torch.no_grad():
            train_data_input, train_data_label = next(data_loader_iter)
            prediction, h_state = model(train_data_input)
            loss = loss_func(prediction[:, -1, :][0], train_data_label[0])
            input_record.append([train_data_input.data.numpy(), train_data_label.data.numpy()])
            result.append(prediction[:, -1, :][0].data.numpy())
            loss_record.append(loss.item())
            total_loss += loss.item()
            t.set_postfix(step=index, loss=loss.item(), aver_loss=total_loss / (index + 1))
with open(txt_path, "w") as f:
    for index in range(len(result)):
        # One line per sample: the 6 inputs, the label, the prediction, the loss.
        str1 = ''
        for item in input_record[index][0][0]:
            str1 = str1 + str(item[0]) + ' '
        str2 = str(input_record[index][1][0][0])
        str3 = str(result[index][0])
        str4 = str(loss_record[index])
        str_all = str1 + str2 + ' ' + str3 + ' ' + str4 + '\n'
        f.write(str_all)  # the with-block closes the file automatically; no f.close() needed
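Each line of the resulting Zhou_X.txt therefore holds, space-separated, the 6 input values, the ground-truth label, the model's prediction for the last time step, and the per-sample loss, which makes the file straightforward to plot or compare against the original CSVs.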