import tushare as ts

# Register the API token and create a Tushare Pro client.
ts.set_token('token')
pro = ts.pro_api()

# Daily bars for 000008.SZ, restricted to the columns used below.
data = pro.daily(
    ts_code='000008.SZ',
    start_date='20190101',
    end_date='20201201',
    fields='ts_code,trade_date,open,close',
)

# Flip the row order and renumber the index from 0.
# NOTE(review): presumably the API returns newest-first rows, so this yields
# chronological order -- confirm against the Tushare documentation.
data = data.reindex(index=data.index[::-1]).reset_index(drop=True)
# Intraday move: positive when the close is above the open.
data['rof'] = data['close'] - data['open']

# Column 0 = rof, column 1 = close.
narray = data[['rof', 'close']].values

day = 10  # number of consecutive closes in each feature window

# NOTE(review): the trailing "- 1" means the last row is never used as a
# label -- confirm whether dropping that final sample is intentional.
window_starts = range(len(narray) - day - 1)

# Feature: `day` consecutive closing prices starting at each offset.
x = [narray[start:start + day, 1] for start in window_starts]

# Label: 1 when the row right after the window closed above its open, else 0.
y = [1 if narray[start + day, 0] > 0 else 0 for start in window_starts]
利用 scikit-learn(sklearn)库的 train_test_split 函数,按照 7:3 的比例划分训练集和测试集:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing.
# NOTE(review): no random_state is passed, so the split (and the accuracies
# computed from it) differ between runs; the windows also overlap in time,
# so a shuffled split may leak information -- confirm this is acceptable.
split = train_test_split(x, y, test_size=0.3)
x_train, x_test, y_train, y_test = split
在获得训练数据和测试数据后,可以利用Sklearn库中的SVM模块建立模型了。
from sklearn.svm import SVC

# Fit a linear-kernel support vector classifier; fit() returns the estimator.
svm_model = SVC(kernel='linear').fit(x_train, y_train)

# Predictions on the training samples, used to score the in-sample fit.
predict = svm_model.predict(x_train)
利用 sklearn 的 metrics 模块计算预测结果的准确率:
from sklearn.metrics import accuracy_score

# Accuracy on the training set.
train_accuracy = accuracy_score(y_train, predict)
print(train_accuracy)

# Accuracy on the held-out test set.
predict1 = svm_model.predict(x_test)
test_accuracy = accuracy_score(y_test, predict1)
print(test_accuracy)
输出结果:
0.5772870662460567 0.6131386861313869
可以看到,训练集的预测准确率约为 58%,测试集的预测准确率约为 61%。需要注意的是,train_test_split 默认随机划分数据,因此每次运行得到的准确率会略有不同。
完整代码为:
import tushare as ts
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


def _build_samples(narray, day):
    """Build sliding-window features and binary labels from (rof, close) rows.

    Args:
        narray: 2-D array whose column 0 is close - open and column 1 is close.
        day: Number of consecutive closing prices in each feature window.

    Returns:
        A pair (x, y): x is a list of length-`day` close-price windows and
        y is the matching list of 0/1 labels.
    """
    x = []
    y = []
    for i in range(len(narray) - day - 1):
        # Feature: `day` consecutive closing prices starting at row i.
        x.append(narray[i:i + day, 1])
        # Label: 1 when the row right after the window closed above its open.
        y.append(1 if narray[i + day, 0] > 0 else 0)
    return x, y


def cleandata(ts_code, start_date, end_date, day=10):
    """Fetch daily bars from Tushare and build a train/test split.

    Args:
        ts_code: Tushare security code, e.g. '000008.SZ'.
        start_date: First date to fetch, formatted 'YYYYMMDD'.
        end_date: Last date to fetch, formatted 'YYYYMMDD'.
        day: Number of consecutive closing prices per feature window.

    Returns:
        The tuple (x_train, y_train, x_test, y_test), with 30% of the
        samples held out for testing.
    """
    ts.set_token('token')
    pro = ts.pro_api()
    data = pro.daily(
        ts_code=ts_code,
        start_date=start_date,
        end_date=end_date,
        fields='ts_code,trade_date,open,close',
    )

    # Flip the row order and renumber the index from 0.
    # NOTE(review): presumably the API returns newest-first rows, so this
    # yields chronological order -- confirm against the Tushare docs.
    data = data.reindex(index=data.index[::-1])
    data = data.reset_index(drop=True)

    # Intraday move: positive when the close is above the open.
    data['rof'] = data['close'] - data['open']
    narray = data[['rof', 'close']].values

    # Honour the caller's `day`; it was previously overwritten with 10 here.
    x, y = _build_samples(narray, day)

    # No random_state is passed, so the split differs between runs.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    return x_train, y_train, x_test, y_test


if __name__ == '__main__':
    x_train, y_train, x_test, y_test = cleandata(
        '000008.SZ', '20190101', '20201201'
    )

    svm_model = SVC(kernel='linear')
    svm_model.fit(x_train, y_train)

    # Training-set accuracy (this line previously called the undefined `svm`).
    predict = svm_model.predict(x_train)
    print(accuracy_score(y_train, predict))

    # Test-set accuracy.
    predict1 = svm_model.predict(x_test)
    print(accuracy_score(y_test, predict1))