今天想和大家分享一下如何利用 Python,用非平稳的输入特征(股票价格)训练神经网络来预测股票的涨跌方向,并说明特征的非平稳性为什么会让模型在样本外失效。
建筑行业市值前六公司
中国建筑 – 601668.SH 中国交建 – 601800.SH 中国中铁 – 601390.SH 中国铁建 – 601186.SH 中国中冶 – 601618.SH 中国电建 – 601669.SH
建模计算分析
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
import math
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.neural_network
from pandas import DataFrame, Series
from scipy.stats import beta, kurtosis, kurtosistest, norm, skew, t
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from statsmodels.tsa.stattools import adfuller

sns.set_style('whitegrid')
对中国电建 – 601669.SH 进行预测
1 2 3 4 5 |
# Forward-adjusted price data; the first CSV column becomes the row index.
data = pd.read_csv('建筑.csv', index_col=0)
# Preview the first and last three rows. DataFrame.append was removed in
# pandas 2.0, so the two slices are stacked with pd.concat instead.
pd.concat([data.head(3), data.tail(3)])
1 2 3 4 5 6 |
# Pull out the 601669 column and re-index it by parsed timestamps.
China_DJ = data['601669']
new_index = pd.to_datetime(China_DJ.index)
Y = Series(data=China_DJ.values, index=new_index)
Y.head(6)
1 2 3 4 5 6 |
# Period-over-period returns. The first entry is NaN (there is no previous
# price), so it is dropped.
Y_pct = Y.pct_change().iloc[1:].copy()
Y_pct.head()
1 2 3 4 5 6 |
# Turn each return into a direction label: +1 for a gain, -1 otherwise.
# Zero and NaN returns also map to -1 because `x > 0` is False for them.
def f(x):
    return 1 if x > 0 else -1


Y_pct = Y_pct.apply(f)
Y_pct.head()
1 2 3 4 |
# Move every timestamp back by one calendar day. Because `freq` is given,
# only the index changes; the label values keep their positions.
Y_pct = Y_pct.shift(periods=-1, freq='1d')
Y_pct.head()
1 2 3 4 5 6 |
# Feature matrix: every column of the price table, indexed by timestamp.
# These rows are used to predict the later direction of 601669.
new_index1 = pd.to_datetime(data.index)
X = DataFrame(data=data.values, index=new_index1)
X.tail()
1 2 3 4 |
# Drop only the final row: the last price has no following return to predict.
# The labels in Y_pct come from pct_change()[1:], so they are exactly one row
# shorter than the price table. Removing two rows here left X and the labels
# with different lengths; the out-of-sample features are trimmed by one row too.
X = X.iloc[:-1]
X.index
1 2 3 |
# Inspect the label index (Y_pct) next to X.index. Y is the raw price series
# and is not what the classifier is trained on.
Y_pct.index
1 2 3 |
# Multi-layer perceptron with two hidden layers (10 then 5 units), the
# L-BFGS solver, and alpha=1e-5.
NN = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=(10, 5),
    solver='lbfgs',
    alpha=1e-5,
)
1 2 3 4 |
# Train on the direction labels (Y_pct), not on Y: Y holds raw prices, which
# are continuous values rather than class labels for a classifier.
NN = NN.fit(X, Y_pct)
NN
MLPClassifier(activation=’relu’, alpha=1e-05, batch_size=’auto’, beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=(10, 5), learning_rate=’constant’, learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=None, shuffle=True, solver=’lbfgs’, tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False)
1 2 3 |
# Show the class predictions of the fitted network on the training features.
NN.predict(X)
array([ 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, -1, -1, -1, -1, -1, 1, -1, -1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1], dtype=int64)
1 2 3 4 5 |
def check_accuracy(predictions, Y):
    """Return the fraction of predictions that equal the true labels.

    Args:
        predictions: Sequence of predicted labels.
        Y: Sequence of true labels, the same length as ``predictions``.

    Returns:
        The share of positions where both sequences agree, as a float
        between 0.0 and 1.0.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Compare position by position on plain arrays so that a pandas index on
    # ``Y`` cannot trigger label alignment. The earlier version indexed an
    # undefined global (``Y4``) instead of the ``Y`` argument.
    predicted = np.asarray(predictions).ravel()
    actual = np.asarray(Y).ravel()
    if predicted.size != actual.size:
        raise ValueError(
            "predictions and Y must have the same length "
            f"(got {predicted.size} and {actual.size})"
        )
    if actual.size == 0:
        raise ValueError("cannot compute accuracy of an empty sample")
    return float(np.count_nonzero(predicted == actual)) / actual.size
1 2 3 4 |
# Score the in-sample predictions against the direction labels (Y_pct).
# Y is the raw price series and never equals a +1 / -1 prediction.
predictions = NN.predict(X)
check_accuracy(predictions, Y_pct)
0.61
1 2 3 4 5 6 7 8 9 10 |
# Fill missing values, rescale every feature with min-max scaling, then refit
# the network on the transformed features.
# preprocessing.Imputer was removed from scikit-learn; SimpleImputer is its
# replacement and also defaults to column-mean imputation.
imputer = SimpleImputer()
scaler = preprocessing.MinMaxScaler()

X = imputer.fit_transform(X)
X = scaler.fit_transform(X)

# Fit on the direction labels (Y_pct); Y holds raw prices, not class labels.
NN = NN.fit(X, Y_pct)
NN
MLPClassifier(activation=’relu’, alpha=1e-05, batch_size=’auto’, beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=(10, 5), learning_rate=’constant’, learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=None, shuffle=True, solver=’lbfgs’, tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False)
1 2 3 |
# Show the class predictions after refitting on the imputed, scaled features.
NN.predict(X)
array([-1, -1, -1, 1, -1, 1, -1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1], dtype=int64)
1 2 3 4 |
# In-sample accuracy of the refitted network. `Y4` was never defined in this
# script; the direction labels live in Y_pct.
predictions = NN.predict(X)
check_accuracy(predictions, Y_pct)
0.71
在训练样本内,模型对第二天涨跌方向的预测准确率约为 71%(注意这只是样本内结果,并不代表真实的预测能力)。
1 2 3 4 5 |
# Forward-adjusted price data for the out-of-sample period.
OOS_pricing_data = pd.read_csv('建筑 2.csv', index_col=0)
# Preview the first and last three rows. DataFrame.append was removed in
# pandas 2.0, so the two slices are stacked with pd.concat instead.
pd.concat([OOS_pricing_data.head(3), OOS_pricing_data.tail(3)])
1 2 3 4 5 6 7 8 |
# Out-of-sample 601669 prices -> period-over-period returns, with the leading
# NaN (no previous price) dropped.
Y1 = OOS_pricing_data['601669']
new_index = pd.to_datetime(Y1.index)
Y5 = Series(data=Y1.values, index=new_index).pct_change().iloc[1:]
Y5.head()
1 2 3 4 5 6 |
# Turn each out-of-sample return into a direction label: +1 for a gain,
# -1 otherwise (zero and NaN returns also map to -1).
def f(x):
    return 1 if x > 0 else -1


Y5 = Y5.apply(f)
Y5.head()
1 2 3 4 |
# Move every timestamp back by one calendar day. Because `freq` is given,
# only the index changes; the label values keep their positions.
Y5 = Y5.shift(periods=-1, freq='1d')
Y5.head()
1 2 3 4 5 |
# Out-of-sample feature matrix: all price columns, indexed by timestamp.
new_index2 = pd.to_datetime(OOS_pricing_data.index)
X11 = DataFrame(data=OOS_pricing_data.values, index=new_index2)
X11.head()
1 2 3 4 |
# Drop the final row so the features have as many rows as the labels in Y5
# (pct_change()[1:] made those one row shorter than the price table).
X11 = X11.iloc[:-1]
X11.index
1 2 3 |
# Timestamps of the out-of-sample labels, for comparison with X11's index.
Y5.index
1 2 3 4 5 |
# Apply the imputer and scaler as they were fitted on the training features.
# Calling fit_transform here refits them on the out-of-sample prices, which
# rescales those prices by their own min/max and feeds the network inputs on
# a different scale from the one it was trained on.
X11 = imputer.transform(X11)
X11 = scaler.transform(X11)

OOS_predictions = NN.predict(X11)
1 2 3 |
# Out-of-sample accuracy. `OOS_Y` was never defined in this script; the
# out-of-sample direction labels are stored in Y5.
check_accuracy(OOS_predictions, Y5)
result: 0.5034013605442177
50%
只有 50%的准确率
这可能是不同时期之间数据不平稳造成的:神经网络学到的规律只适用于训练期的市场条件,换到条件不同的测试期就失效了。也有可能是神经网络拟合的只是噪声,而没有捕捉到真正的信号——仅凭这个结果很难判断是哪一种情况。
1 2 3 4 5 6 |
# Price table again, this time with the stock codes as column names so the
# pairwise correlations below can be looked up by code.
new_index3 = pd.to_datetime(data.index)
Y6 = pd.DataFrame(
    data.values,
    index=new_index3,
    columns=['601668', '601800', '601390', '601186', '601618', '601669'],
)
Y6.head()
1 2 3 4 |
# Rolling pairwise correlations between all columns over a 30-row window.
# pd.rolling_corr was removed from pandas; the .rolling() accessor replaces
# it. The result is a DataFrame with a (timestamp, column) MultiIndex holding
# one correlation matrix per timestamp.
corr_df = Y6.rolling(window=30).corr()
corr_df
下面检验平稳性:先画出各股票与 601669 的 30 日滚动相关系数,再对每个价格序列做 ADF 单位根检验。
1 2 3 4 5 6 7 8 9 10 11 12 |
# Plot the 30-row rolling Pearson correlation of each peer with 601669.
# The three-axis lookup `corr_df[:, a, b]` relied on the pandas Panel type,
# which no longer exists, so each pair is computed directly from the price
# columns in Y6 instead.
target = '601669'
peers = ['601668', '601800', '601390', '601186', '601618']
window = 30

fig = plt.figure(figsize=(16, 8.5))
rolling_corr = {
    code: Y6[code].rolling(window=window).corr(Y6[target]) for code in peers
}
for code in peers:
    plt.plot(rolling_corr[code])

# Dashed line: average 601618 x 601669 correlation, drawn from the first
# timestamp that has a full window onwards.
ts = rolling_corr['601618']
plt.hlines(ts.mean(), ts.index[window - 1], ts.index[-1], linestyles='dashed')

plt.ylabel('Pearson Correlation Coefficient')
plt.legend([
    '601668 x 601669',
    '601800 x 601669',
    '601390 x 601669',
    '601186 x 601669',
    '601618 x 601669',
    '601618 x 601669 AVG',
])
1 2 3 |
# Augmented Dickey-Fuller test on each raw price column.
# The six copy-pasted calls are folded into one pass and the results are kept
# in a dict keyed by stock code, so they can still be read outside a notebook.
# Missing values are dropped first because adfuller rejects NaN input.
adf_results = {
    code: adfuller(data[code].dropna())
    for code in ['601668', '601800', '601390', '601186', '601618', '601669']
}
adf_results