Fetch the historical daily CSV data of the Nikkei Stock Average and store it in a DataFrame.
# Helper that reads a CSV file from a URL
import pandas as pd
from io import StringIO
import urllib
def read_csv(url):
    """Download a Shift_JIS-encoded CSV from ``url`` and parse it with pandas.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        The ``pandas.DataFrame`` parsed from the decoded response body.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(url) as res:
        text = res.read().decode('shift_jis')
    return pd.read_csv(StringIO(text))
#Acquisition of time series data of Nikkei Stock Average
import pandas as pd
from io import StringIO
import urllib
# Fetch the Nikkei Stock Average time series with read_csv (redefined below so that it also drops the unneeded last row)
url = "https://indexes.nikkei.co.jp/nkave/historical/nikkei_stock_average_daily_jp.csv"


def read_csv(url):
    """Download a Shift_JIS-encoded CSV from ``url`` and drop its last row.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        The parsed ``pandas.DataFrame`` without its final row. An empty
        frame is returned unchanged.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(url) as res:
        text = res.read().decode('shift_jis')
    df = pd.read_csv(StringIO(text))

    # The last line of the file is not needed, so remove it. The guard keeps
    # a header-only file from raising on a missing label.
    if not df.empty:
        df = df.drop(df.index[-1])
    return df
# Download the CSV at `url` and keep the result in a DataFrame named df
df = read_csv(url)
# Bare expression: shows df when run in an interactive session (notebook/REPL)
df
The predictions focus on the closing price of the time series data.
# The code that fetches the Nikkei Stock Average time series is posted in "Acquisition of time series data, Part 1".
# Turn the date column into a datetime index, discard the open/high/low
# price columns, and order the rows from the oldest date to the newest.
# NOTE(review): the column labels must match the CSV header exactly; the
# source file is decoded as Shift_JIS, so its header is presumably Japanese
# rather than these English labels. Confirm against the downloaded data.
df = (
    df.assign(**{"Data date": pd.to_datetime(df["Data date"], format='%Y/%m/%d')})
    .set_index('Data date')
    .drop(columns=['Open price', 'High price', 'Low price'])
    .sort_index()
)
df
import pandas as pd
# Join df (stock prices) and df_tweets (PN values) on their index and drop
# every row that contains NaN.
# parse_dates=True turns the 'date' index into datetimes so that its labels
# can match df's datetime index; without it the index holds plain strings.
# NOTE(review): assumes the 'date' column of df_tweets.csv contains parseable
# date strings; confirm against the file.
df_tweets = pd.read_csv(
    './6050_stock_price_prediction_data/df_tweets.csv',
    index_col='date',
    parse_dates=True,
)
table = df_tweets.join(df, how='right').dropna()
# Write the joined table to table.csv
table.to_csv("./6050_stock_price_prediction_data/table.csv")
table
As explained in "Python: Stock Price Forecast Part 1", this time we will use technical analysis to forecast stock prices.
Using the changes in the Nikkei Stock Average and in the PN value over the past three days as features, we predict whether the stock price will go up or down on the next day.
After splitting the data into training data and test data, the training data is standardized, and then the test data is standardized using the mean and standard deviation of the training data.
# Basic template: split in chronological order (no shuffling), then
# standardize both parts with the statistics of the training part only.
from sklearn.model_selection import train_test_split

X = table.values[:, 0]
y = table.values[:, 1]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, shuffle=False
)

train_mean = X_train.mean()
train_std = X_train.std()
X_train_std = (X_train - train_mean) / train_std
X_test_std = (X_test - train_mean) / train_std
# Create the training data: first column of table is the feature, second
# column is the target. The last 20% of the rows become the test split.
from sklearn.model_selection import train_test_split

X = table.values[:, 0]
y = table.values[:, 1]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, shuffle=False
)

# Both splits are scaled with the mean and standard deviation of X_train.
train_mean = X_train.mean()
train_std = X_train.std()
X_train_std = (X_train - train_mean) / train_std
X_test_std = (X_test - train_mean) / train_std
# Build df_train and df_test (columns: standardized PN value and closing
# price, indexed by the matching slice of table's index) and write each one
# to a CSV file in the data folder.
split_at = len(X_train_std)
column_order = ['pn', 'closing price']

df_train = pd.DataFrame(
    {'pn': X_train_std, 'closing price': y_train},
    columns=column_order,
    index=table.index[:split_at],
)
df_train.to_csv('./6050_stock_price_prediction_data/df_train.csv')

# The test rows are everything after the training rows.
df_test = pd.DataFrame(
    {'pn': X_test_std, 'closing price': y_test},
    columns=column_order,
    index=table.index[split_at:],
)
df_test.to_csv('./6050_stock_price_prediction_data/df_test.csv')
# Read df_train.csv and compute the row-to-row change of the PN value and of
# the closing price, then print both lists of changes.
import csv

exchange_dates = []
pn_rates = []
pn_rates_diff = []
exchange_rates = []
exchange_rates_diff = []

# The first data row only seeds the "previous" values. df_train is indexed by
# date, so the positional lookup is spelled out with .iloc instead of [0].
prev_pn = df_train['pn'].iloc[0]
prev_exch = df_train['closing price'].iloc[0]

# The with-block closes the file even if a row fails to parse.
with open('./6050_stock_price_prediction_data/df_train.csv', 'r', newline='') as rates_fd:
    reader = csv.reader(rates_fd)
    next(reader)  # Skip the header line.
    next(reader)  # Skip the first data row; it is already held in prev_pn / prev_exch.
    for splited in reader:
        time = splited[0]  # 1st column: date
        pn_val = float(splited[1])  # 2nd column: PN value
        exch_val = float(splited[2])  # 3rd column: closing price
        exchange_dates.append(time)
        pn_rates.append(pn_val)
        pn_rates_diff.append(pn_val - prev_pn)  # Change in PN value
        exchange_rates.append(exch_val)
        exchange_rates_diff.append(exch_val - prev_exch)  # Change in closing price
        prev_pn = pn_val
        prev_exch = exch_val

print(pn_rates_diff)
print(exchange_rates_diff)
# Build the training samples from windows of INPUT_LEN consecutive days.
import numpy as np

INPUT_LEN = 3
data_len = len(pn_rates_diff)

# One row per day i: the price change and PN change of each of the INPUT_LEN
# days before day i, interleaved as [price, pn, price, pn, ...].
tr_input_mat = [
    [
        change
        for day in range(i - INPUT_LEN, i)
        for change in (exchange_rates_diff[day], pn_rates_diff[day])
    ]
    for i in range(INPUT_LEN, data_len)
]

# Label for day i: 1 when the price change is zero or positive, otherwise 0.
tr_angle_mat = [
    1 if exchange_rates_diff[i] >= 0 else 0
    for i in range(INPUT_LEN, data_len)
]

train_feature_arr = np.array(tr_input_mat)
train_label_arr = np.array(tr_angle_mat)

# Print both arrays to get an overview of what was built.
print(train_feature_arr)
print(train_label_arr)
# Read df_test.csv and compute the row-to-row change of the PN value and of
# the closing price, in the same way as for the training data.
import csv

exchange_dates = []
pn_rates = []
pn_rates_diff = []
exchange_rates = []
exchange_rates_diff = []

# The first data row only seeds the "previous" values. df_test is indexed by
# date, so the positional lookup is spelled out with .iloc instead of [0].
prev_pn = df_test['pn'].iloc[0]
prev_exch = df_test['closing price'].iloc[0]

# The with-block closes the file even if a row fails to parse.
with open('./6050_stock_price_prediction_data/df_test.csv', 'r', newline='') as rates_fd:
    reader = csv.reader(rates_fd)
    next(reader)  # Skip the header line.
    next(reader)  # Skip the first data row; it is already held in prev_pn / prev_exch.
    for splited in reader:
        time = splited[0]  # 1st column: date
        pn_val = float(splited[1])  # 2nd column: PN value
        exch_val = float(splited[2])  # 3rd column: closing price
        exchange_dates.append(time)
        pn_rates.append(pn_val)
        pn_rates_diff.append(pn_val - prev_pn)  # Change in PN value
        exchange_rates.append(exch_val)
        exchange_rates_diff.append(exch_val - prev_exch)  # Change in closing price
        prev_pn = pn_val
        prev_exch = exch_val
# Build the test samples from windows of INPUT_LEN consecutive days.
INPUT_LEN = 3
data_len = len(pn_rates_diff)
test_input_mat = []
test_angle_mat = []
for i in range(INPUT_LEN, data_len):
    # Pair up the price change and PN change of each of the INPUT_LEN days
    # before day i, then flatten the pairs into one feature row.
    window = zip(
        exchange_rates_diff[i - INPUT_LEN:i],
        pn_rates_diff[i - INPUT_LEN:i],
    )
    test_input_mat.append([change for pair in window for change in pair])
    # Label for day i: 1 when the price change is zero or positive, otherwise 0.
    test_angle_mat.append(1 if exchange_rates_diff[i] >= 0 else 0)
test_feature_arr = np.array(test_input_mat)
test_label_arr = np.array(test_angle_mat)
# Fit three classifiers (logistic regression, random forest, SVM) on the
# training arrays and report how each one scores on the test arrays.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

for model in [
    LogisticRegression(),
    RandomForestClassifier(n_estimators=200, max_depth=8, random_state=0),
    SVC(),
]:
    model.fit(train_feature_arr, train_label_arr)
    print("--Method:", model.__class__.__name__, "--")
    # score() is evaluated once on the held-out test arrays, so the value is
    # test accuracy; no cross-validation is performed here.
    print("Test accuracy:{}".format(model.score(test_feature_arr, test_label_arr)))
Recommended Posts