# Create your own stock price dataset and implement an AR(1) model.
# You can get a CSV of stock prices by accessing the following URL and searching for the stock name. (The URL itself is not included in this file.)
# Implement the AR(1) model based on the stock price CSV obtained above.
# The AR(1) model here predicts the stock price of a day from the stock price of the previous day.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load the raw stock-price CSV. `file_nm` must already hold the path to the
# CSV file before this point.
df = pd.read_csv(file_nm, encoding='cp932')

# The parsed CSV carries extra information, so rebuild a clean frame:
#   * the index (minus its first entry) supplies the dates,
#   * the row labelled 'date' supplies the column names,
#   * every row after the first supplies the data.
# NOTE(review): the labels 'date' and 'closing price' must match what the
# downloaded CSV actually contains -- confirm against the real file.
date = df.index[1:]
df = pd.DataFrame(df.values[1:, :], columns=df.loc['date'].values)
df['date'] = date

# Keep only the date and the closing price. The explicit copy gives an
# independent frame, so the assignments below do not write into a slice.
df = df[['date', 'closing price']].copy()

# Convert the closing price to float32. Plain column assignment is
# used instead of `df.loc[:, col] = ...` so that the column is replaced and its
# dtype really changes (recent pandas writes `.loc[:, col]` values in place and
# keeps the old dtype where it can).
df['closing price'] = df['closing price'].astype(np.float32)

# Number the rows 0, 1, 2, ... in file order and place that column right after
# the date.
df.insert(1, 'Date number', np.arange(len(df)))

# Pair each row with the closing price of the row directly before it.
df = df.rename(columns={'closing price': 'Stock price of the day'})
df['Stock price of the previous day'] = df['Stock price of the day'].shift(1)

# Exclude rows that contain missing values; the first row always qualifies
# because it has no previous-day price.
df = df.dropna()

# y is the objective variable (price of the day), X the feature (price of the
# previous day). Bracket access is required because the labels contain spaces.
y = df['Stock price of the day'].values
X = df['Stock price of the previous day'].values
def reshape_row(arr):
    """Reshape *arr* into a two-dimensional, single-column array.

    Despite the function's name, ``reshape(-1, 1)`` produces shape ``(n, 1)``:
    one row per element and exactly one column.

    Args:
        arr: NumPy array or array-like (e.g. a list) holding the values.

    Returns:
        numpy.ndarray with shape ``(number_of_elements, 1)``.
    """
    # np.asarray leaves an existing ndarray untouched and lets plain sequences
    # be passed as well.
    return np.asarray(arr).reshape(-1, 1)
# Turn X and y into two-dimensional single-column arrays. With y in that
# shape, the fitted intercept_ and coef_ are indexed as [0] and [0][0] below.
X, y = reshape_row(X), reshape_row(y)

# Split into training and validation data at a ratio of 7:3.
# No random_state is passed, so the split (and therefore the numbers printed
# below) can differ from run to run.
train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.3)

# Estimate the coefficients of the linear regression model (AR(1)).
model = LinearRegression()
model.fit(train_X, train_y)

# Display the estimated intercept and the coefficient of the previous day's
# stock price.
print("The intercept is", model.intercept_[0])
print("The coefficient estimate of the stock price on the previous day is", model.coef_[0][0])

# Evaluate the model on the validation data. The score is printed explicitly:
# a bare expression statement only shows its value in an interactive session
# and is silently discarded when the file runs as a script.
true_y = val_y
pred_y = model.predict(val_X)
mse = mean_squared_error(true_y, pred_y)
print("The mean squared error on the validation data is", mse)
# Recommended Posts  (appears to be navigation text left over from the source web page; not part of the script)