ML Application to Predicting Stock Prices using ARIMA¶

Install the required libraries

In [25]:
%%capture
!pip install yfinance
!pip install pmdarima
In [26]:
%%capture
import yfinance as yf
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import pmdarima as pm
from datetime import datetime
In [27]:
# Define the company's stock symbol
stock_symbol = "NKE"

# Import the data of last five years
start_date = "2018-04-21"
end_date = "2023-04-20"

# Save the imported data into a data frame
data = yf.download(stock_symbol, start=start_date, end=end_date)

# Print the data
data.head()
[*********************100%%**********************]  1 of 1 completed
Out[27]:
Open High Low Close Adj Close Volume
Date
2018-04-23 66.000000 67.029999 65.949997 66.879997 63.032211 6338400
2018-04-24 67.309998 67.510002 66.720001 66.970001 63.117039 8176100
2018-04-25 66.839996 67.000000 66.250000 66.669998 62.834290 5188900
2018-04-26 66.779999 68.290001 66.639999 68.050003 64.134903 5355200
2018-04-27 68.470001 70.000000 68.449997 69.559998 65.558022 7814900
In [28]:
# Extracting the adjusted closing price data from the stock data and converting it into a pandas series
stock_price = data['Adj Close']
stock_price_series = pd.Series(stock_price)

print(stock_price_series)
Date
2018-04-23     63.032211
2018-04-24     63.117039
2018-04-25     62.834290
2018-04-26     64.134903
2018-04-27     65.558022
                 ...    
2023-04-13    125.172531
2023-04-14    124.697304
2023-04-17    124.934914
2023-04-18    124.954720
2023-04-19    124.420090
Name: Adj Close, Length: 1257, dtype: float64
In [29]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(stock_price_series, test_size=0.2, shuffle=False)
In [30]:
# Build the model
model = pm.auto_arima(train_data, seasonal=False, suppress_warnings=True)
In [31]:
# Train the model on the training data
model.fit(train_data)
Out[31]:
ARIMA(order=(2, 1, 2), scoring_args={}, suppress_warnings=True)
In [32]:
%%capture
# Predict/ Validate the model on test data
predictions = model.predict(n_periods=len(test_data))
In [33]:
# Check the accuracy of the model. Here we are calculating Root Mean Square Error to check accuracy.
rmse = np.sqrt(mean_squared_error(test_data, predictions))
print('Root Mean Squared Error, RMSE:', rmse)
Root Mean Squared Error, RMSE: 29.22101157293209

Plot the actual and predicted values on a graph

In [34]:
import plotly.graph_objects as go
import plotly.offline as pyo

# create traces for actual and predicted values
actual_trace = go.Scatter(x=test_data.index, y=test_data.values, name='Actual')
predicted_trace = go.Scatter(x=test_data.index, y=predictions, name='Predicted')

# combine the traces into a data list
data = [actual_trace, predicted_trace]

# create the layout
layout = go.Layout(title='Actual vs Predicted Values')

# create the figure
fig = go.Figure(data=data, layout=layout)

# show the figure
pyo.iplot(fig)
In [35]:
# Predict a single value from a given date input
future_date = datetime(2022, 1, 2)
future_data = model.predict(n_periods=1, exogenous=None, return_conf_int=False, alpha=0.05, start=None)
print('Predicted Adjusted Closing Price for {}: '.format(future_date), future_data)
Predicted Adjusted Closing Price for 2022-01-02 00:00:00:  1005    129.307367
dtype: float64
In [36]:
# Print the predicted value for the input date
print('The predicted value for {} is: {}'.format(future_date, future_data))
The predicted value for 2022-01-02 00:00:00 is: 1005    129.307367
dtype: float64