# Dr. Manish Kumar Jain: MR Python

from pandas import DataFrame
from sklearn import linear_model
import statsmodels.api as sm

#statsmodels is a Python module that provides classes and functions
#for the estimation of many different statistical models,
#as well as for conducting statistical tests, and statistical
#data exploration.

# Monthly observations listed newest first: December 2017 back to
# January 2016. Each value list holds one row per year (2017, then 2016).
Stock_Market = {
    'Year': [2017] * 12 + [2016] * 12,
    'Month': list(range(12, 0, -1)) * 2,
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# The column order follows the insertion order of the dictionary keys:
# Year, Month, Interest_Rate, Unemployment_Rate, Stock_Index_Price.
df = DataFrame(Stock_Market, columns=list(Stock_Market))

# Predictors (independent variables) and response (dependent variable).
# Two predictors make this a multiple regression. For a simple linear
# regression keep the double brackets, e.g. X = df[['Interest_Rate']], so
# that X stays two-dimensional as sklearn requires. Additional predictors
# can be added by listing more column names inside the brackets.
X = df[['Interest_Rate','Unemployment_Rate']]
Y = df['Stock_Index_Price']

# with sklearn
regr = linear_model.LinearRegression()
regr.fit(X, Y)

print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

# prediction with sklearn
New_Interest_Rate = 2.75
New_Unemployment_Rate = 5.3
# The model was fitted on a DataFrame, so it recorded the column names.
# Predicting from a bare nested list makes recent sklearn versions warn
# that "X does not have valid feature names". A one-row DataFrame with the
# same columns avoids the warning and yields the same prediction.
New_Observation = DataFrame(
    [[New_Interest_Rate, New_Unemployment_Rate]], columns=X.columns)
print ('Predicted Stock Index Price: \n', regr.predict(New_Observation))

# with statsmodels
# statsmodels does not include an intercept by default, so a constant
# column is added to the predictors before fitting.
X = sm.add_constant(X)

# OLS stands for ordinary least squares.
# The simplest linear model relates an independent variable (x) to a
# dependent variable (y) through the equation of a line: y = mx + c.
# OLS chooses m and c (one slope per predictor in the multiple case) so
# that the sum of the squared differences between the observed and the
# predicted values of the dependent variable is as small as possible.
# That sum of squares is the loss being minimised, which is where the
# name "least squares" comes from.
model = sm.OLS(endog=Y, exog=X).fit()

# In-sample predictions for the same rows the model was fitted on.
predictions = model.predict(X)

# Full regression report: coefficients, standard errors, R-squared, etc.
print_model = model.summary()
print(print_model)

#Stock_Index_Price = (Intercept) + (Interest_Rate coef)*X1 + (Unemployment_Rate coef)*X2
#Stock_Index_Price = (1798.4040) + (345.5401)*X1 + (-250.1466)*X2
#Interest Rate = 2.75 (i.e., X1 = 2.75)
#Unemployment Rate = 5.3 (i.e., X2 = 5.3)

#Stock_Index_Price = (1798.4040) + (345.5401)*(2.75) + (-250.1466)*(5.3) = 1422.86

# Dr. Manish Kumar Jain

# Sunday, 15 March 2020

# MR Python

# 1 comment:

# Blog Archive