Multi-variable Logistic Regression with Gradient Descent, in Python

# Import the modules I need
import xlrd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection on older matplotlib

# Opening & Reading the excel doc
base = xlrd.open_workbook('data_3_logreg.xls')
first = base.sheet_by_index(0)

# Extracting data in X1, X2, X3 (between 1 and 100) and Y (0 or 1);
# slicing off the first element of each column skips the header row
X1 = first.col_values(0)[1:]
X2 = first.col_values(1)[1:]
X3 = first.col_values(2)[1:]
Y = first.col_values(3)[1:]
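# The same extraction could be done in one call with pandas (a sketch,
# assuming pandas is installed; kept commented out so the xlrd version
# above remains the one actually used):
# import pandas as pd
# df = pd.read_excel('data_3_logreg.xls')
# X1, X2, X3, Y = (df.iloc[:, k].tolist() for k in range(4))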

# Compute the sigmoid hypothesis h(X) = 1 / (1 + exp(-(b0 + b1*X1 + b2*X2 + b3*X3)))
def exp_X(b0, b1, b2, b3):
  h0 = []
  for i in range(len(X1)):
    a = np.exp(-(b0 + b1*X1[i] + b2*X2[i] + b3*X3[i]))
    b = 1 / (1 + a)
    h0.append(b)
  return h0
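# A vectorized sketch of the same sigmoid using NumPy broadcasting;
# exp_X_vec is my own name, not part of the original code:
def exp_X_vec(b0, b1, b2, b3):
  z = b0 + b1*np.asarray(X1) + b2*np.asarray(X2) + b3*np.asarray(X3)
  return 1.0 / (1.0 + np.exp(-z))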

# Compute the current cross-entropy cost
def cost_fct(b0, b1, b2, b3):
  cost = 0
  h = exp_X(b0, b1, b2, b3)  # predicted probabilities for every sample
  for i in range(len(X1)):
    a = np.log(h[i])
    b = np.log(1 - h[i])
    cost += -Y[i]*a - (1 - Y[i])*b
  return cost / len(h)
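# A vectorized sketch of the same cross-entropy cost; clipping keeps
# np.log away from exact 0 or 1 (cost_fct_vec is my own name):
def cost_fct_vec(b0, b1, b2, b3):
  h = np.clip(np.asarray(exp_X(b0, b1, b2, b3)), 1e-12, 1 - 1e-12)
  y = np.asarray(Y)
  return np.mean(-y*np.log(h) - (1 - y)*np.log(1 - h))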

# Compute the gradient of the cost and take one descent step
def step_gradient(b_current, m1_current, m2_current, m3_current, X, learningRate):
  b_gradient = 0
  m1_gradient = 0
  m2_gradient = 0
  m3_gradient = 0
  N = float(len(X))
  h_0 = exp_X(b_current, m1_current, m2_current, m3_current)  # computed once per step
  for i in range(len(X)):
    y = Y[i]
    b_gradient += (h_0[i] - y) / N
    m1_gradient += X1[i] * (h_0[i] - y) / N
    m2_gradient += X2[i] * (h_0[i] - y) / N
    m3_gradient += X3[i] * (h_0[i] - y) / N
  new_b = b_current - (learningRate * b_gradient)
  new_m1 = m1_current - (learningRate * m1_gradient)
  new_m2 = m2_current - (learningRate * m2_gradient)
  new_m3 = m3_current - (learningRate * m3_gradient)
  return [new_b, new_m1, new_m2, new_m3]
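# A vectorized sketch of the same update step, replacing the Python loop
# with array operations (step_gradient_vec is my own name):
def step_gradient_vec(b, m1, m2, m3, learningRate):
  x1, x2, x3, y = (np.asarray(v) for v in (X1, X2, X3, Y))
  err = np.asarray(exp_X(b, m1, m2, m3)) - y  # h(x) - y for every sample
  n = float(len(y))
  return [b - learningRate * err.sum() / n,
          m1 - learningRate * (x1*err).sum() / n,
          m2 - learningRate * (x2*err).sum() / n,
          m3 - learningRate * (x3*err).sum() / n]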

# Compute the gradient descent
def gradient_descent_runner(X, starting_b, starting_m1, starting_m2,
                            starting_m3, learning_rate, num_iterations):
  b = starting_b
  m1 = starting_m1
  m2 = starting_m2
  m3 = starting_m3
  for i in range(num_iterations):
    b, m1, m2, m3 = step_gradient(b, m1, m2, m3, X, learning_rate) 
  return [b, m1, m2, m3]
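# A variant sketch that also records the cost after every step, handy for
# checking convergence (gradient_descent_with_history is my own name):
def gradient_descent_with_history(X, b, m1, m2, m3, learning_rate, num_iterations):
  history = []
  for i in range(num_iterations):
    b, m1, m2, m3 = step_gradient(b, m1, m2, m3, X, learning_rate)
    history.append(cost_fct(b, m1, m2, m3))
  return [b, m1, m2, m3], history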

# Run the gradient descent
def ranthis():
  learning_rate = 0.0001
  initial_b = 0   # initial intercept guess
  initial_m1 = 0  # initial slope1 guess
  initial_m2 = 0  # initial slope2 guess
  initial_m3 = 0  # initial slope3 guess
  X = exp_X(initial_b, initial_m1, initial_m2, initial_m3)  # only len(X) is used downstream
  num_iterations = 10
  print("Starting gradient descent at b = {0}, m1 = {1}, m2 = {2}, m3 = {3}, "
        "error = {4}".format(initial_b, initial_m1, initial_m2, initial_m3,
                             cost_fct(initial_b, initial_m1, initial_m2, initial_m3)))
  print("Running...")
  [b, m1, m2, m3] = gradient_descent_runner(X, initial_b, initial_m1, initial_m2,
                                            initial_m3, learning_rate, num_iterations)
  print("After {0} iterations b = {1}, m1 = {2}, m2 = {3}, m3 = {4}, "
        "error = {5}".format(num_iterations, b, m1, m2, m3, cost_fct(b, m1, m2, m3)))
  return b, m1, m2, m3

b0, b1, b2, b3 = ranthis()
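# With the fitted coefficients, a single new sample can be scored directly
# (a sketch; predict_one is my own name, using the same 0.5 threshold as
# the classifier below):
def predict_one(x1, x2, x3):
  p = 1.0 / (1.0 + np.exp(-(b0 + b1*x1 + b2*x2 + b3*x3)))
  return 1 if p >= 0.5 else 0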

# Classify the new Y and plot it
def classifier():
  Y_hat = []
  index_0 = []
  index_1 = []
  X = exp_X(b0, b1, b2, b3)
  X_set = exp_X(0.01, 0.001, 0.01, 0.001)  # reference curve with hand-picked coefficients
  a = 0
  for i in X:
    if i < 0.5:
      Y_hat.append(0)
      index_0.append(a)
    else:
      Y_hat.append(1)
      index_1.append(a)
    a += 1
  X_0 = []
  X_1 = []
  n_0 = []
  n_1 = []
  X1_0 = []
  X1_1 = []
  X2_0 = []
  X2_1 = []
  X3_0 = []
  X3_1 = []
  for i in index_0:
    X_0.append(X1[i])
    n_0.append(X_set[i])
    X1_0.append(X1[i])
    X2_0.append(X2[i])
    X3_0.append(X3[i])
  for i in index_1:
    X_1.append(X1[i])
    n_1.append(X_set[i])
    X1_1.append(X1[i])
    X2_1.append(X2[i])
    X3_1.append(X3[i])
  # 2D view: predicted classes along the reference sigmoid
  plt.axis([0, 160, 0.4, 1])
  plt.ylabel('outcome')
  plt.xlabel('features')
  plt.plot(X_0, n_0, 'ro')
  plt.plot(X_1, n_1, 'go')
  # 3D view: the two predicted classes in feature space
  plt3d = plt.figure().add_subplot(111, projection='3d')
  plt3d.scatter(X1_0, X2_0, X3_0, c='r', marker='o')
  plt3d.scatter(X1_1, X2_1, X3_1, c='b', marker='x')
  plt3d.set_xlabel('X1')
  plt3d.set_ylabel('X2')
  plt3d.set_zlabel('X3')
  plt.show()

classifier()

# The result looks like this (classification in blue crosses and red circles):
[Screenshot: 2D plot of predicted classes along the sigmoid, plus 3D scatter of the two classes]
