Logistic regression, gradient descent, Python

# Import needed modules
import xlrd
import numpy as np
import matplotlib.pyplot as plt

# Open and read the Excel workbook
base = xlrd.open_workbook('data_logreg.xls')
first = base.sheet_by_index(0)

# Extract the data into X1 (feature values between 1 and 100) and Y (labels, 0 or 1)
Group_X1 = []
Group_X1.append(first.col_values(4))  # feature column (column index assumed; adjust to your sheet)
Group_Y = []
Group_Y.append(first.col_values(5))   # label column
X1 = []
Y = []
for i in Group_X1:
  for x in i:
    X1.append(x)
del X1[0]  # drop the header cell
for i in Group_Y:
  for y in i:
    Y.append(y)
del Y[0]  # drop the header cell

# Compute the sigmoid function f(X) = 1 / (1 + exp(-(b0 + b1*X)))
def logX(b0, b1):
  h0 = []
  for i in range(len(X1)):
    a = np.exp(-(b0 + b1*X1[i]))
    b = 1 / (1 + a)
    h0.append(b)
  return h0
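
# Quick sanity check (hypothetical values, not from the spreadsheet): with
# b0 = 0 and b1 = 0 the model carries no information, so every predicted
# probability should be exactly 0.5; a large positive b0 + b1*x pushes the
# output towards 1, a large negative one towards 0.
# print(logX(0, 0)[:5])  # expect [0.5, 0.5, 0.5, 0.5, 0.5]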

# Compute the current cost (average cross-entropy / negative log-likelihood)
def cost_fct(b0, b1):
  cost = 0
  X = logX(b0, b1)
  for i in range(len(X1)):
    a = np.log(X[i])
    b = np.log(1 - X[i])
    c = -Y[i]*a - ((1 - Y[i])*b)
    cost += c
  return cost / len(X)
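
# A hedged worked example with made-up numbers (not from the data): for a
# point with true label y = 1, a confident correct prediction p = 0.8 costs
# -np.log(0.8) ≈ 0.22, while a confident wrong prediction p = 0.1 costs
# -np.log(0.1) ≈ 2.30, so cross-entropy penalises confident mistakes heavily.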

# Compute the gradients and take one gradient-descent step
def step_gradient(b_current, m_current, X, learningRate):
  b_gradient = 0
  m_gradient = 0
  h_0 = logX(b_current, m_current)  # current predicted probabilities
  for i in range(len(X)):
    y = Y[i]
    b_gradient += (h_0[i] - y)
    m_gradient += X1[i] * (h_0[i] - y)
  new_b = b_current - (learningRate * b_gradient)
  new_m = m_current - (learningRate * m_gradient)
  return [new_b, new_m]
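
# For reference, a minimal vectorized sketch of the same update using NumPy
# arrays (it assumes the X1 and Y lists defined above and is not called by
# the rest of the script):
def step_gradient_vectorized(b_current, m_current, learningRate):
  x = np.asarray(X1)
  y = np.asarray(Y)
  h = 1.0 / (1.0 + np.exp(-(b_current + m_current * x)))  # predicted probabilities
  b_gradient = np.sum(h - y)
  m_gradient = np.sum(x * (h - y))
  return [b_current - learningRate * b_gradient, m_current - learningRate * m_gradient]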

# Run gradient descent for a fixed number of iterations
def gradient_descent_runner(X, starting_b, starting_m, learning_rate, num_iterations):
  b = starting_b
  m = starting_m
  for i in range(num_iterations):
    b, m = step_gradient(b, m, X, learning_rate)  # update b and m on every iteration
  return [b, m]
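
# If you want to watch the cost fall during training, a sketch of a monitored
# variant (not part of the original post) could look like this:
def gradient_descent_verbose(X, starting_b, starting_m, learning_rate, num_iterations):
  b, m = starting_b, starting_m
  for i in range(num_iterations):
    b, m = step_gradient(b, m, X, learning_rate)
    if i % 200 == 0:
      print(i, cost_fct(b, m))
  return [b, m]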

# Set up and run the gradient descent
def ranthis():
  learning_rate = 0.0001
  initial_b = 0  # initial intercept guess
  initial_m = 0  # initial slope guess
  num_iterations = 1000
  print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, cost_fct(initial_b, initial_m)))
  print("Running...")
  [b, m] = gradient_descent_runner(X1, initial_b, initial_m, learning_rate, num_iterations)
  print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, cost_fct(b, m)))
  return b, m

# Run the gradient descent function
betas = ranthis()
b0 = betas[0]
b1 = betas[1]
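
# Optional sanity check (a sketch, not in the original post): fraction of
# training points classified correctly at the 0.5 probability threshold.
# correct = sum(1 for p, y in zip(logX(b0, b1), Y) if (p >= 0.5) == (y == 1))
# print("training accuracy:", correct / float(len(Y)))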

# Classify the new Y and plot it
def classifier():
  Y_hat = []
  index_0 = []
  index_1 = []
  X = logX(b0, b1)  # probabilities under the fitted model, used to classify
  X_set = logX(0.0001, 0.03)  # fixed reference sigmoid used only for the plotted y-values
  a = 0
  for i in X:
    if i < 0.5:
      Y_hat.append(0)
      index_0.append(a)
    else:
      Y_hat.append(1)
      index_1.append(a)
    a += 1
  X_0 = []
  X_1 = []
  n_0 = []
  n_1 = []
  for i in index_0:
    X_0.append(X1[i])
    n_0.append(X_set[i])
  for i in index_1:
    X_1.append(X1[i])
    n_1.append(X_set[i])
  plt.axis([0, 100, 0.4, 1])
  plt.ylabel('outcome')
  plt.xlabel('features')
  plt.plot(X_0, n_0, 'ro')  # points classified as 0, in red
  plt.plot(X_1, n_1, 'go')  # points classified as 1, in green
  plt.show()

classifier()

# Results look like this: (screenshot of the output scatter plot, red = class 0, green = class 1)
