import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
%matplotlib inline
np.random.seed(10)
# Generate two 2-D Gaussian classes of 1500 samples each:
# class 0 centred at the origin, class 1 (wider, scaled by 1.2) shifted to (-4.5, -4.5).
X_1 = np.random.randn(1500, 2)
X_2 = 1.2 * np.random.randn(1500, 2) - 4.5
# divide them into:
# training set (first 1000 samples of each class)
X_1_train = X_1[:1000, :]
X_2_train = X_2[:1000, :]
# test set (remaining 500 samples of each class)
X_1_test = X_1[1000:, :]
X_2_test = X_2[1000:, :]
# X_train is the training set used by the algorithm (class 0 stacked above class 1)
X_train = np.vstack([X_1_train, X_2_train])
# y is the vector of targets, shaped (2000, 1)
# first 1000 samples are from class 0
y = np.zeros((2000, 1))  # already (2000, 1); the former .reshape(2000, 1) was redundant
# last 1000 samples are from class 1
y[1000:, 0] = 1
X_test = np.vstack([X_1_test, X_2_test])
print('Shape of training set is (%i, %i) .'% X_train.shape)
print('Shape of test set is (%i, %i) .'% X_test.shape)
print('Shape of target vector is (%i, %i) .' % y.shape)
# Plot the train and test split of both classes in feature space.
plt.scatter(X_1_train[:, 0], X_1_train[:, 1], label='class 0 train')
plt.scatter(X_2_train[:, 0], X_2_train[:, 1], label='class 1 train')
plt.scatter(X_1_test[:, 0], X_1_test[:, 1], label='class 0 test')
plt.scatter(X_2_test[:, 0], X_2_test[:, 1], label='class 1 test')
plt.legend()
plt.show()  # duplicate plt.show() removed: the second call rendered nothing
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + exp(-z))."""
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)
def Log_Regression_Vectorized(X, Y, alpha, num_it):
    """Train binary logistic regression with vectorized batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (n_x, m)
        Training inputs, one column per example.
    Y : ndarray, shape (1, m)
        Binary targets (0 or 1) for each example.
    alpha : float
        Learning rate for the gradient-descent updates.
    num_it : int
        Number of gradient-descent iterations.

    Returns
    -------
    J_all : ndarray, shape (num_it,)
        Cross-entropy cost recorded at each iteration.
    w_all : ndarray, shape (n_x, num_it)
        Weight vector after each iteration.
    b_all : ndarray, shape (num_it,)
        Bias after each iteration.
    """
    m = X.shape[1]    # number of training examples
    n_x = X.shape[0]  # number of features

    # Parameter initialization. Weights are drawn from the global NumPy RNG
    # (zeros would work equally well for logistic regression).
    # NOTE: the former dead initializations (J = 0, dw = np.zeros((n_x, m)),
    # db = 0) were removed — they were overwritten before first use, and dw's
    # shape was wrong anyway (the gradient has shape (n_x, 1)).
    w = np.random.randn(n_x, 1)
    b = 0.0

    # Per-iteration history buffers.
    J_all = np.zeros(num_it)
    w_all = np.zeros((n_x, num_it))
    b_all = np.zeros(num_it)

    for i in range(num_it):
        # Forward pass: linear score -> sigmoid activation -> cross-entropy cost.
        Z = np.dot(w.T, X) + b
        A = sigmoid(Z)
        J = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

        # Backward pass: gradients of the cost w.r.t. w and b.
        dZ = A - Y
        dw = 1 / m * np.dot(X, dZ.T)
        db = 1 / m * np.sum(dZ)

        # Gradient-descent update.
        w += -alpha * dw
        b += -alpha * db

        # Record the trajectory for later plotting/inspection.
        J_all[i] = J
        w_all[:, i] = w[:, 0]
        b_all[i] = b

    return J_all, w_all, b_all
# Train: features as rows (n_x, m), targets as a (1, m) row vector.
# (The former bare expression `X_train.T.shape` and the unused `N = 1000`
# were notebook leftovers with no effect in a script and were removed.)
J, w, b = Log_Regression_Vectorized(X_train.T, y.T, alpha=0.05, num_it=10000)
print(J.shape)
print(w.shape)
print(b.shape)
# Plot the cost and each parameter against the iteration index.
xx = np.arange(10000)  # one x-value per gradient-descent iteration
plt.plot(xx, J, label='J')
plt.plot(xx, w[0, :], label='w1')
plt.plot(xx, w[1, :], label='w2')
plt.plot(xx, b, label='b')
plt.xlabel('number of iterations')
plt.legend()
plt.grid()
plt.show()
# Value of the cost function with the final values of the parameters.
# (The former bare expression `J[-1]` only displayed in a notebook; print it
# so the script reports it too.)
print('Final cost:', J[-1])
# Decision boundary: w1*x1 + w2*x2 + b = 0  =>  x2 = -(w1/w2)*x1 - b/w2.
w1 = w[0, -1]
w2 = w[1, -1]
b1 = b[-1]
x_axis = np.linspace(-6, 2)
yy_lr = -(w1 / w2) * x_axis - b1 / w2
plt.figure(figsize=(10, 7))
plt.scatter(X_1_test[:, 0], X_1_test[:, 1], label='class 0', color='blue')
plt.scatter(X_2_test[:, 0], X_2_test[:, 1], label='class 1', color='orange')
plt.xlabel('feature1')
plt.ylabel('feature2')
plt.plot(x_axis, yy_lr, label='LogReg', c='r')
plt.legend()
plt.show()