In [2]:
# Seed NumPy's global generator so the samples below are reproducible.
np.random.seed(10)

# Data-set sizes, named once so that the slices and the target vector below
# cannot drift out of sync with each other.
N_SAMPLES_PER_CLASS = 1500   # points drawn for each class
N_TRAIN_PER_CLASS = 1000     # leading points of each class used for training

# generate two classes of samples X_1 and X_2
# X_1 holds standard-normal draws; X_2 holds standard-normal draws scaled by
# 1.2 and shifted by -4.5 in both coordinates. The draw order (X_1 first,
# then X_2) is kept, so the seeded values are unchanged.
X_1 = np.random.randn(N_SAMPLES_PER_CLASS, 2)
X_2 = 1.2 * np.random.randn(N_SAMPLES_PER_CLASS, 2) - 4.5

# divide them into:
# training set (the first N_TRAIN_PER_CLASS rows of each class)
X_1_train = X_1[:N_TRAIN_PER_CLASS, :]
X_2_train = X_2[:N_TRAIN_PER_CLASS, :]

# test set (the remaining rows of each class)
X_1_test = X_1[N_TRAIN_PER_CLASS:, :]
X_2_test = X_2[N_TRAIN_PER_CLASS:, :]

# X_train is training set used in algorithm: class-0 rows stacked on top of
# class-1 rows
X_train = np.vstack([X_1_train, X_2_train])

# y is a column vector of targets, one row per training sample.
# Its length and the class boundary are derived from the stacked arrays
# rather than repeated as literals, so labels always line up with X_train.
y = np.zeros((X_train.shape[0], 1))
# Rows up to len(X_1_train) stay 0 (class 0); the rows after it are class 1.
y[X_1_train.shape[0]:, 0] = 1

# X_test uses the same class-0-then-class-1 row order as X_train
X_test = np.vstack([X_1_test, X_2_test])

# Report the shapes of the arrays built above
print('Shape of the training set is  (%i, %i) .'% X_train.shape)
print('Shape of the test set is (%i, %i) .'% X_test.shape)
print('Shape of the target vector is (%i, %i) .' % y.shape)


# Scatter-plot the training and test subsets of both classes on one figure.
# Each entry pairs a point set with its legend label and marker colour; the
# entries are drawn in the order listed.
for subset, subset_label, subset_color in (
    (X_1_train, 'class 0 train', 'Orange'),
    (X_2_train, 'class 1 train', 'Blue'),
    (X_1_test, 'class 0 test', 'r'),
    (X_2_test, 'class 1 test', 'g'),
):
    # column 0 goes on the horizontal axis, column 1 on the vertical axis
    plt.scatter(subset[:, 0], subset[:, 1], label=subset_label, color=subset_color)

# Build the legend from the labels given above, then render the figure.
plt.legend()
plt.show()
Shape of the training set is  (2000, 2) .
Shape of the test set is (1000, 2) .
Shape of the target vector is (2000, 1) .