def L_model_forward(X, parameters, activation):
    """Run forward propagation through an L-layer network.

    Implements [LINEAR -> activation] * (L-1) -> LINEAR -> SIGMOID:
    every hidden layer uses `activation`, and the output layer always
    uses a sigmoid (binary-classification head).

    Parameters
    ----------
    X : array-like
        Input data; this plays the role of A^[0], the layer-0 activations.
    parameters : dict
        Weights and biases keyed "W1", "b1", ..., "WL", "bL" — two entries
        per layer, which is why L = len(parameters) // 2 below.
    activation : str
        Name of the activation used for the hidden layers, passed through
        to `linear_activation_forward` (e.g. "relu").

    Returns
    -------
    AL : array-like
        Activations of the output (last) layer.
    caches : list
        One cache per layer, in order, as produced by
        `linear_activation_forward`; used later by backpropagation.
    """
    caches = []
    A = X  # current activations; starts as A^[0] = X
    # Two parameters (W, b) per layer, so the layer count is half the dict size.
    L = len(parameters) // 2

    # Hidden layers 1..L-1: LINEAR -> `activation`, caching each layer's values.
    for l in range(1, L):
        A_prev = A
        W = parameters["W" + str(l)]
        b = parameters["b" + str(l)]
        A, cache = linear_activation_forward(A_prev, W, b, activation)
        caches.append(cache)

    # Output layer L: LINEAR -> SIGMOID (always sigmoid, regardless of `activation`).
    W = parameters["W" + str(L)]
    b = parameters["b" + str(L)]
    AL, cache = linear_activation_forward(A, W, b, "sigmoid")
    caches.append(cache)

    return AL, caches