def L_model_forward(X, parameters, activation):
    """Run forward propagation through an L-layer network.

    Implements [LINEAR -> activation] * (L-1) -> LINEAR -> SIGMOID:
    every hidden layer uses `activation`, and the output layer always
    uses a sigmoid (binary-classification head).

    Parameters
    ----------
    X : array-like
        Input data; this plays the role of A^[0], the layer-0 activations.
    parameters : dict
        Weights and biases keyed "W1", "b1", ..., "WL", "bL" — two entries
        per layer, which is why L = len(parameters) // 2 below.
    activation : str
        Name of the activation used for the hidden layers, passed through
        to `linear_activation_forward` (e.g. "relu").

    Returns
    -------
    AL : array-like
        Activations of the output (last) layer.
    caches : list
        One cache per layer, in order, as produced by
        `linear_activation_forward`; used later by backpropagation.
    """
    caches = []
    A = X  # current activations; starts as A^[0] = X
    # Two parameters (W, b) per layer, so the layer count is half the dict size.
    L = len(parameters) // 2

    # Hidden layers 1..L-1: LINEAR -> `activation`, caching each layer's values.
    for l in range(1, L):
        A_prev = A
        W = parameters["W" + str(l)]
        b = parameters["b" + str(l)]
        A, cache = linear_activation_forward(A_prev, W, b, activation)
        caches.append(cache)

    # Output layer L: LINEAR -> SIGMOID (always sigmoid, regardless of `activation`).
    W = parameters["W" + str(L)]
    b = parameters["b" + str(L)]
    AL, cache = linear_activation_forward(A, W, b, "sigmoid")
    caches.append(cache)

    return AL, caches