# In [5]:
def linear_activation_forward(A_prev, W, b, activation):
    """Forward propagation for one layer: linear step followed by an activation.

    Parameters
    ----------
    A_prev : activations from the previous layer (or the input data for layer 1).
    W : weight matrix of the current layer.
    b : bias vector of the current layer.
    activation : name of the activation to apply — "sigmoid", "relu" or "tanh".

    Returns
    -------
    A : post-activation output of the current layer.
    cache : tuple ``(linear_cache, activation_cache)`` — i.e. (A_prev, W, b)
        and Z — stored for use during backpropagation.

    Raises
    ------
    ValueError : if ``activation`` is not one of the recognized names.
    """
    # The linear step Z = W·A_prev + b is identical for every activation,
    # so compute it once instead of repeating it in each branch.
    Z, linear_cache = linear_forward(A_prev, W, b)  # caches A_prev, W and b

    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)  # caches Z
    elif activation == "relu":
        A, activation_cache = relu(Z)  # caches Z
    elif activation == "tanh":
        A, activation_cache = tanh(Z)  # caches Z
    else:
        # Previously an unrecognized name fell through and crashed later with
        # an opaque NameError on undefined caches; fail fast and explicitly.
        raise ValueError(f"Unknown activation: {activation!r}")

    cache = (linear_cache, activation_cache)  # (A_prev, W, b) and Z together
    return A, cache