def linear_activation_forward(A_prev, W, b, activation):
    """Forward propagation for one LINEAR -> ACTIVATION layer.

    Parameters
    ----------
    A_prev : activations from the previous layer (or the input data).
    W : weight matrix of the current layer.
    b : bias vector of the current layer.
    activation : str
        Name of the activation to apply: "sigmoid", "relu", or "tanh".

    Returns
    -------
    A : post-activation output of this layer.
    cache : tuple
        ``(linear_cache, activation_cache)`` — the linear cache holds
        ``A_prev, W, b`` and the activation cache holds ``Z``, both kept
        for the backward pass.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the recognized names.
    """
    # The linear step Z = W·A_prev + b is identical for every activation,
    # so compute it once instead of duplicating it in each branch.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        A, activation_cache = relu(Z)
    elif activation == "tanh":
        A, activation_cache = tanh(Z)
    else:
        # Previously an unknown name fell through every branch and crashed
        # with NameError on A; fail loudly with a clear message instead.
        raise ValueError(f"Unknown activation: {activation!r}")

    cache = (linear_cache, activation_cache)
    return A, cache