add regularization and graph

dadel 2018-01-03 14:43:31 +01:00
parent c97f9d6676
commit 9495c7db09
2 changed files with 151 additions and 78 deletions

File diff suppressed because one or more lines are too long

mlp.py: Normal file → Executable file (65 lines changed)

@@ -1,25 +1,33 @@
+#!/usr/bin/env python3
 import numpy as np
+try:
+    import matplotlib.pyplot as mp
+except:
+    mp = None
 def sigmoid(x):
     return 1/(1+np.exp(-x))
 def deriv_sigmoid(x):
     a = sigmoid(x)
     return a * (1 - a)
 def tanh(x):
     ep = np.exp(x)
     en = np.exp(-x)
     return (ep - en)/(ep + en)
 def deriv_tanh(x):
     a = tanh(x)
     return 1 - (a * a)
-class MultiLayerPerceptron(object):
-    @staticmethod
-    def relu(x):
+def relu(x):
     ret = 0
     #fixme should map to compare
     if x > 0:
@@ -28,16 +36,14 @@ class MultiLayerPerceptron(object):
         ret = np.zeros(x.shape)
     return ret
-    @staticmethod
-    def deriv_relu(x):
+def deriv_relu(x):
     ret = 0
     if z < 0:
         ret = 0.01
     else:
         ret = 1
-    @staticmethod
-    def leaky_relu(x):
+def leaky_relu(x):
     ret = 0.01 * x
     #fixme should map to compare
     if x > 0:
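
Note on the relu helpers, which this commit appears to move out of the class to module level (the leaky_relu body continues in the next hunk): the committed deriv_relu tests a name z that is never defined (its argument is x), and the "#fixme should map to compare" comments suggest the scalar branches do not yet handle array inputs. A minimal vectorized sketch of what these helpers could look like, an illustration under those assumptions rather than the committed code:

    # hypothetical vectorized variants, not what this commit implements
    def relu(x):
        # element-wise max(0, x); works for scalars and ndarrays
        return np.maximum(0, x)

    def deriv_relu(x):
        # slope 1 where x > 0, else 0 (use 0.01 below zero for the leaky variant)
        return np.where(x > 0, 1.0, 0.0)

    def leaky_relu(x, slope=0.01):
        # element-wise: x where positive, slope*x otherwise
        return np.where(x > 0, x, slope * x)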
@@ -46,13 +52,16 @@ class MultiLayerPerceptron(object):
         ret = np.ones(x.shape)*0.01
     return ret
+class MultiLayerPerceptron(object):
     functions = {
         "sigmoid": {"function": sigmoid, "derivative": deriv_sigmoid},
         "tanh": {"function": tanh, "derivative": deriv_tanh},
         "relu": {"function": relu, "derivative": deriv_relu},
     }
-    def __init__(self, L=1, n=None, g=None, alpha=0.01):
+    def __init__(self, L=1, n=None, g=None, alpha=0.01, lambd=0):
         """Initializes network geometry and parameters
         :param L: number of layers including output and excluding input. Defaut 1.
         :type L: int
@@ -84,6 +93,7 @@ class MultiLayerPerceptron(object):
         self._Z = None
         self._m = 0
         self._alpha = alpha
+        self._lambda = lambd

     def set_all_input_examples(self, X, m=1):
         """Set the input examples.
@@ -153,6 +163,9 @@ class MultiLayerPerceptron(object):
     def get_output(self):
         return self._A[self._L]

+    def get_weights(self):
+        return self._W[1:]
+
     def back_propagation(self, get_cost_function=False):
         """Back propagation
@@ -169,8 +182,12 @@ class MultiLayerPerceptron(object):
         dA = [None] + [None] * self._L
         dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))
         if get_cost_function:
+            wnorms = 0
+            for w in self._W[1:]:
+                wnorms += np.linalg.norm(w)
             J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \
-                np.dot((1 - self._Y), np.log(1-self._A[l]).T) )
+                np.dot((1 - self._Y), np.log(1-self._A[l]).T) ) + \
+                self._lambda/(2*m) * wnorms # regularization
         #dZ = self._A[l] - self._Y

         for l in range(self._L, 0, -1):
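
For reference, the hunk above adds an L2 penalty to the cross-entropy cost, J = -(1/m) * sum(y*log(a) + (1-y)*log(1-a)) + lambda/(2m) * sum_l ||W[l]||. The textbook form of L2 regularization usually squares the Frobenius norms, while np.linalg.norm(w) returns the unsquared norm, so a sketch of the squared-norm variant, an alternative rather than the committed behaviour, would be:

    # hypothetical helper: squared-Frobenius L2 penalty
    def l2_penalty(weights, lambd, m):
        return lambd / (2 * m) * sum(np.linalg.norm(w) ** 2 for w in weights)

Called as l2_penalty(self._W[1:], self._lambda, m), it would replace the wnorms accumulation above.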
@@ -182,12 +199,13 @@ class MultiLayerPerceptron(object):
         # dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)
         # db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)
         for l in range(self._L, 0, -1):
-            self._W[l] = self._W[l] - self._alpha * dW[l]
+            self._W[l] = self._W[l] - self._alpha * dW[l] - \
+                (self._alpha*self._lambda/m * self._W[l]) # regularization
             self._b[l] = self._b[l] - self._alpha * db[l]

         return J

-    def minimize_cost(self, min_cost, max_iter=100000, alpha=None):
+    def minimize_cost(self, min_cost, max_iter=100000, alpha=None, plot=False):
         """Propagate forward then backward in loop while minimizing the cost function.

         :param min_cost: cost function value to reach in order to stop algo.
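
The regularized update above is standard weight decay: W[l] <- W[l] - alpha*dW[l] - (alpha*lambda/m)*W[l], which is the same as W[l] <- (1 - alpha*lambda/m)*W[l] - alpha*dW[l], i.e. adding (lambda/m)*W[l] to the gradient of the unregularized cost. As a standalone sketch of one such step (names are illustrative, not this class's API):

    def l2_step(W, dW, alpha, lambd, m):
        # gradient-descent step with L2 weight decay folded in
        return (1 - alpha * lambd / m) * W - alpha * dW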
@@ -199,15 +217,24 @@ class MultiLayerPerceptron(object):
         if alpha is None:
             alpha = self._alpha
         self.propagate()
+        if plot:
+            y=[]
+            x=[]
         for i in range(max_iter):
             J = self.back_propagation(True)
+            if plot:
+                y.append(J[0][0])
+                x.append(nb_iter)
             self.propagate()
             nb_iter = i + 1
             if J <= min_cost:
                 break
+        if mp and plot:
+            mp.plot(x,y)
+            mp.show()
         return {"iterations": nb_iter, "cost_function": J}

-    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):
+    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None, plot=False):
         """Tune parameters in order to learn examples by propagate and backpropagate.

         :param X: the inputs training examples
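
One detail worth flagging in the plotting code above: x.append(nb_iter) runs before nb_iter = i + 1 executes, so unless nb_iter is initialized earlier in the method (not visible in this hunk) the first pass would raise a NameError. A defensive sketch of the same cost-curve idea inside minimize_cost, appending the loop index instead (assumed names, not the committed code):

    costs = []
    for i in range(max_iter):
        J = self.back_propagation(True)
        costs.append(float(J))      # scalar cost at iteration i
        self.propagate()
        nb_iter = i + 1
        if J <= min_cost:
            break
    if mp is not None and plot:
        mp.plot(range(len(costs)), costs)
        mp.xlabel("iteration")
        mp.ylabel("cost J")
        mp.show()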
@@ -220,12 +247,12 @@ class MultiLayerPerceptron(object):
         """
         self.set_all_training_examples(X, Y, m)
         self.prepare()
-        res = self.minimize_cost(min_cost, max_iter, alpha)
+        res = self.minimize_cost(min_cost, max_iter, alpha, plot)
         return res

 if __name__ == "__main__":
-    mlp = MultiLayerPerceptron(L=2, n=[2, 3, 1], g=["tanh", "sigmoid"], alpha=2)
+    mlp = MultiLayerPerceptron(L=2, n=[2, 3, 1], g=["tanh", "sigmoid"], alpha=2, lambd=0.005)
     #mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=["sigmoid"], alpha=0.1)
     X = np.array([[0, 0],
@@ -238,7 +265,15 @@ if __name__ == "__main__":
                   [1],
                   [0]])
-    res = mlp.learning(X.T, Y.T, 4)
+    res = mlp.learning(X.T, Y.T, 4, max_iter=5000, plot=True)
     print(res)
     print(mlp.get_output())
+    print(mlp.get_weights())
+    #mlp.set_all_training_examples(X.T, Y.T, 4)
+    #mlp.prepare()
+    #print(mlp.propagate())
+    #for i in range(100):
+    #    print(mlp.back_propagation())
+    #    mlp.propagate()
+    #print(mlp.propagate())
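
As a usage follow-up to the XOR demo above, the learned outputs can be checked against Y by thresholding the sigmoid activations, using only methods already defined in this file (a sketch; the exact values depend on how far the cost was driven down):

    predictions = (mlp.get_output() > 0.5).astype(int)
    print(predictions)  # should approach [[0, 1, 1, 0]], i.e. Y.T, once the cost is low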