import numpy as np


def sigmoid(x):
    # Logistic activation: squashes any real input into (0, 1).
    return 1 / (1 + np.exp(-x))


def deriv_sigmoid(x):
    # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
    a = sigmoid(x)
    return a * (1 - a)


def tanh(x):
    # Hyperbolic tangent: squashes any real input into (-1, 1).
    ep = np.exp(x)
    en = np.exp(-x)
    return (ep - en) / (ep + en)


def deriv_tanh(x):
    # tanh'(x) = 1 - tanh(x)^2
    a = tanh(x)
    return 1 - (a * a)
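

# Quick numerical sanity check for the derivatives above (a sketch to
# run in a REPL rather than at import time); it compares deriv_sigmoid
# against a centered finite difference:
#   x = np.linspace(-3, 3, 7)
#   h = 1e-6
#   fd = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
#   assert np.allclose(deriv_sigmoid(x), fd, atol=1e-6)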


class MultiLayerPerceptron(object):

    @staticmethod
    def relu(x):
        # np.maximum broadcasts, so this handles scalars and arrays
        # alike.
        return np.maximum(0, x)

    @staticmethod
    def deriv_relu(x):
        # The derivative of relu: 1 for positive inputs, 0 elsewhere.
        return np.where(x > 0, 1.0, 0.0)

    @staticmethod
    def leaky_relu(x):
        # Element-wise: x where positive, 0.01 * x otherwise.
        return np.where(x > 0, x, 0.01 * x)

    # Registry mapping an activation name to its function/derivative
    # pair. relu and deriv_relu are staticmethod objects at this point
    # in the class body, so unwrap them with .__func__ to get plain
    # callables on every Python 3 version (bare staticmethod objects
    # only became callable in Python 3.10).
    functions = {
        "sigmoid": {"function": sigmoid, "derivative": deriv_sigmoid},
        "tanh": {"function": tanh, "derivative": deriv_tanh},
        "relu": {"function": relu.__func__, "derivative": deriv_relu.__func__},
    }
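
    # The registry extends the same way; a sketch for leaky ReLU,
    # assuming a (hypothetical) deriv_leaky_relu helper were added:
    #   functions["leaky_relu"] = {"function": leaky_relu.__func__,
    #                              "derivative": deriv_leaky_relu.__func__}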

    def __init__(self, L=1, n=None, g=None, alpha=0.01):
        """Initializes network geometry and parameters.

        :param L: number of layers, including output and excluding
            input. Default 1.
        :type L: int
        :param n: list of the number of units per layer, including the
            input layer. Default [2, 1].
        :type n: list
        :param g: list of activation function names per layer,
            excluding input. Possible names are "sigmoid", "tanh" and
            "relu". Default ["sigmoid"].
        :type g: list
        :param alpha: learning rate. Default 0.01.
        """
        w_rand_factor = 1  # scale of the random weight initialization
        self._L = L
        if n is None:
            n = [2, 1]
        self._n = n
        if g is None:
            g = [MultiLayerPerceptron.functions["sigmoid"]]
        else:
            g = [MultiLayerPerceptron.functions[fct] for fct in g]
        # Index 0 is reserved for the input layer so that parameter
        # lists line up with the 1-based layer index used below.
        self._g = [None] + g
        self._W = [None] + [np.random.randn(n[l+1], n[l]) * w_rand_factor
                            for l in range(L)]
        self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]
        assert len(self._g) == len(self._W)
        assert len(self._g) == len(self._b)
        assert len(self._g) == len(self._n)
        self._A = None
        self._X = None
        self._Y = None
        self._Z = None
        self._m = 0
        self._alpha = alpha
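
    # Shape sketch: MultiLayerPerceptron(L=2, n=[2, 3, 1],
    # g=["tanh", "sigmoid"]) allocates
    #   W[1]: (3, 2), b[1]: (3, 1), W[2]: (1, 3), b[2]: (1, 1)
    # i.e. W[l] maps the n[l-1] activations of layer l-1 to the n[l]
    # units of layer l.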

    def set_all_input_examples(self, X, m=1):
        """Set the input examples.

        :param X: matrix of dimensions (n[0], m). Also accepts a list
            (len m) of lists (len n[0]).
        :param m: number of training examples.
        :type m: int
        """
        if isinstance(X, list):
            assert len(X) == m
            # np.array rather than the deprecated np.matrix, whose `*`
            # operator performs matrix multiplication and would break
            # the element-wise products in back_propagation.
            self._X = np.array(X).T
        else:
            # Compare against m: self._m may still be 0 at this point.
            assert X.shape == (self._n[0], m)
            self._X = X
        assert self._m in (0, m)
        self._m = m

    def set_all_expected_output_examples(self, Y, m=1):
        """Set the expected output examples.

        :param Y: matrix of dimensions (n[L], m). Also accepts a list
            (len m) of lists (len n[L]).
        :param m: number of training examples.
        :type m: int
        """
        if isinstance(Y, list):
            assert len(Y) == m
            self._Y = np.array(Y).T
        else:
            assert Y.shape == (self._n[self._L], m)
            self._Y = Y
        assert self._m in (0, m)
        self._m = m

    def set_all_training_examples(self, X, Y, m=1):
        """Set all training examples.

        :param X: matrix of dimensions (n[0], m). Also accepts a list
            (len m) of lists (len n[0]).
        :param Y: matrix of dimensions (n[L], m). Also accepts a list
            (len m) of lists (len n[L]).
        :param m: number of training examples.
        :type m: int
        """
        self._m = m
        self.set_all_input_examples(X, m)
        self.set_all_expected_output_examples(Y, m)
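
    # Both call styles are equivalent for a 2-input network with m=4
    # examples (sketch):
    #   mlp.set_all_training_examples([[0, 0], [0, 1], [1, 0], [1, 1]],
    #                                 [[0], [1], [1], [0]], m=4)
    #   mlp.set_all_training_examples(X, Y, m=4)  # X: (2, 4), Y: (1, 4)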

    def prepare(self):
        """Prepare the network: allocate the activation (A) and
        pre-activation (Z) buffers."""
        assert self._X is not None
        assert self._m > 0
        m = self._m
        self._A = [self._X]
        self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]
        self._Z = [None] + [np.empty((self._n[l+1], m))
                            for l in range(self._L)]

    def propagate(self):
        """Forward propagation.

        :return: matrix of computed outputs (n[L], m)
        """
        for l0 in range(self._L):
            l = l0 + 1
            # Z[l] = W[l] . A[l-1] + b[l];  A[l] = g[l](Z[l])
            self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]
            self._A[l] = self._g[l]["function"](self._Z[l])
        return self._A[self._L]
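
    # With n=[2, 3, 1] and m=4 (the XOR demo below), the shapes work
    # out as (a sketch):
    #   Z[1] = W[1] @ A[0] + b[1]  ->  (3, 2) @ (2, 4) + (3, 1) = (3, 4)
    #   Z[2] = W[2] @ A[1] + b[2]  ->  (1, 3) @ (3, 4) + (1, 1) = (1, 4)
    # with b[l] broadcast across the m columns.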

    def get_output(self):
        """Return the computed outputs, shape (n[L], m)."""
        return self._A[self._L]

    def back_propagation(self, get_cost_function=False):
        """Back propagation.

        :param get_cost_function: if True the cross-entropy cost
            J = -1/m * sum(Y*log(A) + (1 - Y)*log(1 - A))
            will be computed and returned.
        :return: the cost function if get_cost_function==True else None
        """
        J = None
        l = self._L
        m = self._m
        dW = [None] + [None] * self._L
        db = [None] + [None] * self._L
        dA = [None] + [None] * self._L
        # Derivative of the cross-entropy cost w.r.t. the output
        # activations. (For a sigmoid output layer this combines with
        # the activation derivative into the shortcut dZ = A[L] - Y.)
        dA[l] = -self._Y / self._A[l] + (1 - self._Y) / (1 - self._A[l])
        if get_cost_function:
            J = -np.sum(self._Y * np.log(self._A[l])
                        + (1 - self._Y) * np.log(1 - self._A[l])) / m
        # Walk the layers backwards, accumulating gradients.
        for l in range(self._L, 0, -1):
            dZ = dA[l] * self._g[l]["derivative"](self._Z[l])
            dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)
            db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)
            dA[l-1] = np.dot(self._W[l].T, dZ)
        # One gradient-descent step on all parameters.
        for l in range(self._L, 0, -1):
            self._W[l] = self._W[l] - self._alpha * dW[l]
            self._b[l] = self._b[l] - self._alpha * db[l]
        return J
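
    # Sanity-check sketch: back_propagation both computes the cost and
    # applies one descent step, so with a small enough alpha the cost
    # from successive calls (with propagate() in between) should
    # usually decrease:
    #   J1 = mlp.back_propagation(True); mlp.propagate()
    #   J2 = mlp.back_propagation(True)  # expect J2 <= J1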

    def minimize_cost(self, min_cost, max_iter=100000, alpha=None):
        """Propagate forward then backward in a loop while minimizing
        the cost function.

        :param min_cost: cost function value to reach in order to stop
            the algorithm.
        :param max_iter: maximum number of iterations to reach min_cost
            before stopping the algorithm. Default 100000.
        :param alpha: learning rate; if None use the instance alpha
            value. Default None.
        """
        nb_iter = 0
        if alpha is not None:
            # Adopt the caller-supplied learning rate; back_propagation
            # reads self._alpha.
            self._alpha = alpha
        self.propagate()
        for i in range(max_iter):
            J = self.back_propagation(True)
            self.propagate()
            nb_iter = i + 1
            if J <= min_cost:
                break
        return {"iterations": nb_iter, "cost_function": J}

    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):
        """Tune the parameters to learn the examples, by propagating
        forward and backward in a loop.

        :param X: the input training examples
        :param Y: the expected output training examples
        :param m: the number of examples
        :param min_cost: cost function value to reach in order to stop
            the algorithm. Default 0.05.
        :param max_iter: maximum number of iterations to reach min_cost
            before stopping the algorithm. Default 100000.
        :param alpha: learning rate; if None use the instance alpha
            value. Default None.
        """
        self.set_all_training_examples(X, Y, m)
        self.prepare()
        res = self.minimize_cost(min_cost, max_iter, alpha)
        return res
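
    # Typical usage (a sketch mirroring the XOR demo below):
    #   mlp = MultiLayerPerceptron(L=2, n=[2, 3, 1],
    #                              g=["tanh", "sigmoid"], alpha=2)
    #   res = mlp.learning(X, Y, m=4)  # X: (2, 4), Y: (1, 4)
    #   print(res["iterations"], res["cost_function"])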


if __name__ == "__main__":
    # Train a 2-3-1 network on XOR, a classic example a single-layer
    # perceptron cannot represent.
    mlp = MultiLayerPerceptron(L=2, n=[2, 3, 1], g=["tanh", "sigmoid"], alpha=2)
    #mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=["sigmoid"], alpha=0.1)

    X = np.array([[0, 0],
                  [0, 1],
                  [1, 0],
                  [1, 1]])

    Y = np.array([[0],
                  [1],
                  [1],
                  [0]])

    # X and Y hold one example per row; transpose them into the
    # (n, m) column-per-example layout the network expects.
    res = mlp.learning(X.T, Y.T, 4)
    print(res)
    print(mlp.get_output())
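
    # Thresholding the sigmoid outputs at 0.5 recovers the XOR truth
    # table (a sketch; the raw outputs vary with the random weight
    # initialization):
    print((mlp.get_output() > 0.5).astype(int))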