update jupyter file

dadel 2018-01-01 20:46:31 +01:00
parent c04c6c307c
commit c97f9d6676
1 changed file with 301 additions and 2 deletions

mlp.ipynb

@@ -1,6 +1,305 @@
{
"cells": [],
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def sigmoid(x):\n",
" return 1/(1+np.exp(-x))\n",
"\n",
"def deriv_sigmoid(x):\n",
" a = sigmoid(x)\n",
" return a * (1 - a)\n",
"\n",
"def tanh(x):\n",
" ep = np.exp(x)\n",
" en = np.exp(-x)\n",
" return (ep - en)/(ep + en)\n",
"\n",
"def deriv_tanh(x):\n",
" a = tanh(x)\n",
" return 1 - (a * a)\n",
"\n",
"class MultiLayerPerceptron(object):\n",
"\n",
" @staticmethod\n",
" def relu(x):\n",
" ret = 0\n",
" #fixme should map to compare\n",
" if x > 0:\n",
" ret = x\n",
" elif type(x) is np.ndarray:\n",
" ret = np.zeros(x.shape)\n",
" return ret\n",
"\n",
" @staticmethod\n",
" def deriv_relu(x):\n",
" ret = 0\n",
" if z < 0:\n",
" ret = 0.01\n",
" else:\n",
" ret = 1\n",
"\n",
" @staticmethod\n",
" def leaky_relu(x):\n",
" ret = 0.01 * x\n",
" #fixme should map to compare\n",
" if x > 0:\n",
" ret = x\n",
" elif type(x) is np.ndarray:\n",
" ret = np.ones(x.shape)*0.01\n",
" return ret\n",
"\n",
" functions = {\n",
" \"sigmoid\": {\"function\": sigmoid, \"derivative\": deriv_sigmoid},\n",
" \"tanh\": {\"function\": tanh, \"derivative\": deriv_tanh},\n",
" \"relu\": {\"function\": relu, \"derivative\": deriv_relu},\n",
" }\n",
"\n",
" def __init__(self, L=1, n=None, g=None, alpha=0.01):\n",
" \"\"\"Initializes network geometry and parameters\n",
" :param L: number of layers including output and excluding input. Defaut 1.\n",
" :type L: int\n",
" :param n: list of number of units per layer including input. Default [2, 1].\n",
" :type n: list\n",
" :param g: list of activation functions name per layer excluding input.\n",
" Possible names are: \"sigmoid\", \"tanh\". Default [\"sigmoid\"].\n",
" :type g: list\n",
" :param alpha: learning rate. Default 0.01.\n",
" \"\"\"\n",
" w_rand_factor = 1\n",
" self._L = L\n",
" if n is None:\n",
" n = [2, 1]\n",
" self._n = n\n",
" if g is None:\n",
" g = [MultiLayerPerceptron.functions[\"sigmoid\"]]\n",
" else:\n",
" g = [MultiLayerPerceptron.functions[fct] for fct in g]\n",
" self._g = [None] + g\n",
" self._W = [None] + [np.random.randn(n[l+1], n[l])*w_rand_factor for l in range(L)]\n",
" self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]\n",
" assert(len(self._g) == len(self._W))\n",
" assert(len(self._g) == len(self._b))\n",
" assert(len(self._g) == len(self._n))\n",
" self._A = None\n",
" self._X = None\n",
" self._Y = None\n",
" self._Z = None\n",
" self._m = 0\n",
" self._alpha = alpha\n",
"\n",
" def set_all_input_examples(self, X, m=1):\n",
" \"\"\"Set the input examples.\n",
"\n",
" :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" if type(X) is list:\n",
" assert(len(X) == m)\n",
" self._X = np.matrix(X).T\n",
" else:\n",
" assert(X.shape == (self._n[0], self._m))\n",
" self._X = X\n",
" self._m = m\n",
" assert((self._m == m) or (self._m == 0))\n",
" self._m = m\n",
"\n",
" def set_all_expected_output_examples(self, Y, m=1):\n",
" \"\"\"Set the output examples\n",
"\n",
" :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" if type(Y) is list:\n",
" assert(len(Y) == m)\n",
" self._Y = np.matrix(Y).T\n",
" else:\n",
" assert(Y.shape == (self._n[self._L], self._m))\n",
" self._Y = Y\n",
" assert((self._m == m) or (self._m == 0))\n",
" self._m = m\n",
"\n",
" def set_all_training_examples(self, X, Y, m=1):\n",
" \"\"\"Set all training examples\n",
"\n",
" :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
" :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" self._m = m\n",
" self.set_all_input_examples(X, m)\n",
" self.set_all_expected_output_examples(Y, m)\n",
"\n",
" def prepare(self):\n",
" \"\"\"Prepare network\"\"\"\n",
" assert(self._X is not None)\n",
" assert(self._m > 0)\n",
" m = self._m\n",
" self._A = [self._X]\n",
" self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
" self._Z = [None] + [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"\n",
" def propagate(self):\n",
" \"\"\"Forward propagation\n",
"\n",
" :return: matrix of computed outputs (n[L], m)\n",
" \"\"\"\n",
" for l0 in range(self._L):\n",
" l = l0 + 1\n",
" self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]\n",
" self._A[l] = self._g[l][\"function\"](self._Z[l])\n",
" return self._A[self._L]\n",
"\n",
" def get_output(self):\n",
" return self._A[self._L]\n",
"\n",
" def back_propagation(self, get_cost_function=False):\n",
" \"\"\"Back propagation\n",
"\n",
" :param get_cost_function: if True the cost function J\n",
" will be computed and returned.\n",
" J = -1/m((Y(A.T)) + (1-Y)(A.T))\n",
" :return: the cost function if get_cost_function==True else None\n",
" \"\"\"\n",
" J = None\n",
" l = self._L\n",
" m = self._m\n",
" dW = [None] + [None] * self._L\n",
" db = [None] + [None] * self._L\n",
" dA = [None] + [None] * self._L\n",
" dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))\n",
" if get_cost_function:\n",
" J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \\\n",
" np.dot((1 - self._Y), np.log(1-self._A[l]).T) )\n",
"\n",
" #dZ = self._A[l] - self._Y\n",
" for l in range(self._L, 0, -1):\n",
" dZ = dA[l] * self._g[l][\"derivative\"](self._Z[l])\n",
" dW[l] = 1/self._m * np.dot(dZ, self._A[l-1].T)\n",
" db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
" dA[l-1] = np.dot(self._W[l].T, dZ)\n",
"# dZ = np.dot(self._W[l+1].T, dZ) * self._g[l][\"derivative\"](self._Z[l])\n",
"# dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)\n",
"# db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
" for l in range(self._L, 0, -1):\n",
" self._W[l] = self._W[l] - self._alpha * dW[l]\n",
" self._b[l] = self._b[l] - self._alpha * db[l]\n",
"\n",
" return J\n",
"\n",
" def minimize_cost(self, min_cost, max_iter=100000, alpha=None):\n",
" \"\"\"Propagate forward then backward in loop while minimizing the cost function.\n",
"\n",
" :param min_cost: cost function value to reach in order to stop algo.\n",
" :param max_iter: maximum number of iterations to reach min cost befor stoping algo. (Default 100000).\n",
" :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
"\n",
" \"\"\"\n",
" nb_iter = 0\n",
" if alpha is None:\n",
" alpha = self._alpha\n",
" self.propagate()\n",
" for i in range(max_iter):\n",
" J = self.back_propagation(True)\n",
" self.propagate()\n",
" nb_iter = i + 1\n",
" if J <= min_cost:\n",
" break\n",
" return {\"iterations\": nb_iter, \"cost_function\": J}\n",
"\n",
" def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):\n",
" \"\"\"Tune parameters in order to learn examples by propagate and backpropagate.\n",
"\n",
" :param X: the inputs training examples\n",
" :param Y: the expected outputs training examples\n",
" :param m: the number of examples\n",
" :param min_cost: cost function value to reach in order to stop algo. Default 0.0.5\n",
" :param max_iter: maximum number of iterations to reach min cost befor stoping algo. (Default 100000).\n",
" :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
"\n",
" \"\"\"\n",
" self.set_all_training_examples(X, Y, m)\n",
" self.prepare()\n",
" res = self.minimize_cost(min_cost, max_iter, alpha)\n",
" return res\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'iterations': 62, 'cost_function': array([[ 0.04879932]])}\n",
"[[ 0.03621064 0.94089041 0.94055051 0.03022811]]\n"
]
}
],
"source": [
"mlp = MultiLayerPerceptron(L=2, n=[2, 2, 1], g=[\"tanh\", \"sigmoid\"], alpha=2)\n",
"#mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=[\"sigmoid\"], alpha=0.1)\n",
"\n",
"X = np.array([[0, 0],\n",
" [0, 1],\n",
" [1, 0],\n",
" [1, 1]])\n",
"\n",
"Y = np.array([[0],\n",
" [1],\n",
" [1],\n",
" [0]])\n",
"\n",
"res = mlp.learning(X.T, Y.T, 4)\n",
"print(res)\n",
"print(mlp.get_output())\n",
"#mlp.set_all_training_examples(X.T, Y.T, 4)\n",
"#mlp.prepare()\n",
"#print(mlp.propagate())\n",
"#for i in range(100):\n",
"# print(mlp.back_propagation())\n",
"# mlp.propagate()\n",
"#print(mlp.propagate())\n"
]
},
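{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sanity check, as a sketch: the same 2-2-1 architecture should also learn the (linearly separable) AND function. The hyperparameters below are illustrative, not tuned."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# illustrative sketch: train the same architecture on AND\n",
"mlp_and = MultiLayerPerceptron(L=2, n=[2, 2, 1], g=[\"tanh\", \"sigmoid\"], alpha=2)\n",
"X_and = np.array([[0, 0],\n",
"                  [0, 1],\n",
"                  [1, 0],\n",
"                  [1, 1]])\n",
"Y_and = np.array([[0],\n",
"                  [0],\n",
"                  [0],\n",
"                  [1]])\n",
"res_and = mlp_and.learning(X_and.T, Y_and.T, 4)\n",
"print(res_and)\n",
"print(np.round(mlp_and.get_output()))\n"
]
},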
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}