From c97f9d6676b6651e80cae8da79b521970be1c2c7 Mon Sep 17 00:00:00 2001
From: Adel Daouzli
Date: Mon, 1 Jan 2018 20:46:31 +0100
Subject: [PATCH] update jupyter file

---
 mlp.ipynb | 303 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 301 insertions(+), 2 deletions(-)

diff --git a/mlp.ipynb b/mlp.ipynb
index 2fd6442..20bba3a 100644
--- a/mlp.ipynb
+++ b/mlp.ipynb
@@ -1,6 +1,305 @@
 {
- "cells": [],
- "metadata": {},
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def sigmoid(x):\n",
+    "    return 1/(1+np.exp(-x))\n",
+    "\n",
+    "def deriv_sigmoid(x):\n",
+    "    a = sigmoid(x)\n",
+    "    return a * (1 - a)\n",
+    "\n",
+    "def tanh(x):\n",
+    "    ep = np.exp(x)\n",
+    "    en = np.exp(-x)\n",
+    "    return (ep - en)/(ep + en)\n",
+    "\n",
+    "def deriv_tanh(x):\n",
+    "    a = tanh(x)\n",
+    "    return 1 - (a * a)\n",
+    "\n",
+    "class MultiLayerPerceptron(object):\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def relu(x):\n",
+    "        # element-wise max(x, 0); np.maximum handles scalars and arrays\n",
+    "        return np.maximum(x, 0)\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def deriv_relu(x):\n",
+    "        # element-wise derivative of relu: 1 where x > 0, else 0\n",
+    "        return np.where(x > 0, 1.0, 0.0)\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def leaky_relu(x):\n",
+    "        # element-wise leaky relu: x where x > 0, else 0.01 * x\n",
+    "        return np.where(x > 0, x, 0.01 * x)\n",
+    "\n",
+    "    # __func__ unwraps the staticmethod objects so the table holds plain callables\n",
+    "    functions = {\n",
+    "        \"sigmoid\": {\"function\": sigmoid, \"derivative\": deriv_sigmoid},\n",
+    "        \"tanh\": {\"function\": tanh, \"derivative\": deriv_tanh},\n",
+    "        \"relu\": {\"function\": relu.__func__, \"derivative\": deriv_relu.__func__},\n",
+    "    }\n",
+    "\n",
+    "    def __init__(self, L=1, n=None, g=None, alpha=0.01):\n",
+    "        \"\"\"Initializes network geometry and parameters\n",
+    "        :param L: number of layers including output and excluding input. Default 1.\n",
+    "        :type L: int\n",
+    "        :param n: list of number of units per layer including input. Default [2, 1].\n",
+    "        :type n: list\n",
+    "        :param g: list of activation function names per layer excluding input.\n",
+    "            Possible names are: \"sigmoid\", \"tanh\", \"relu\". Default [\"sigmoid\"].\n",
+    "        :type g: list\n",
+    "        :param alpha: learning rate. Default 0.01.\n",
+    "        \"\"\"\n",
+    "        w_rand_factor = 1\n",
+    "        self._L = L\n",
+    "        if n is None:\n",
+    "            n = [2, 1]\n",
+    "        self._n = n\n",
+    "        if g is None:\n",
+    "            g = [MultiLayerPerceptron.functions[\"sigmoid\"]]\n",
+    "        else:\n",
+    "            g = [MultiLayerPerceptron.functions[fct] for fct in g]\n",
+    "        self._g = [None] + g\n",
+    "        self._W = [None] + [np.random.randn(n[l+1], n[l])*w_rand_factor for l in range(L)]\n",
+    "        self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]\n",
+    "        assert(len(self._g) == len(self._W))\n",
+    "        assert(len(self._g) == len(self._b))\n",
+    "        assert(len(self._g) == len(self._n))\n",
+    "        self._A = None\n",
+    "        self._X = None\n",
+    "        self._Y = None\n",
+    "        self._Z = None\n",
+    "        self._m = 0\n",
+    "        self._alpha = alpha\n",
+    "\n",
+    "    def set_all_input_examples(self, X, m=1):\n",
+    "        \"\"\"Set the input examples.\n",
+    "\n",
+    "        :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
+    "        :param m: number of training examples.\n",
+    "        :type m: int\n",
+    "        \"\"\"\n",
+    "        if type(X) is list:\n",
+    "            assert(len(X) == m)\n",
+    "            self._X = np.matrix(X).T\n",
+    "        else:\n",
+    "            assert(X.shape == (self._n[0], m))\n",
+    "            self._X = X\n",
+    "        assert((self._m == m) or (self._m == 0))\n",
+    "        self._m = m\n",
+    "\n",
+    "    def set_all_expected_output_examples(self, Y, m=1):\n",
+    "        \"\"\"Set the output examples\n",
+    "\n",
+    "        :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
+    "        :param m: number of training examples.\n",
+    "        :type m: int\n",
+    "        \"\"\"\n",
+    "        if type(Y) is list:\n",
+    "            assert(len(Y) == m)\n",
+    "            self._Y = np.matrix(Y).T\n",
+    "        else:\n",
+    "            assert(Y.shape == (self._n[self._L], m))\n",
+    "            self._Y = Y\n",
+    "        assert((self._m == m) or (self._m == 0))\n",
+    "        self._m = m\n",
+    "\n",
+    "    def set_all_training_examples(self, X, Y, m=1):\n",
+    "        \"\"\"Set all training examples\n",
+    "\n",
+    "        :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
+    "        :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
+    "        :param m: number of training examples.\n",
+    "        :type m: int\n",
+    "        \"\"\"\n",
+    "        self._m = m\n",
+    "        self.set_all_input_examples(X, m)\n",
+    "        self.set_all_expected_output_examples(Y, m)\n",
+    "\n",
+    "    def prepare(self):\n",
+    "        \"\"\"Prepare network\"\"\"\n",
+    "        assert(self._X is not None)\n",
+    "        assert(self._m > 0)\n",
+    "        m = self._m\n",
+    "        self._A = [self._X]\n",
+    "        self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
+    "        self._Z = [None] + [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
+    "\n",
+    "    def propagate(self):\n",
+    "        \"\"\"Forward propagation\n",
+    "\n",
+    "        :return: matrix of computed outputs (n[L], m)\n",
+    "        \"\"\"\n",
+    "        for l0 in range(self._L):\n",
+    "            l = l0 + 1\n",
+    "            self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]\n",
+    "            self._A[l] = self._g[l][\"function\"](self._Z[l])\n",
+    "        return self._A[self._L]\n",
+    "\n",
+    "    def get_output(self):\n",
+    "        return self._A[self._L]\n",
+    "\n",
+    "    def back_propagation(self, get_cost_function=False):\n",
+    "        \"\"\"Back propagation\n",
+    "\n",
+    "        :param get_cost_function: if True the cost function J\n",
+    "            will be computed and returned.\n",
+    "            J = -1/m * (Y.dot(log(A).T) + (1-Y).dot(log(1-A).T))\n",
+    "        :return: the cost function if get_cost_function==True else None\n",
+    "        \"\"\"\n",
+    "        J = None\n",
+    "        l = self._L\n",
+    "        m = self._m\n",
+    "        dW = [None] + [None] * self._L\n",
+    "        db = [None] + [None] * self._L\n",
+    "        dA = [None] + [None] * self._L\n",
+    "        dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))\n",
+    "        if get_cost_function:\n",
+    "            J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \\\n",
+    "                np.dot((1 - self._Y), np.log(1-self._A[l]).T) )\n",
+    "\n",
+    "        #dZ = self._A[l] - self._Y\n",
+    "        for l in range(self._L, 0, -1):\n",
+    "            dZ = dA[l] * self._g[l][\"derivative\"](self._Z[l])\n",
+    "            dW[l] = 1/self._m * np.dot(dZ, self._A[l-1].T)\n",
+    "            db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
+    "            dA[l-1] = np.dot(self._W[l].T, dZ)\n",
+    "#            dZ = np.dot(self._W[l+1].T, dZ) * self._g[l][\"derivative\"](self._Z[l])\n",
+    "#            dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)\n",
+    "#            db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
+    "        for l in range(self._L, 0, -1):\n",
+    "            self._W[l] = self._W[l] - self._alpha * dW[l]\n",
+    "            self._b[l] = self._b[l] - self._alpha * db[l]\n",
+    "\n",
+    "        return J\n",
+    "\n",
+    "    def minimize_cost(self, min_cost, max_iter=100000, alpha=None):\n",
+    "        \"\"\"Propagate forward then backward in loop while minimizing the cost function.\n",
+    "\n",
+    "        :param min_cost: cost function value to reach in order to stop the algorithm.\n",
+    "        :param max_iter: maximum number of iterations to reach min_cost before stopping the algorithm. Default 100000.\n",
+    "        :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        nb_iter = 0\n",
+    "        if alpha is not None:\n",
+    "            self._alpha = alpha\n",
+    "        self.propagate()\n",
+    "        for i in range(max_iter):\n",
+    "            J = self.back_propagation(True)\n",
+    "            self.propagate()\n",
+    "            nb_iter = i + 1\n",
+    "            if J <= min_cost:\n",
+    "                break\n",
+    "        return {\"iterations\": nb_iter, \"cost_function\": J}\n",
+    "\n",
+    "    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):\n",
+    "        \"\"\"Tune parameters in order to learn examples by propagate and backpropagate.\n",
+    "\n",
+    "        :param X: the inputs training examples\n",
+    "        :param Y: the expected outputs training examples\n",
+    "        :param m: the number of examples\n",
+    "        :param min_cost: cost function value to reach in order to stop the algorithm. Default 0.05.\n",
+    "        :param max_iter: maximum number of iterations to reach min_cost before stopping the algorithm. Default 100000.\n",
+    "        :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        self.set_all_training_examples(X, Y, m)\n",
+    "        self.prepare()\n",
+    "        res = self.minimize_cost(min_cost, max_iter, alpha)\n",
+    "        return res\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'iterations': 62, 'cost_function': array([[ 0.04879932]])}\n",
+      "[[ 0.03621064  0.94089041  0.94055051  0.03022811]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "mlp = MultiLayerPerceptron(L=2, n=[2, 2, 1], g=[\"tanh\", \"sigmoid\"], alpha=2)\n",
+    "#mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=[\"sigmoid\"], alpha=0.1)\n",
+    "\n",
+    "X = np.array([[0, 0],\n",
+    "              [0, 1],\n",
+    "              [1, 0],\n",
+    "              [1, 1]])\n",
+    "\n",
+    "Y = np.array([[0],\n",
+    "              [1],\n",
+    "              [1],\n",
+    "              [0]])\n",
+    "\n",
+    "res = mlp.learning(X.T, Y.T, 4)\n",
+    "print(res)\n",
+    "print(mlp.get_output())\n",
+    "#mlp.set_all_training_examples(X.T, Y.T, 4)\n",
+    "#mlp.prepare()\n",
+    "#print(mlp.propagate())\n",
+    "#for i in range(100):\n",
+    "#    print(mlp.back_propagation())\n",
+    "#    mlp.propagate()\n",
+    "#print(mlp.propagate())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
  "nbformat": 4,
  "nbformat_minor": 2
 }
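
For reference, a minimal usage sketch (not part of the commit above) of how the trained network could be evaluated on a new batch once the XOR example cell has run. It only calls methods defined in the patch; note that set_all_input_examples asserts the batch size against the stored m, so the test batch below has the same size (4) as the training batch:

    # hypothetical follow-up cell, reusing `np` and the trained `mlp` from the cells above
    X_test = np.array([[1, 1],
                       [1, 0],
                       [0, 1],
                       [0, 0]])
    mlp.set_all_input_examples(X_test.T, 4)  # batch size must match the stored m (4)
    mlp.prepare()                            # re-allocate the A and Z buffers for this batch
    print(mlp.propagate())                   # outputs should be close to [[0, 1, 1, 0]]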