update jupyter file
This commit is contained in:
parent
c04c6c307c
commit
c97f9d6676
303 mlp.ipynb
@@ -1,6 +1,305 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def sigmoid(x):\n",
"    return 1/(1+np.exp(-x))\n",
"\n",
"def deriv_sigmoid(x):\n",
"    a = sigmoid(x)\n",
"    return a * (1 - a)\n",
"\n",
"def tanh(x):\n",
"    ep = np.exp(x)\n",
"    en = np.exp(-x)\n",
"    return (ep - en)/(ep + en)\n",
"\n",
"def deriv_tanh(x):\n",
"    a = tanh(x)\n",
"    return 1 - (a * a)\n",
"\n",
"class MultiLayerPerceptron(object):\n",
"\n",
"    @staticmethod\n",
"    def relu(x):\n",
"        # np.maximum broadcasts over scalars and arrays alike\n",
"        return np.maximum(x, 0)\n",
"\n",
"    @staticmethod\n",
"    def deriv_relu(x):\n",
"        # derivative of ReLU: 0 where x < 0, 1 elsewhere\n",
"        return np.where(x < 0, 0.0, 1.0)\n",
"\n",
"    @staticmethod\n",
"    def leaky_relu(x):\n",
"        # leaky ReLU: x where x > 0, 0.01*x elsewhere\n",
"        return np.where(x > 0, x, 0.01 * x)\n",
"\n",
"    # staticmethod objects are not callable inside the class body before\n",
"    # Python 3.10, so the dispatch table stores the underlying functions\n",
"    functions = {\n",
"        \"sigmoid\": {\"function\": sigmoid, \"derivative\": deriv_sigmoid},\n",
"        \"tanh\": {\"function\": tanh, \"derivative\": deriv_tanh},\n",
"        \"relu\": {\"function\": relu.__func__, \"derivative\": deriv_relu.__func__},\n",
"    }\n",
"\n",
"    def __init__(self, L=1, n=None, g=None, alpha=0.01):\n",
"        \"\"\"Initializes network geometry and parameters.\n",
"\n",
"        :param L: number of layers, including output and excluding input. Default 1.\n",
"        :type L: int\n",
"        :param n: list of the number of units per layer, including input. Default [2, 1].\n",
"        :type n: list\n",
"        :param g: list of activation function names per layer, excluding input.\n",
"            Possible names are: \"sigmoid\", \"tanh\", \"relu\". Default [\"sigmoid\"].\n",
"        :type g: list\n",
"        :param alpha: learning rate. Default 0.01.\n",
"        \"\"\"\n",
"        w_rand_factor = 1\n",
"        self._L = L\n",
"        if n is None:\n",
"            n = [2, 1]\n",
"        self._n = n\n",
"        if g is None:\n",
"            g = [MultiLayerPerceptron.functions[\"sigmoid\"]]\n",
"        else:\n",
"            g = [MultiLayerPerceptron.functions[fct] for fct in g]\n",
"        self._g = [None] + g\n",
"        self._W = [None] + [np.random.randn(n[l+1], n[l])*w_rand_factor for l in range(L)]\n",
"        self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]\n",
"        assert(len(self._g) == len(self._W))\n",
"        assert(len(self._g) == len(self._b))\n",
"        assert(len(self._g) == len(self._n))\n",
"        self._A = None\n",
"        self._X = None\n",
"        self._Y = None\n",
"        self._Z = None\n",
"        self._m = 0\n",
"        self._alpha = alpha\n",
"\n",
"    def set_all_input_examples(self, X, m=1):\n",
"        \"\"\"Set the input examples.\n",
"\n",
"        :param X: matrix of dimensions (n[0], m). Also accepts a list (len m) of lists (len n[0]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        if type(X) is list:\n",
"            assert(len(X) == m)\n",
"            self._X = np.matrix(X).T\n",
"        else:\n",
"            assert(X.shape == (self._n[0], m))\n",
"            self._X = X\n",
"        assert((self._m == m) or (self._m == 0))\n",
"        self._m = m\n",
"\n",
"    def set_all_expected_output_examples(self, Y, m=1):\n",
"        \"\"\"Set the expected output examples.\n",
"\n",
"        :param Y: matrix of dimensions (n[L], m). Also accepts a list (len m) of lists (len n[L]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        if type(Y) is list:\n",
"            assert(len(Y) == m)\n",
"            self._Y = np.matrix(Y).T\n",
"        else:\n",
"            assert(Y.shape == (self._n[self._L], m))\n",
"            self._Y = Y\n",
"        assert((self._m == m) or (self._m == 0))\n",
"        self._m = m\n",
"\n",
"    def set_all_training_examples(self, X, Y, m=1):\n",
"        \"\"\"Set all training examples.\n",
"\n",
"        :param X: matrix of dimensions (n[0], m). Also accepts a list (len m) of lists (len n[0]).\n",
"        :param Y: matrix of dimensions (n[L], m). Also accepts a list (len m) of lists (len n[L]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        self._m = m\n",
"        self.set_all_input_examples(X, m)\n",
"        self.set_all_expected_output_examples(Y, m)\n",
"\n",
"    def prepare(self):\n",
"        \"\"\"Prepare the network buffers.\"\"\"\n",
"        assert(self._X is not None)\n",
"        assert(self._m > 0)\n",
"        m = self._m\n",
"        self._A = [self._X]\n",
"        self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"        self._Z = [None] + [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"\n",
"    def propagate(self):\n",
"        \"\"\"Forward propagation.\n",
"\n",
"        :return: matrix of computed outputs (n[L], m)\n",
"        \"\"\"\n",
"        for l0 in range(self._L):\n",
"            l = l0 + 1\n",
"            self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]\n",
"            self._A[l] = self._g[l][\"function\"](self._Z[l])\n",
"        return self._A[self._L]\n",
"\n",
"    def get_output(self):\n",
"        return self._A[self._L]\n",
"\n",
"    def back_propagation(self, get_cost_function=False):\n",
"        \"\"\"Back propagation.\n",
"\n",
"        :param get_cost_function: if True the cost function\n",
"            J = -1/m * (np.dot(Y, np.log(A).T) + np.dot(1-Y, np.log(1-A).T))\n",
"            will be computed and returned.\n",
"        :return: the cost function if get_cost_function==True else None\n",
"        \"\"\"\n",
"        J = None\n",
"        l = self._L\n",
"        m = self._m\n",
"        dW = [None] + [None] * self._L\n",
"        db = [None] + [None] * self._L\n",
"        dA = [None] + [None] * self._L\n",
"        dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))\n",
"        if get_cost_function:\n",
"            J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \\\n",
"                         np.dot((1 - self._Y), np.log(1-self._A[l]).T) )\n",
"\n",
"        # for a sigmoid output layer, dZ at layer L simplifies to A[L] - Y\n",
"        for l in range(self._L, 0, -1):\n",
"            dZ = dA[l] * self._g[l][\"derivative\"](self._Z[l])\n",
"            dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)\n",
"            db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
"            dA[l-1] = np.dot(self._W[l].T, dZ)\n",
"        for l in range(self._L, 0, -1):\n",
"            self._W[l] = self._W[l] - self._alpha * dW[l]\n",
"            self._b[l] = self._b[l] - self._alpha * db[l]\n",
"\n",
"        return J\n",
"\n",
"    def minimize_cost(self, min_cost, max_iter=100000, alpha=None):\n",
"        \"\"\"Propagate forward then backward in a loop, minimizing the cost function.\n",
"\n",
"        :param min_cost: cost function value to reach in order to stop the algorithm.\n",
"        :param max_iter: maximum number of iterations to reach min cost before stopping. Default 100000.\n",
"        :param alpha: learning rate; if None, use the instance alpha value. Default None.\n",
"        \"\"\"\n",
"        nb_iter = 0\n",
"        if alpha is None:\n",
"            alpha = self._alpha\n",
"        self._alpha = alpha  # back_propagation reads the instance learning rate\n",
"        self.propagate()\n",
"        for i in range(max_iter):\n",
"            J = self.back_propagation(True)\n",
"            self.propagate()\n",
"            nb_iter = i + 1\n",
"            if J <= min_cost:\n",
"                break\n",
"        return {\"iterations\": nb_iter, \"cost_function\": J}\n",
"\n",
"    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):\n",
"        \"\"\"Tune the parameters to learn the examples by propagating and backpropagating.\n",
"\n",
"        :param X: the input training examples\n",
"        :param Y: the expected output training examples\n",
"        :param m: the number of examples\n",
"        :param min_cost: cost function value to reach in order to stop the algorithm. Default 0.05.\n",
"        :param max_iter: maximum number of iterations to reach min cost before stopping. Default 100000.\n",
"        :param alpha: learning rate; if None, use the instance alpha value. Default None.\n",
"        \"\"\"\n",
"        self.set_all_training_examples(X, Y, m)\n",
"        self.prepare()\n",
"        res = self.minimize_cost(min_cost, max_iter, alpha)\n",
"        return res\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'iterations': 62, 'cost_function': array([[ 0.04879932]])}\n",
"[[ 0.03621064 0.94089041 0.94055051 0.03022811]]\n"
]
}
],
"source": [
"mlp = MultiLayerPerceptron(L=2, n=[2, 2, 1], g=[\"tanh\", \"sigmoid\"], alpha=2)\n",
"#mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=[\"sigmoid\"], alpha=0.1)\n",
"\n",
"X = np.array([[0, 0],\n",
"              [0, 1],\n",
"              [1, 0],\n",
"              [1, 1]])\n",
"\n",
"Y = np.array([[0],\n",
"              [1],\n",
"              [1],\n",
"              [0]])\n",
"\n",
"res = mlp.learning(X.T, Y.T, 4)\n",
"print(res)\n",
"print(mlp.get_output())\n",
"#mlp.set_all_training_examples(X.T, Y.T, 4)\n",
"#mlp.prepare()\n",
"#print(mlp.propagate())\n",
"#for i in range(100):\n",
"#    print(mlp.back_propagation())\n",
"#    mlp.propagate()\n",
"#print(mlp.propagate())\n"
]
},
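{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Follow-up sketch: thresholding the output probabilities at 0.5 should\n",
"# recover the XOR truth table, i.e. [[0 1 1 0]] for the inputs above.\n",
"print((mlp.get_output() > 0.5).astype(int))\n"
]
},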
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}