MultiLayerPerceptron/mlp.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"try:\n",
" import matplotlib.pyplot as mp\n",
"except:\n",
" mp = None\n",
"\n",
"\n",
"def sigmoid(x):\n",
" return 1/(1+np.exp(-x))\n",
"\n",
"\n",
"def deriv_sigmoid(x):\n",
" a = sigmoid(x)\n",
" return a * (1 - a)\n",
"\n",
"\n",
"def tanh(x):\n",
" ep = np.exp(x)\n",
" en = np.exp(-x)\n",
" return (ep - en)/(ep + en)\n",
"\n",
"\n",
"def deriv_tanh(x):\n",
" a = tanh(x)\n",
" return 1 - (a * a)\n",
"\n",
"\n",
"def relu(x):\n",
" # vectorized comparison, so scalars and ndarrays both work\n",
" return np.maximum(0, x)\n",
"\n",
"\n",
"def deriv_relu(x):\n",
" # subgradient of relu: 1 where x > 0, else 0\n",
" return np.where(x > 0, 1.0, 0.0)\n",
"\n",
"\n",
"def leaky_relu(x):\n",
" # x where positive, 0.01*x otherwise\n",
" return np.where(x > 0, x, 0.01 * x)\n",
"\n",
"\n",
"def deriv_leaky_relu(x):\n",
" return np.where(x > 0, 1.0, 0.01)\n",
"\n",
"\n",
"class MultiLayerPerceptron(object):\n",
"\n",
" functions = {\n",
" \"sigmoid\": {\"function\": sigmoid, \"derivative\": deriv_sigmoid},\n",
" \"tanh\": {\"function\": tanh, \"derivative\": deriv_tanh},\n",
" \"relu\": {\"function\": relu, \"derivative\": deriv_relu},\n",
" }\n",
"\n",
" def __init__(self, L=1, n=None, g=None, alpha=0.01, lambd=0):\n",
" \"\"\"Initializes network geometry and parameters\n",
" :param L: number of layers including output and excluding input. Default 1.\n",
" :type L: int\n",
" :param n: list of numbers of units per layer including input. Default [2, 1].\n",
" :type n: list\n",
" :param g: list of activation function names per layer excluding input.\n",
" Possible names are: \"sigmoid\", \"tanh\", \"relu\", \"leaky_relu\". Default [\"sigmoid\"].\n",
" :type g: list\n",
" :param alpha: learning rate. Default 0.01.\n",
" :param lambd: L2 regularization factor (weight decay). Default 0.\n",
" \"\"\"\n",
" w_rand_factor = 1\n",
" self._L = L\n",
" if n is None:\n",
" n = [2, 1]\n",
" self._n = n\n",
" if g is None:\n",
" g = [MultiLayerPerceptron.functions[\"sigmoid\"]]\n",
" else:\n",
" g = [MultiLayerPerceptron.functions[fct] for fct in g]\n",
" self._g = [None] + g\n",
" self._W = [None] + [np.random.randn(n[l+1], n[l])*w_rand_factor for l in range(L)]\n",
" self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]\n",
" assert(len(self._g) == len(self._W))\n",
" assert(len(self._g) == len(self._b))\n",
" assert(len(self._g) == len(self._n))\n",
" self._A = None\n",
" self._X = None\n",
" self._Y = None\n",
" self._Z = None\n",
" self._m = 0\n",
" self._alpha = alpha\n",
" self._lambda = lambd\n",
"\n",
" def set_all_input_examples(self, X, m=1):\n",
" \"\"\"Set the input examples.\n",
"\n",
" :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" if type(X) is list:\n",
" assert(len(X) == m)\n",
" self._X = np.matrix(X).T\n",
" else:\n",
" assert(X.shape == (self._n[0], self._m))\n",
" self._X = X\n",
" self._m = m\n",
" assert((self._m == m) or (self._m == 0))\n",
" self._m = m\n",
"\n",
" def set_all_expected_output_examples(self, Y, m=1):\n",
" \"\"\"Set the output examples\n",
"\n",
" :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" if type(Y) is list:\n",
" assert(len(Y) == m)\n",
" self._Y = np.matrix(Y).T\n",
" else:\n",
" assert(Y.shape == (self._n[self._L], self._m))\n",
" self._Y = Y\n",
" assert((self._m == m) or (self._m == 0))\n",
" self._m = m\n",
"\n",
" def set_all_training_examples(self, X, Y, m=1):\n",
" \"\"\"Set all training examples\n",
"\n",
" :param X: matrix of dimensions (n[0], m). Accepts also a list (len m) of lists (len n[0])\n",
" :param Y: matrix of dimensions (n[L], m). Accepts also a list (len m) of lists (len n[L])\n",
" :param m: number of training examples.\n",
" :type m: int\n",
" \"\"\"\n",
" self._m = m\n",
" self.set_all_input_examples(X, m)\n",
" self.set_all_expected_output_examples(Y, m)\n",
"\n",
" def prepare(self):\n",
" \"\"\"Prepare network\"\"\"\n",
" assert(self._X is not None)\n",
" assert(self._m > 0)\n",
" m = self._m\n",
" self._A = [self._X]\n",
" self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
" self._Z = [None] + [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"\n",
" def propagate(self):\n",
" \"\"\"Forward propagation\n",
"\n",
" :return: matrix of computed outputs (n[L], m)\n",
" \"\"\"\n",
" for l0 in range(self._L):\n",
" l = l0 + 1\n",
" self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]\n",
" self._A[l] = self._g[l][\"function\"](self._Z[l])\n",
" return self._A[self._L]\n",
"\n",
" def get_output(self):\n",
" return self._A[self._L]\n",
"\n",
" def get_weights(self):\n",
" return self._W[1:]\n",
"\n",
" def back_propagation(self, get_cost_function=False):\n",
" \"\"\"Back propagation\n",
"\n",
" :param get_cost_function: if True the cost function J\n",
" will be computed and returned.\n",
" J = -1/m((Y(A.T)) + (1-Y)(A.T))\n",
" :return: the cost function if get_cost_function==True else None\n",
" \"\"\"\n",
" J = None\n",
" l = self._L\n",
" m = self._m\n",
" dW = [None] + [None] * self._L\n",
" db = [None] + [None] * self._L\n",
" dA = [None] + [None] * self._L\n",
" dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))\n",
" if get_cost_function:\n",
" # squared Frobenius norms, so the cost matches the\n",
" # weight-decay gradient applied in the update below\n",
" wnorms = 0\n",
" for w in self._W[1:]:\n",
" wnorms += np.sum(np.square(w))\n",
" J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \\\n",
" np.dot((1 - self._Y), np.log(1-self._A[l]).T) ) + \\\n",
" self._lambda/(2*m) * wnorms # L2 regularization\n",
"\n",
" #dZ = self._A[l] - self._Y\n",
" for l in range(self._L, 0, -1):\n",
" dZ = dA[l] * self._g[l][\"derivative\"](self._Z[l])\n",
" dW[l] = 1/self._m * np.dot(dZ, self._A[l-1].T)\n",
" db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
" dA[l-1] = np.dot(self._W[l].T, dZ)\n",
"# dZ = np.dot(self._W[l+1].T, dZ) * self._g[l][\"derivative\"](self._Z[l])\n",
"# dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)\n",
"# db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
" for l in range(self._L, 0, -1):\n",
" self._W[l] = self._W[l] - self._alpha * dW[l] - \\\n",
" (self._alpha*self._lambda/m * self._W[l]) # regularization\n",
" self._b[l] = self._b[l] - self._alpha * db[l]\n",
"\n",
" return J\n",
"\n",
" def minimize_cost(self, min_cost, max_iter=100000, alpha=None, plot=False):\n",
" \"\"\"Propagate forward then backward in loop while minimizing the cost function.\n",
"\n",
" :param min_cost: cost function value to reach in order to stop algo.\n",
" :param max_iter: maximum number of iterations to reach min cost befor stoping algo. (Default 100000).\n",
" :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
"\n",
" \"\"\"\n",
" nb_iter = 0\n",
" if alpha is None:\n",
" alpha = self._alpha\n",
" self.propagate()\n",
" if plot:\n",
" y=[]\n",
" x=[]\n",
" for i in range(max_iter):\n",
" J = self.back_propagation(True)\n",
" if plot:\n",
" y.append(J[0][0])\n",
" x.append(nb_iter)\n",
" self.propagate()\n",
" nb_iter = i + 1\n",
" if J <= min_cost:\n",
" break\n",
" if mp and plot:\n",
" mp.plot(x,y)\n",
" mp.show()\n",
" return {\"iterations\": nb_iter, \"cost_function\": J}\n",
"\n",
" def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None, plot=False):\n",
" \"\"\"Tune parameters in order to learn examples by propagate and backpropagate.\n",
"\n",
" :param X: the inputs training examples\n",
" :param Y: the expected outputs training examples\n",
" :param m: the number of examples\n",
" :param min_cost: cost function value to reach in order to stop algo. Default 0.0.5\n",
" :param max_iter: maximum number of iterations to reach min cost befor stoping algo. (Default 100000).\n",
" :param alpha: learning rate, if None use the instance alpha value. Default None.\n",
"\n",
" \"\"\"\n",
" self.set_all_training_examples(X, Y, m)\n",
" self.prepare()\n",
" res = self.minimize_cost(min_cost, max_iter, alpha, plot)\n",
" return res\n"
]
},
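{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sanity check, not part of the original notebook: verify the\n",
"# analytic derivatives defined above against a centered finite difference.\n",
"h = 1e-6\n",
"xs = np.array([-2.0, -0.5, 0.3, 1.7])\n",
"for fct, deriv in [(sigmoid, deriv_sigmoid), (tanh, deriv_tanh)]:\n",
"    numeric = (fct(xs + h) - fct(xs - h)) / (2 * h)\n",
"    print(fct.__name__, np.allclose(deriv(xs), numeric, atol=1e-6))\n"
]
},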
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAIABJREFUeJzt3Xl4lfWd9/H3N/u+EAKBEPYdRaQR17pb0VZs6zwt2s2pj3ZmpNpx2o480+nj2OnYcWbaOjO0U2vttLXKKFWLS6V1X6pIQERJCEbWAIEACQlkT77zxznQGLMc4CRnyed1XedK7vv8cs4nXMePd+7tZ+6OiIjEn4RIBxARkcGhghcRiVMqeBGROKWCFxGJUyp4EZE4pYIXEYlTKngRkTgVUsGb2UIzqzSzKjO7vZfnJ5jZc2a2wcxeNLNx4Y8qIiLHwwa60MnMEoHNwGVANbAGuNbdy7uNeQR40t1/YWYXA3/u7l8YvNgiIjKQpBDGLACq3H0LgJktB64GyruNmQ3cFvz+BeDxgV505MiRPnHixOMKKyIy3K1du3a/uxeGMjaUgi8GdnZbrgbO7DHmbeDTwD3Ap4BsMytw9wN9vejEiRMpKysLJaOIiASZ2fZQx4brIOvXgQvM7C3gAmAX0NlLsJvMrMzMympra8P01iIi0ptQCn4XUNJteVxw3THuvtvdP+3upwN/F1xX3/OF3P1edy9199LCwpD+whARkRMUSsGvAaaZ2SQzSwEWAyu7DzCzkWZ29LWWAveHN6aIiByvAQve3TuAJcAqoAJ42N03mtmdZrYoOOxCoNLMNgOjge8OUl4REQnRgKdJDpbS0lLXQVYRkeNjZmvdvTSUsbqSVUQkTqngRUTiVMwV/JptB7n7mU10dWmqQRGR/sRcwb+9s54fvfg+ja0dkY4iIhLVYq7gc9KTATjU1B7hJCIi0S3mCj7vaME3q+BFRPoTcwWfq4IXEQlJzBV8XkYKAPXNbRFOIiIS3WKu4LUFLyISmpgt+HodZBUR6VfMFXxacgIpSQk0aAteRKRfMVfwZkZuerK24EVEBhBzBQ+BUyW1D15EpH8xWfC5KngRkQHFZMHnZSRTr4IXEelXSAVvZgvNrNLMqszs9l6eH29mL5jZW2a2wcyuDH/UP8lJT9ZBVhGRAQxY8GaWCCwDrgBmA9ea2ewew75FYKan0wlM6fejcAftLnCQVRc6iYj0J5Qt+AVAlbtvcfc2YDlwdY8xDuQEv88Fdocv4oflpadwpK2T9s6uwXwbEZGYFkrBFwM7uy1XB9d1dwfweTOrBp4GvhqWdH3ITU8CdDWriEh/wnWQ9Vrgv919HHAl8Csz+9Brm9lNZlZmZmW1tbUn/GZH70ejghcR6VsoBb8LKOm2PC64rrsbgIcB3P11IA0Y2fOF3P1edy9199LCwsITS4zuRyMiEopQCn4NMM3MJplZCoGDqCt7jNkBXAJgZrMIFPyJb6IPIDdDk36IiAxkwIJ39w5gCbAKqCBwtsxGM7vTzBYFh/0NcKOZvQ08BFzv7oM2aaq24EVEBpYUyiB3f5rAwdPu677d7fty4NzwRutb3rE7SupUSRGRvsTklazH5mVt1sTbIiJ9icmCT05MIDMlUbM6iYj0IyYLHgKnSmofvIhI32K24HU/GhGR/sVswedp0g8RkX7FbMHrnvAiIv2L2YLXPeFFRPoXswWvLXgRkf7FbMHnpCfT1tFFS3tnpKOIiESlmC34vIyjV7NqK15EpDcxW/C6H42ISP9ituDz0gP3hNf9aEREehezBa8teBGR/sVswR/bB6+CFxHpVcwW/NE7Sup2BSIivQup4M1soZlVmlmVmd3ey/M/MLP1wcdmM6sPf9QPyk5Nwky7aERE+jLghB9mlggsAy4DqoE1ZrYyOMkHAO7+193GfxU4fRCyfkBCgpGr+9GIiPQplC34BUCVu29x9zZgOXB1P+OvJTBt36DT1awiIn0LpeCLgZ3dlquD6z7EzCYAk4DnTz7awPLSdT8aEZG+hPsg62Jghbv3ev8AM7vJzMrMrKy2tvak3yxHW/AiIn0KpeB3ASXdlscF1/VmMf3snnH3e9291N1LCwsLQ0/Zh7yMFA7pQicRkV6FUvBrgGlmNsnMUgiU+Mqeg8xsJpAPvB7eiH3LTU/SFryISB8GLHh37wCWAKuACuBhd99oZnea2aJuQxcDy93dByfqh+WlB+Zl7eoasrcUEYkZA54mCeDuTwNP91j37R7Ld4QvVmhy05Ppcjjc1kFOWvJQv72ISFSL2StZodv9aHQuvIjIh8R2wWfohmMiIn2J7YLXHSVFRPoU0wWvWZ1ERPoW0wWvLXgRkb7FdMEfndVJBS8i8mExXfBpyQmkJCZQ36yrWUVEeorpgjczctKTNemHiEgvYrrgIXCgVQdZRUQ+LOYLXveEFxHpXcwXfJ5mdRIR6VXMF7y24EVEehf7BZ+hg6wiIr2J/YJPT6axtYOOzq5IRxERiSpxUfAADS0dEU4iIhJdYr7g/3Q/Gl3sJCLSXUgFb2YLzazSzKrM7PY+xnzGzMrNbKOZPRjemH3T/WhERHo34IxOZpYILAMuA6qBNWa20t3Lu42ZBiwFznX3OjMbNViBe8rV/WhERHoVyhb8AqDK3be4exuwHLi6x5gbgWXuXgfg7vvCG7Nv2oIXEeldKAVfDOzstlwdXNfddGC6mb1mZm+Y2cLeXsjMbjKzMjMrq62tPbHEPeRpVicRkV6F6yBrEjANuBC4FvipmeX1HOTu97p7qbuXFhYWhuWNj27B62pWEZEPCqXgdwEl3ZbHBdd1Vw2sdPd2d98KbCZQ+IMuOTGBzJREahtbh+LtRERiRigFvwaYZmaTzCwFWAys7DHmcQJb75jZSAK7bLaEMWe/zp06kt+sq2ZXffNQvaWISNQbsODdvQNYAqwCKoCH3X2jmd1pZouCw1YBB8ysHHgB+Ia7Hxis0D19+6rZuMPfP/4u7j5UbysiEtUsUoVYWlrqZWVlYXu9+17Zwj8+VcGy6+bz8bljwva6IiLRxMzWuntpKGNj/krWo64/ZyKnFOdwxxMbdUaNiAhxVPBJiQl879NzOXC4lX9+ZlOk44iIRFzcFDzAKcW5fPncSTy4egdrth2MdBwRkYiKq4IH+OvLplOcl87SR9+hoUW7akRk+Iq7gs9MTeKuT5/Ktv1HuOZHf2TnwaZIRxIRiYi4K3iA86cX8ssvL2BvQwufXPYaa7fXRTqSiMiQi8uCBzhn6kgeu/lcstKSuPanb/Db9T0vvhURiW9xW/AAUwqzePyvzmVeSR63Ll/P3c9s0tR+IjJsxHXBA+RnpvCrGxaw+IwSfvTi+3z23jeortN+eRGJf3Ff8ACpSYl875q53LN4HpU1jVxxzys8tWFPpGOJiAyqYVHwR109r5inb/kokwuzuPnBdSx9dANHWjVZt4jEp2FV8ADjCzJY8Rdn85cXTmH5mp187Acv8/Lm8Ew+IiISTYZdwUPgHvJ/u3Amj3zlbNKSE/ji/
W/y9Ufepr6pLdLRRETCZlgW/FGlE0fw1C0fZclFU3nsrV1c+v2XeXLDbt1yWETiwrAueIC05ES+fvkMVi45l6LcVJY8+BbX/XQ1lTWNkY4mInJSQip4M1toZpVmVmVmt/fy/PVmVmtm64OP/xv+qINrzthcHv+rc/nOJ0+hoqaBK//9Fe5YuZFDmutVRGLUgBN+mFkigTlWLyMw9+oa4Fp3L+825nqg1N2XhPrG4Z7wI5zqjrTxb3+o5MHVO8jLSOGWi6dy3ZkTSEka9n/wiEiEhXvCjwVAlbtvcfc2YDlw9ckEjHb5mSn84ydP5Ymvnsf00Vnc8UQ5l3z/RR5/axddXdo/LyKxIZSCLwZ2dluuDq7r6Roz22BmK8ysJCzpImzO2FweuvEs/vvPzyA7NZmv/c96rvz3V3iuYq8OxIpI1AvXPocngInuPhf4A/CL3gaZ2U1mVmZmZbW1sXHuuZlx4YxRPPnV87hn8Tya2jq54RdlXPWfr/LMuzXaoheRqBXKPvizgTvc/fLg8lIAd7+
"text/plain": [
"<matplotlib.figure.Figure at 0x7f99a3370b70>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'iterations': 68, 'cost_function': array([[ 0.04958664]])}\n",
"[[ 0.01220273 0.95827161 0.95016051 0.05815676]]\n",
"[array([[-3.29388938, 2.64675614],\n",
" [ 1.70522303, -2.64034303],\n",
" [-2.2292173 , -1.73490183]]), array([[ 3.5237224 , 3.48321649, -2.70213352]])]\n"
]
}
],
"source": [
"mlp = MultiLayerPerceptron(L=2, n=[2, 3, 1], g=[\"tanh\", \"sigmoid\"], alpha=2, lambd=0.005)\n",
"#mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=[\"sigmoid\"], alpha=0.1)\n",
"\n",
"X = np.array([[0, 0],\n",
" [0, 1],\n",
" [1, 0],\n",
" [1, 1]])\n",
"\n",
"Y = np.array([[0],\n",
" [1],\n",
" [1],\n",
" [0]])\n",
"\n",
"res = mlp.learning(X.T, Y.T, 4, max_iter=5000, plot=True)\n",
"print(res)\n",
"print(mlp.get_output())\n",
"print(mlp.get_weights())\n",
"#mlp.set_all_training_examples(X.T, Y.T, 4)\n",
"#mlp.prepare()\n",
"#print(mlp.propagate())\n",
"#for i in range(100):\n",
"# print(mlp.back_propagation())\n",
"# mlp.propagate()\n",
"#print(mlp.propagate())\n"
]
},
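{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A usage sketch, not part of the original run: read off binary XOR\n",
"# predictions by thresholding the trained network's sigmoid output at 0.5.\n",
"predictions = (mlp.get_output() > 0.5).astype(int)\n",
"print(predictions)  # expected for the four XOR inputs: [[0 1 1 0]]\n"
]
},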
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}