update jupyter file
This commit is contained in:
parent
c04c6c307c
commit
c97f9d6676
303 mlp.ipynb
@@ -1,6 +1,305 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def sigmoid(x):\n",
"    return 1/(1+np.exp(-x))\n",
"\n",
"def deriv_sigmoid(x):\n",
"    a = sigmoid(x)\n",
"    return a * (1 - a)\n",
"\n",
"def tanh(x):\n",
"    ep = np.exp(x)\n",
"    en = np.exp(-x)\n",
"    return (ep - en)/(ep + en)\n",
"\n",
"def deriv_tanh(x):\n",
"    a = tanh(x)\n",
"    return 1 - (a * a)\n",
"\n",
"class MultiLayerPerceptron(object):\n",
"\n",
"    @staticmethod\n",
"    def relu(x):\n",
"        # np.maximum broadcasts over scalars and arrays alike\n",
"        return np.maximum(x, 0)\n",
"\n",
"    @staticmethod\n",
"    def deriv_relu(x):\n",
"        # derivative of ReLU: 0 where x < 0, 1 elsewhere\n",
"        return np.where(x < 0, 0.0, 1.0)\n",
"\n",
"    @staticmethod\n",
"    def leaky_relu(x):\n",
"        # leaky ReLU: x where x > 0, 0.01*x elsewhere\n",
"        return np.where(x > 0, x, 0.01 * x)\n",
"\n",
"    # staticmethod objects are not callable inside the class body before\n",
"    # Python 3.10, so the dispatch table stores the underlying functions\n",
"    functions = {\n",
"        \"sigmoid\": {\"function\": sigmoid, \"derivative\": deriv_sigmoid},\n",
"        \"tanh\": {\"function\": tanh, \"derivative\": deriv_tanh},\n",
"        \"relu\": {\"function\": relu.__func__, \"derivative\": deriv_relu.__func__},\n",
"    }\n",
"\n",
"    def __init__(self, L=1, n=None, g=None, alpha=0.01):\n",
"        \"\"\"Initializes network geometry and parameters.\n",
"\n",
"        :param L: number of layers, including output and excluding input. Default 1.\n",
"        :type L: int\n",
"        :param n: list of the number of units per layer, including input. Default [2, 1].\n",
"        :type n: list\n",
"        :param g: list of activation function names per layer, excluding input.\n",
"            Possible names are: \"sigmoid\", \"tanh\", \"relu\". Default [\"sigmoid\"].\n",
"        :type g: list\n",
"        :param alpha: learning rate. Default 0.01.\n",
"        \"\"\"\n",
"        w_rand_factor = 1\n",
"        self._L = L\n",
"        if n is None:\n",
"            n = [2, 1]\n",
"        self._n = n\n",
"        if g is None:\n",
"            g = [MultiLayerPerceptron.functions[\"sigmoid\"]]\n",
"        else:\n",
"            g = [MultiLayerPerceptron.functions[fct] for fct in g]\n",
"        self._g = [None] + g\n",
"        self._W = [None] + [np.random.randn(n[l+1], n[l])*w_rand_factor for l in range(L)]\n",
"        self._b = [None] + [np.zeros((n[l+1], 1)) for l in range(L)]\n",
"        assert(len(self._g) == len(self._W))\n",
"        assert(len(self._g) == len(self._b))\n",
"        assert(len(self._g) == len(self._n))\n",
"        self._A = None\n",
"        self._X = None\n",
"        self._Y = None\n",
"        self._Z = None\n",
"        self._m = 0\n",
"        self._alpha = alpha\n",
"\n",
"    def set_all_input_examples(self, X, m=1):\n",
"        \"\"\"Set the input examples.\n",
"\n",
"        :param X: matrix of dimensions (n[0], m). Also accepts a list (len m) of lists (len n[0]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        if type(X) is list:\n",
"            assert(len(X) == m)\n",
"            self._X = np.matrix(X).T\n",
"        else:\n",
"            assert(X.shape == (self._n[0], m))\n",
"            self._X = X\n",
"        assert((self._m == m) or (self._m == 0))\n",
"        self._m = m\n",
"\n",
"    def set_all_expected_output_examples(self, Y, m=1):\n",
"        \"\"\"Set the expected output examples.\n",
"\n",
"        :param Y: matrix of dimensions (n[L], m). Also accepts a list (len m) of lists (len n[L]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        if type(Y) is list:\n",
"            assert(len(Y) == m)\n",
"            self._Y = np.matrix(Y).T\n",
"        else:\n",
"            assert(Y.shape == (self._n[self._L], m))\n",
"            self._Y = Y\n",
"        assert((self._m == m) or (self._m == 0))\n",
"        self._m = m\n",
"\n",
"    def set_all_training_examples(self, X, Y, m=1):\n",
"        \"\"\"Set all training examples.\n",
"\n",
"        :param X: matrix of dimensions (n[0], m). Also accepts a list (len m) of lists (len n[0]).\n",
"        :param Y: matrix of dimensions (n[L], m). Also accepts a list (len m) of lists (len n[L]).\n",
"        :param m: number of training examples.\n",
"        :type m: int\n",
"        \"\"\"\n",
"        self._m = m\n",
"        self.set_all_input_examples(X, m)\n",
"        self.set_all_expected_output_examples(Y, m)\n",
"\n",
"    def prepare(self):\n",
"        \"\"\"Prepare the network buffers.\"\"\"\n",
"        assert(self._X is not None)\n",
"        assert(self._m > 0)\n",
"        m = self._m\n",
"        self._A = [self._X]\n",
"        self._A += [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"        self._Z = [None] + [np.empty((self._n[l+1], m)) for l in range(self._L)]\n",
"\n",
"    def propagate(self):\n",
"        \"\"\"Forward propagation.\n",
"\n",
"        :return: matrix of computed outputs (n[L], m)\n",
"        \"\"\"\n",
"        for l0 in range(self._L):\n",
"            l = l0 + 1\n",
"            self._Z[l] = np.dot(self._W[l], self._A[l-1]) + self._b[l]\n",
"            self._A[l] = self._g[l][\"function\"](self._Z[l])\n",
"        return self._A[self._L]\n",
"\n",
"    def get_output(self):\n",
"        return self._A[self._L]\n",
"\n",
"    def back_propagation(self, get_cost_function=False):\n",
"        \"\"\"Back propagation.\n",
"\n",
"        :param get_cost_function: if True the cost function\n",
"            J = -1/m * (np.dot(Y, np.log(A).T) + np.dot(1-Y, np.log(1-A).T))\n",
"            will be computed and returned.\n",
"        :return: the cost function if get_cost_function==True else None\n",
"        \"\"\"\n",
"        J = None\n",
"        l = self._L\n",
"        m = self._m\n",
"        dW = [None] + [None] * self._L\n",
"        db = [None] + [None] * self._L\n",
"        dA = [None] + [None] * self._L\n",
"        dA[l] = -self._Y/self._A[l] + ((1-self._Y)/(1-self._A[l]))\n",
"        if get_cost_function:\n",
"            J = -1/m * ( np.dot(self._Y, np.log(self._A[l]).T) + \\\n",
"                         np.dot((1 - self._Y), np.log(1-self._A[l]).T) )\n",
"\n",
"        # for a sigmoid output layer, dZ at layer L simplifies to A[L] - Y\n",
"        for l in range(self._L, 0, -1):\n",
"            dZ = dA[l] * self._g[l][\"derivative\"](self._Z[l])\n",
"            dW[l] = 1/m * np.dot(dZ, self._A[l-1].T)\n",
"            db[l] = 1/m * np.sum(dZ, axis=1, keepdims=True)\n",
"            dA[l-1] = np.dot(self._W[l].T, dZ)\n",
"        for l in range(self._L, 0, -1):\n",
"            self._W[l] = self._W[l] - self._alpha * dW[l]\n",
"            self._b[l] = self._b[l] - self._alpha * db[l]\n",
"\n",
"        return J\n",
"\n",
"    def minimize_cost(self, min_cost, max_iter=100000, alpha=None):\n",
"        \"\"\"Propagate forward then backward in a loop, minimizing the cost function.\n",
"\n",
"        :param min_cost: cost function value to reach in order to stop the algorithm.\n",
"        :param max_iter: maximum number of iterations to reach min cost before stopping. Default 100000.\n",
"        :param alpha: learning rate; if None, use the instance alpha value. Default None.\n",
"        \"\"\"\n",
"        nb_iter = 0\n",
"        if alpha is None:\n",
"            alpha = self._alpha\n",
"        self._alpha = alpha  # back_propagation reads the instance learning rate\n",
"        self.propagate()\n",
"        for i in range(max_iter):\n",
"            J = self.back_propagation(True)\n",
"            self.propagate()\n",
"            nb_iter = i + 1\n",
"            if J <= min_cost:\n",
"                break\n",
"        return {\"iterations\": nb_iter, \"cost_function\": J}\n",
"\n",
"    def learning(self, X, Y, m, min_cost=0.05, max_iter=100000, alpha=None):\n",
"        \"\"\"Tune the parameters to learn the examples by propagating and backpropagating.\n",
"\n",
"        :param X: the input training examples\n",
"        :param Y: the expected output training examples\n",
"        :param m: the number of examples\n",
"        :param min_cost: cost function value to reach in order to stop the algorithm. Default 0.05.\n",
"        :param max_iter: maximum number of iterations to reach min cost before stopping. Default 100000.\n",
"        :param alpha: learning rate; if None, use the instance alpha value. Default None.\n",
"        \"\"\"\n",
"        self.set_all_training_examples(X, Y, m)\n",
"        self.prepare()\n",
"        res = self.minimize_cost(min_cost, max_iter, alpha)\n",
"        return res\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'iterations': 62, 'cost_function': array([[ 0.04879932]])}\n",
"[[ 0.03621064 0.94089041 0.94055051 0.03022811]]\n"
]
}
],
"source": [
"mlp = MultiLayerPerceptron(L=2, n=[2, 2, 1], g=[\"tanh\", \"sigmoid\"], alpha=2)\n",
"#mlp = MultiLayerPerceptron(L=1, n=[2, 1], g=[\"sigmoid\"], alpha=0.1)\n",
"\n",
"X = np.array([[0, 0],\n",
"              [0, 1],\n",
"              [1, 0],\n",
"              [1, 1]])\n",
"\n",
"Y = np.array([[0],\n",
"              [1],\n",
"              [1],\n",
"              [0]])\n",
"\n",
"res = mlp.learning(X.T, Y.T, 4)\n",
"print(res)\n",
"print(mlp.get_output())\n",
"#mlp.set_all_training_examples(X.T, Y.T, 4)\n",
"#mlp.prepare()\n",
"#print(mlp.propagate())\n",
"#for i in range(100):\n",
"#    print(mlp.back_propagation())\n",
"#    mlp.propagate()\n",
"#print(mlp.propagate())\n"
]
},
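{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Follow-up sketch: thresholding the output probabilities at 0.5 should\n",
"# recover the XOR truth table, i.e. [[0 1 1 0]] for the inputs above.\n",
"print((mlp.get_output() > 0.5).astype(int))\n"
]
},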
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}