{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Implementing k-means" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First we create the dataset. It will have 1500 samples and we want to use k-means to detect clusters in this dataset." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn import datasets\n", "\n", "np.random.seed(0)\n", "\n", "n_samples = 1500\n", "X, y = datasets.make_blobs(n_samples=n_samples, random_state=8)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 5.86749807 8.17715188]\n", " [ 5.61369982 9.93295527]\n", " [ 7.22508428 10.44886194]\n", " ..., \n", " [ 7.73674097 10.82855388]\n", " [ -4.61701094 -9.64855983]\n", " [ -3.48640175 -9.25766922]]\n", "[0 0 0 ..., 0 2 2]\n" ] } ], "source": [ "print(X)\n", "print(y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's visualise the dataset. The clusters are clearly visible to us." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHzNJREFUeJzt3X+MHGd5B/Dvc+txMmdQ1iYm2JscNii6lMgkB6fgYloR\nk+ZC3CYXo/yCtFQguUiFNpRedS4RcQrIB1cKqgStgkDQBkKSxjkMAQzBSEhGdjlzdhxD3CQkcbIJ\nxBBvgNwm3rt7+sfurGd359f+mB878/1Ip9vbnd15Ped7ZuZ5n/d9RVVBRETpNxB3A4iIKBoM+ERE\nGcGAT0SUEQz4REQZwYBPRJQRDPhERBnBgE9ElBEM+EREGcGAT0SUEcviboDd2WefrevWrYu7GURE\nfeXgwYO/UdXVftslKuCvW7cOs7OzcTeDiKiviMiTQbZjSoeIKCMY8ImIMoIBn4goIxjwiYgyggGf\niCgjElWlQ0RkNzNXxPSeY3imVMbavImJsWGMjxTiblbfYsAnokSamSti+64jKFcWAQDFUhnbdx0B\nAAb9DjHgE1EiTe85Vg/2lnJlEdN7jrUd8HmnUMWAT0SxcwrIz5TKjtu6Pe/12UHvFNJ+YmDAJyJX\nUQRAt4B8lmmgVK60bL82bwZu38xcER+5+zAWVRs+w+lOIQspJAZ8InIUVQB0S928tLDYsq1p5DAx\nNoyZuSJu+9ZRnJw/fUJobp/V/uZgb2m+U+hlCimpGPCJyFFUAdAtRdMcp1cOGrj1Ly4EgIYTkVv7\nnNpvd5ZpYNPU3vrdQdEnhZSGdA8DPhE56lUOvVlz4MwPGg1X6m4Gl1fDlVOKxq5YKmPT1F7XAA4A\nxoDgxVML9ZRRsVSGAHD61LV5MzXpHgZ8InLkdtVrz6G3Y2auiB27jzbk5a1AG0SxVMbNdx3y3U5q\n27rJieAVZy5rOck4BXsrheR2t3PzXYcwvedY31ztc6QtETmaGBuGaeQanrMCYLusK2SnTlj3a/XO\neH2eaeRw41vOC3RHAQBnLKuGSK+7Gutqf2au2E4zY8GAT0SOxkcK2Ll1Awp5EwKgkDexc+uGjq5k\n/fLpURAAbxo6C/ceDB6YS+UKJv7nMPKDhud2Vt9B0ol65MKiNjo6qlwAhSh52u2wbN7eK8XSDwaN\nASjE96T1uesvjiW1IyIHVXXUbzte4RORJysdUyyVofBPYThtHzRPn1TzlaX63Y6XpKd2GPCJyJNX\neabdzFwRm6b24ua7DrVsn5w8QufGRwrYN7kZN20ccj2BlSuL2LH7aKTtagerdIgyot20zC0zR3Dn\ngacCDVxqLltMG6lF+Jm5Iu49WPQ8gZXKFczMFeuDv5JUu8+AT5QB7daR3zJzBHfsP+75mQMiWD95\nP9bmTcyfWvAN9m517v1AFb61/XbW3U/SaveZ0iHKgKBpGcudB57y/cxF1XqOPkiZY78Ge0s7Hc/P\nlMptH/Mo9CTgi8iXReQ5EXnI9twqEfmBiDxS+76yF/siova1O2rWayQr+VubN0MbqdyNXl3hfwXA\nFU3PTQL4oaqeD+CHtZ+JKAZuo2Pdnpd+L6uJkTU4rd1jHoWeBHxV/TGA55uevhrAV2uPvwpgvBf7\nIqL2tTtq1lzGbG+nrMFpl16w2vH1da8ysWlqL9ZP3o9NU3sjLeMMs9P2HFV9tvb4VwDOCXFfROTB\n6iQMWjFSrixF2bzUKOTN+jH90cMnHLf5yWPP1/szou7IjaRKR1VVRByTgiKyDcA2ABgaGoqiOUSZ\nND5SCBxU0jA6Ng7zpxbqJZlux685EEY5536Y922/FpE1AFD7/pzTRqp6u6qOquro6tXOt0BEFC2n\nFBD
T+v5OzlewfdcR3DJzpK3jFVVHbpgBfzeA99YevxfAN0PcFxF1wRola+WVAbRMnPbZ6y/GE1Nb\nfKcXyLpyZRF37D/eVhlqVB25PUnpiMidAN4O4GwReRrArQCmANwtIu8H8CSA63qxLyLqLbdBWTu3\nbsC+yc0t20+MDWPinsOoLLF0sxOmkWuoz+90yulO9KpK50ZVXaOqhqqeq6pfUtXfquo7VPV8Vb1M\nVZureIgoAdoZIGRNFcBg35mVg0bPppzuBKdWIMq4IAOEnFarovapttd53msstiXKOL8BQl6rVVF7\nXoj5GDLgE2Wc36CsJKxWlRZxjrIFGPCJMs9vKcM4537pF6aRw6bXr0KuNieFCDAgrdtE1Tnrhjl8\nIvLMK3MQlreCy6jlpM2FDzDgE5GPibFh3HzXobibkUgCOJauAvF2zrphSocoRZoHUPViYq7xkQJW\nDhqOrzWnLbJGgcgnQOsGAz5RSrS72Hg7trxxjePzLMfv7XEOGwM+UUqEucKS28yPVNXNcQ7jrswN\nc/hEKRHmCkus1PHXyTFqd63hbvEKnyglwlxhqZf14wJg0+tX9ezzoubWbdHJMYp63VsGfKKUaHdV\nq24/uxMrlueqs27+tn/uGFYOGg1jFN6zcahnxznqdW+Z0iFKiXZXter0s4ulMgStC3k0c6tPB4AP\n91GZZ2m+grmPXd7w3OhrV7ke53bq793GOIQ1Ilc0QavTj46O6uzsbNzNICIfM3NFz9r8Qt50rU8H\nqqWMSRvM5XYS8/u32DXn5IHq1b/bjJjtbu/adpGDqjrqtx1TOkTUtvGRgutCKAL4pjcmxoY9V4Ry\nq/vvhtQ+10rN3LRxqOepmnZz8n7TWvQaUzpE1JGJseGWq1MB8J6NQ74Ba3yk4HmHMPexyzu6CzBy\nAmNAMN+0CHvQq2avVE0QneTkoxyRy4BPRB3pts+g4JK/tu4cnE4ofiqLisqiYuWgAdXqdMTttKvb\n4Bt1Tr5dDPhE1LFuAqRTQLenUJpPKGeZBl54qYIg3Y4n5yswjWpFUJTz2fj9m+LGgE9EofGqWAly\nh9B8Qlk/eX/gfVu58ygDfpiVUr3AgE9EoQgyirTdO4R2p2qOY4RwEmfJtLBKh4hCEcYoUqcBYF7V\nPknJnScFr/CJKBRhjCJ1S5kA8M2dJ3FBkqgx4BNRKMKqWPFKmXiNfo1ykrKkYsAnolBEXbHidyJw\nSy8x4BMRdSlJFStRT1KWVAz4RBSapFSsJH1AVFRYpUNEqRfm1NH9hFf4RJR6SUovxYkBn4gyISnp\npTiFHvBF5AkAvwewCGAhyJzNRETUe1Fd4V+qqr+JaF9EROSAnbZERBkRRcBXAA+IyEER2RbB/oiI\nyEEUKZ23qWpRRF4N4Aci8rCq/th6sXYS2AYAQ0NDETSHiCibQr/CV9Vi7ftzAO4DcEnT67er6qiq\njq5evTrs5hARZVaoAV9EVojIK63HAC4H8FCY+yQiImdhp3TOAXCfiFj7+rqqfi/kfRIRkYNQA76q\n/hLARWHug4iIgmFZJhFRRjDgExFlBOfSIaJE49KEvcOAT0SJ1c7ShJ2eGLJ0QmFKh4gSy2tpQjvr\nxFAslaE4fWKYmSt6fn6n7+tXvMInosRyW4KwWCpj/eT9OMs0IAKcnK+0bBNkzdqsrXXLgE9EiXWW\naaBUbg3mQHWSLrfXLH5r1mZtrVumdIgosapjNjvnt2at2+tpXeuWV/hEFKqgnaJO25UcUjVBBVmz\ndmJsuKFTOOj7+hUDPhGFJmiVjdt2XikdL4WA1TZZW+tWVDXuNtSNjo7q7Oxs3M0goh7ZNLUXRYd8\neCFvYt/kZgDVYP+Ruw9j0SEWrRw0UJqvIGiUMo0cdm7dgNknn8edB57CoipyIrjxLefhE+Mbuvmn\nJJqIHAyyfCwDPhH1lD014xVdBNVO2RdPLaCy6LylAIGD/cpBA6ruH
bnnv3oF5k8tdZRaSvoVf9CA\nz5QOEfVMc2rGS5Aqm/yggcHlyxzvEpo5lWbaPfLci/XHxVIZE/ccBhAstWTfrh9PCBYGfCLqGae6\n9m784aUFbHnjGtx7sNjTzwWAypJix+6jDcHarS7/I3cfrv/czycElmUSUc8EuRJvR2VJccf+4z0P\n9pZSuYJbZo7Uf3arv19UxfZdR3Dbt456jvxN+shdXuETUWBeV68zc8W2cu5Jccf+47hj/3Hf7cqV\nRdcTj3Wi8LtDiPtKnwGfiALxy29P7znWd8G+V6yBWn53CEC8QZ8pHSIKxG8is7ROR2CXNw2YRq7h\nOftALa8Ruk6TvkWNAZ+IAvGbdyat0xHYiQDvenMBhdq/NSdSD+Qzc0VMjA23nBDs4j4pMuATUSB+\n885cesHqKJsTi5PzFdx7sIhLL1gN08jVB4vZ01s7t25AzmUSoLhPihx4RUSBONXYGwOCV5y5DKX5\nCgZEHEfLZslK27iB5g5saxRwGDl8Drwioq40V+RcesFqnGkM1AO+aQxgYUnrA56yHuyB6h2AdTwU\np0cKB53bJ2wM+ETUwqkip7l08aXKUmarcoKygr193qA4B2Ux4BNRix27WwcYNWOwD6ZYKtcHXgVd\nnzcsDPhE1GBmrtjRlMTkbuKe6mygS01nyaiXU2TAJ6IGcdeKp1GlOdLbRFmqybJMImrQTgDqcgXC\nvjYA4KaNQ11/TpSlmgz4RNSgnQCU1Ty+AHj3xqGuF1WJejlFBnwiauA0WjTLV/JOFMCPHj4BAFix\n3H1krZecSGh1+W5CD/gicoWIHBORR0VkMuz9EVF3xkcK2Ll1Awp5E4JqWeF7Ng55ThmQRcVSGesn\n78eLp9qfutk0cvjMdRdFXpcfaqetiOQAfB7AnwF4GsBPRWS3qv48zP0SUXfGRwotwWj0tatc157N\nqqBHwj4i2aq/n33y+frxjGrd3bCv8C8B8Kiq/lJVTwH4BoCrQ94nEYVgfKSApYwG+05TWoLqDJtO\nwf6O/cfrJ89FrS70Yl+MJQxhB/wCgKdsPz9de46I+lDck3/FxZomoR1508Bnr78YLy8s4eR8pWEF\nrK+5LLhy54GnHJ/vldg7bUVkm4jMisjsiRMn4m4OEXmYGBuGMZC9LtycSFsVScaAYMdVF7quIeD2\nWWGny8IO+EUA59l+Prf2XJ2q3q6qo6o6unp1+qdXJepn4yMFTF97UaaqdoyB9mYBLeRNTF9b7ZBt\nd1CV27TKvRL2SNufAjhfRNajGuhvAPDukPdJRD3WPOlXljL509dehOk9xwIt0G6fKA2opsCc3rdi\nec6xuufGt5zX8lwvhXqFr6oLAD4IYA+AXwC4W1WPhrlPIuota+bMYqlcz0NnRSFvYnykEGhxFyMn\nLYOonMY0mEYOn7xmA27aOFS/os+J4KYeDOTywwVQiMjTpqm9mQrydisHDWx54xrceeAp37RO3jRw\n6NbLW56PYkpkLoBCRD0RJA9tLfBx27eO1hcASYOT85WWdQDcvOAyw6jTmIa4xF6lQ0TJ5leKKQD2\nTW7G+EgBpRQF+3b1Q8kqAz4ReXLKQ9vZA10/BL0wRD0JWqcY8InIkzW3Tt40Wl5rDnR+J4ckK+RN\nFNo8YVlzDUU9CVqnmMMnIl9WHtqvA9J63G+5fPuJy74MoZfmEsx+wIBPRIEF6YBsPjkUS2XkpDp4\nKW8aePHUAiqLyakOLDicuOwntXWvMvGTx55vGHvQLymcZizLJKJINd8lPP/iyyhXltr+HAGwfNkA\nXl5o/71AdQStNSLWTxSlld0IWpbJgE9EsZqZK2LinsOe6742M41cPW8+M1fEP+96EPMdnDT6MS3j\nJGjAZ6ctEcXKmp/HvuDKTRuHPH+2d5KOjxTw84+/s2HkalDWIiabpvZiZq7o/4Y+xxw+EcWu28FJ\nM3NF3Huw2NFsk/Zpi622pBWv8
Imo7zlNQwxU56ixFiExct5X/+XKIqb3HAuphcnAK3wi6ntu0z8s\nqeLxqS0AGjte3e4D2p3OuN8w4BNR33Obhtg+8teeNnKbEC7tI4WZ0iGivuc2DbFbrXy726cFr/CJ\nqO9ZV+5Ba+Xb3T4tWIdPRNTnWIdPREQNGPCJiDKCAZ+IKCMY8ImIMoIBn4goIxjwiYgyggGfiCgj\nGPCJiDKCAZ+IKCMY8ImIMoIBn4goIxjwiYgyggGfiCgjGPCJiDIitIAvIjtEpCgih2pfV4a1LyIi\n8hf2AiifVdV/DXkfREQUAFM6REQZEXbA/5CIPCgiXxaRlU4biMg2EZkVkdkTJ06E3BwiouzqaolD\nEXkAwGscXvoogP0AfgNAAXwcwBpVfZ/X53GJQyKi9gVd4rCrHL6qXhawMV8E8O1u9kVERN0Js0pn\nje3HawA8FNa+iIjIX5hVOp8WkYtRTek8AeBvQtwXERH5CC3gq+pfhvXZRETUPpZlEhFlBAM+EVFG\nMOATEWUEAz4RUUYw4BMRZQQDPhFRRjDgExFlBAM+EVFGMOATEWUEAz4RUUYw4BMRZUTYSxz2hZm5\nIqb3HMMzpTLW5k1MjA1jfKTQ033cMnMEXz9wHEu15QdMYwA7t76xrf1E0U4iSq/MB/yZuSK27zqC\ncmURAFAslbF91xEA8AymzcF33atM7P/lSSzaFpQp1ILy7JPP4479xxveX64s4R/uOtSyH7egHqSd\nPCEQkZeuVrzqtThWvNo0tRfFUrnl+ULexL7JzY7vaQ6+XgTV+aHd2PczM1fExD2HUVk6/Q5jQDB9\n7UWY3nPMtZ0TY8PYsfsoSuVKw2umkcPOrRsY9IlSLpIVr9LgGYcgan/eumoulsrIiWBRtf49CL+t\n7EF8x+6jDcEeACpLih27j+KFpmBuf7/byadcWcT0nmMM+EQEgAEfa/Om45VzftDAxbd9v+Gq2Qry\nQYN9EILqSWV8pNByhW4plSsouLQzJ+J5p+F2QiOi7Ml8lc7E2DBMI9fwnJET/OGlBdcA3EsKYOKe\nQ1g3eb/ndhNjw46/LL+Tz9q82XnjiChVMnOF79ahaaU77K+9+HI0wd5SWfJ+fUCAm2sdvO0wjRwm\nxoY7bBURpU0mAr5fhYv1ZZ0Uogz2QSx1mEGycvgAGv59rOIhyqZMVOm4VeKsHDQwuHwZnimVcaYx\ngLLfpXafMo0c3vXmAu49WGzI99ureHgyIOpfma7SaQ5eTsEeAE7OV3Byvno1n9ZgD1Sv9O888FRL\nvt9+B9DJWAQi6i+p67S10jfFUhmKavCSuBuVAG6du8+Uypjec6yl0sd+MiCidEjdFb5T8FL4D4BK\nO7exA2vzpudYBKZ6iNIjdVf4bsFLUR2VmkWmkcONbzmvtfx0QDB/asH1RHiWabTcLW3fdQQzc8XQ\n20xEvZe6gO9Wd25NYZDF9I7UQvqZxulft2kMAIJ6H0Yz08hBBEz1EKVI6gK+00Aqez16FgcizVeW\ncMf+4w3B/aXKEiqLztf2hbyJnVs3oORyMmi+i5qZK2LT1F6sn7wfm6b28g6AKKFSEfDtAWd6zzG8\n680FFPImBKeDl5V3djohZJFbGkcA7JvcjPGRguvJ0f68Uyc50z5EydT3Ad8p4Nx7sIiJsWE8PrUF\nE2PDmN5zrH71CQA7t26onxCokT2Y+90tAc6d5Ez7ECVT31fp+AWc5vpy+xQFpjGAlypLma7esRNU\nj9Gmqb0N1TheVTp+s40SUXJ0FfBF5FoAOwD8EYBLVHXW9tp2AO8HsAjg71R1Tzf7cuMVcJxOBnZp\nHmzVCevE5zT1hBu3gW1Z7CshSrpuUzoPAdgK4Mf2J0XkDQBuAHAhgCsAfEFEQkmce+WZ3UbYkr+g\naZkgaR8iSoauAr6q/kJVnaLC1QC+oaovq+rjAB4FcEk3+3LjFXBywix9N4q1gVdexkcKDX0izZ3
k\nRJQcYeXwCwD2235+uvZcCxHZBmAbAAwNDbW9I3ue2VqVyro67eVCJWmxYnkORm4g8IygQebU8Uv7\nEFEy+F7hi8gDIvKQw9fVvWiAqt6uqqOqOrp69eqOPmN8pFC/0reCPOfQaZUTwdF/uQKHbr088Khj\nVtwQpYfvFb6qXtbB5xYBnGf7+dzac6Fxm0OHTtv4upX1xxNjw4EXVWHFDVE6hFWHvxvADSJyhois\nB3A+gP8NaV8AGJSCeOK3p4/R+EgBedMI9D5W3BClQ1cBX0SuEZGnAfwxgPtFZA8AqOpRAHcD+DmA\n7wH4W1V1r4/sAbegxI7b05o7YXdcdaHvqGNW3BClR7dVOvep6rmqeoaqnqOqY7bXPqmqr1fVYVX9\nbvdN9eZWreM0S2SW2ac9cKqwuWnjECtuiFKq70faWrxGhY6+dlW9iifrrE5Y63ixwoYoOzKxpq1l\nZq6ID991KPOduQLg8aktcTeDiHok6Jq2fT95WjvGRwp4z8ahzJdrshOWKJsyFfAB4BPjG/DZ6y/O\nbGeukRN2whJlVOYCPlC90v/MdRdlsjN3xfJlzNkTZVRqOm3bZQW9oIOP0uKFgFMqEFH6ZPIK3zI+\nUvCcYkAAbHr9qnqZYhrSQMzfE2VXpgM+4L3koaI6OnXf5GY8PrUFn7nuIhgDyQr67bZm/tRC4OUH\nuVYtUbpkNqVj8Uvt2KdssLbdsfto4Nkmw6ao3nk4zQy6ctCAKhraenK+EmgGTGvpSPtqYUHeR0TJ\nlfkrfMA7tdOcAhkfKeDQrZfjiaktiSjvLORNLLmMpSjNV7DijNZzepAZMLlWLVH6MODXBFm5qTnF\nkR8MNvmYm0Le9J3AbOWgAbcsktU+r1W/Ol1zlmvVEqUPA36N38pNVoqjWCpDUU1x/OGlBRi5xmhs\nGjnctHHINUhbBMC+yc3YcdWFjv0CAwJ87vqLMbh8GZYcLuBzIvX2eZ2svE4GXjp9HxElV+Zz+HZe\n88o4pTgqS4q8aWDFGcsc5++x58CbWYFzfKSA2751FCfnG/sElhT4yN2HXVftWlJtmA/HamNzOwC0\ntCPIDJgTY8MdvY+IkosBPyC3VMYL5QoO3Xp5y/NWsHUK5s2BszTv3AG8qAqB80IuTn0LTicrv5OB\nm07fR0TJxYAf0Nq86TjbpleKwwrCM3NFz8Dp9tlANdg3B/12r7Q7nRGTM2kSpQsDfkDdpDj8AqfT\nZ9spqn0KvNImom4w4AcUZorD+gy3nH0hb2Lf5Oau90NE2caA34YwUxzddLASEQXBgJ8g7CglojCx\nDp+IKCN4hZ8gnL+GiMLEK/wE4fw1RBQmBvwE4fw1RBQmBvwE4fw1RBQmBvwECTJjJxFRp9hpmyAs\nyySiMDHgJwznryGisDClQ0SUEQz4REQZwYBPRJQRDPhERBnBgE9ElBGiLmumxkFETgB4MqSPPxvA\nb0L67F5hG3uDbeydfmgn2wi8VlVX+22UqIAfJhGZVdXRuNvhhW3sDbaxd/qhnWxjcEzpEBFlBAM+\nEVFGZCng3x53AwJgG3uDbeydfmgn2xhQZnL4RERZl6UrfCKiTEttwBeRu0TkUO3rCRE55LLdEyJy\npLbdbMRt3CEiRVs7r3TZ7goROSYij4rIZMRtnBaRh0XkQRG5T0TyLttFfhz9jotU/Xvt9QdF5E1R\ntMu2//NE5Eci8nMROSoif++wzdtF5AXb/4GPRdnGWhs8f3dxH8daG4Ztx+iQiPxORG5u2ibyYyki\nXxaR50TkIdtzq0TkByLySO37Spf3Rv93raqp/wLwGQAfc3ntCQBnx9SuHQD+0WebHIDHALwOwHIA\nhwG8IcI2Xg5gWe3xpwB8KgnHMchxAXAlgO8CEAAbARyI+Pe7BsCbao9fCeD/HNr4dgDfjuP/X9Df\nXdzH0eV3/ytUa89jPZYA/hTAmwA8ZHvu0wAma48nnf5m4vq
7Tu0VvkVEBMB1AO6Muy0dugTAo6r6\nS1U9BeAbAK6Oaueq+n1VXaj9uB/AuVHt20eQ43I1gP/Sqv0A8iKyJqoGquqzqvqz2uPfA/gFgH6c\n+zrW4+jgHQAeU9WwBmkGpqo/BvB809NXA/hq7fFXAYw7vDWWv+vUB3wAfwLg16r6iMvrCuABETko\nItsibJflQ7Xb5C+73PoVADxl+/lpxBc03ofqlZ6TqI9jkOOSmGMnIusAjAA44PDyW2v/B74rIhdG\n2rAqv99dYo5jzQ1wv4CL+1gCwDmq+mzt8a8AnOOwTSzHtK8XQBGRBwC8xuGlj6rqN2uPb4T31f3b\nVLUoIq8G8AMRebh21g69jQD+A8DHUf2D+ziqqaf39WrfQQU5jiLyUQALAL7m8jGhHsd+JiKvAHAv\ngJtV9XdNL/8MwJCq/qHWhzMD4PyIm9g3vzsRWQ7gKgDbHV5OwrFsoKoqIokphezrgK+ql3m9LiLL\nAGwF8GaPzyjWvj8nIveheqvVs//sfm20iMgXAXzb4aUigPNsP59be65nAhzHvwbw5wDeobUEpMNn\nhHocHQQ5LqEfOz8iYqAa7L+mqruaX7efAFT1OyLyBRE5W1UjmxsmwO8u9uNo804AP1PVXze/kIRj\nWfNrEVmjqs/WUl/POWwTyzFNe0rnMgAPq+rTTi+KyAoReaX1GNUOyoectg1DUx70Gpd9/xTA+SKy\nvnZ1cwOA3VG0D6hWEgD4JwBXqeq8yzZxHMcgx2U3gL+qVZlsBPCC7VY7dLX+oy8B+IWq/pvLNq+p\nbQcRuQTVv8nfRtjGIL+7WI9jE9c79riPpc1uAO+tPX4vgG86bBPP33WUPdpRfwH4CoAPND23FsB3\nao9fh2rv+GEAR1FNYUTZvv8GcATAg7Vf9prmNtZ+vhLVCo/HYmjjo6jmGg/Vvv4zKcfR6bgA+ID1\nO0e1quTztdePABiN+Ni9DdV03YO243dlUxs/WDtmh1HtFH9rxG10/N0l6Tja2roC1QB+lu25WI8l\nqiefZwFUUM3Dvx/AqwD8EMAjAB4AsKq2bex/1xxpS0SUEWlP6RARUQ0DPhFRRjDgExFlBAM+EVFG\nMOATEWUEAz4RUUYw4BMRZQQDPhFRRvw/rE0tiIhupOYAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.scatter(X[:, 0], X[:, 1])\n", "plt.show()\n", "plt.gcf().clear()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To start the k-means calculation we pick three centroids - randomly, so in this case we select the first three points in the dataset." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 5.86749807 8.17715188]\n", " [ 5.61369982 9.93295527]\n", " [ 7.22508428 10.44886194]]\n" ] } ], "source": [ "k = 3\n", "centroids = np.array(X[:k])\n", "print(centroids)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Have a look at the dataset again and show the locations of the centroids." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X1sHPd5J/Dvw9XQHiqBV7IUR1qbkRMYTGsoNmPC0VW+\nQ6ympmu1MqPAb4nbHJJCLXBtzr0cA+pixPKlhVjz0hQB+gI3DZLWqWO5lhkmbqrGUYDAKqSLFEqW\nlVjnd0krJ5ZtrduYa2vJfe6P3VnN7s7bvszL7nw/AMHl7OzMT0Pxmd88vzdRVRARUf8biLsAREQU\nDQZ8IqKUYMAnIkoJBnwiopRgwCciSgkGfCKilGDAJyJKCQZ8IqKUYMAnIkqJZXEXwG7VqlW6bt26\nuItBRNRTDh069KqqrvbbL1EBf926dTh48GDcxSAi6iki8lKQ/ZjSISJKCQZ8IqKUYMAnIkoJBnwi\nopRgwCciSolE9dIhIrKbnc9jZs9xnC4UsTZrYnJ8BBOjubiL1bMY8IkokWbn89i++yiKpSUAQL5Q\nxPbdRwGAQb9NDPhElEgze47Xgr2lWFrCzJ7jLQd8PilUMOATUeycAvLpQtFxX7ftXscO+qTQ7zcG\nBnwichVFAHQLyBeZBgrFUtP+a7Nm4PLNzufx2V1HsKRadwynJ4U0pJAY8InIUVQB0C1189biUtO+\nppHB5PgIZufzuPc7x3B24fwNobF8Vvkbg72l8UmhmymkpGLAJyJHUQVAtxRNY5xeMWTgnt++EgDq\nbkRu5XMqv91FpoGN03trTwd5nxRSP6R7GPCJyFG3cuiNGgNndsioq6m7GRqshCunFI1dvlDExum9\nrgEcAIwBwZvnFmspo3yhCAHgdNS1WbNv0j0M+ETkyK3Wa8+ht2J2Po8dc8fq8vJWoA0iXyjirocO\n++4n1X3dZETwjguXNd1knIK9lUJye9q566HDmNlzvGdq+xxpS0SOJsdHYBqZum1WAGyVVUN2aoR1\nr6u3x+t4ppHBHR+6LNATBQBcsKwSIr2eaqza/ux8vpVixoIBn4gcTYzmsHPreuSyJgRALmti59b1\nbdVk/fLpURAAHxy+CI8cCh6YC8USJv/pCLJDhud+VttB0ol65MKiNjY2plwAhSh5Wm2wbNzfK8XS\nC4aMASjE96b1F7ddHUtqR0QOqeqY336s4RORJysdky8UofBPYTjtHzRPn1QLpXLtacdL0lM7DPhE\n5Mmre6bd7HweG6f34q6HDjftn5w8QvsmRnPYN7UJd24Ydr2BFUtL2DF3LNJytYK9dIhSotW0zN2z\nR/HggZOBBi41dlvsti0DT+Bzy3ZhrbyK07oK9y3eirnydaGcy4lUI/zsfB6PHMp73sAKxRJm5/O1\nwV9J6
rvPHD5RCjgFZNPIuDbC3j17FA/sP+F5zIwIyqpYmzWxcG7Rt+eLWz93P1sGnsC08VUMybna\ntgUdxFTp9yIN+rkW2iJy1eDeyjXvBHP4RFQTNC1jefDASd9jLqnWcvRBujm2W7X83LJddcEeAIbk\nHD63bFebR2xPKw3PpwvFlq95FLoS8EXkayLyiog8Zdu2UkS+LyLPVL+v6Ma5iKh1rY6a9RrJGrW1\n8qrL9tciLklwa7NmaCOVO9GtGv7XAdzYsG0KwA9U9QoAP6j+TEQxcBsd67ZdAnar2TLwBJ4Y/Aye\nv+DjeGLwM9gy8ES7RXR1Wle5bL+46+fqBmtwWqvXPApdCfiq+iMArzdsvhnAN6qvvwFgohvnIqLW\ntTpq1lzmHxqs3PqlA69iQIBLB17FtPHVrgf9+xZvxYIO1m1b0EHct3hrV8/TLVaO/vr3r3Z8f93F\nJjZO78XlU49h4/TeSLtxhpnDv0RVX66+/jmAS0I8FxF5aHXUbLFU9j1mVLn1ufJ1mCr9Hk6VV6Gs\nglPlVZE32AaVy5q1a/rDp8847vNvz70eeExDt0XSLVNVVUQck4Iisg3ANgAYHh6OojhEqTQxmgvc\nOyTI6Ngoc+tz5eswdy55Ab7RwrnFWpdMt+vXGAijnHM/zBr+L0RkDQBUv7/itJOq3q+qY6o6tnq1\n8yMQEUXLKQXUmNbvtdx6FM4ulLB991HcPXu0pdHFUTXkhhnw5wB8svr6kwC+HeK5iKgD1ihZK68M\noCkF9OXbrsaL05tr0wv0Wm49KsXSEh7Yf6KlbqhRNeR2JaUjIg8C+DCAVSJyCsA9AKYB7BKRTwN4\nCUC6/xcQJZTb4h47t67HvqlNTftPjo9g8uEjlRx6CdURsK/htF4c+QjYXmQamabBWO1MOd0OjrQl\nSjm31aFyWbMp4FtTBfT67JdxsZZp7PZ0C0FH2nIuHaKUCzJAyGm1KmqdamuN593GqRWIUs5vgJDX\nalXUmjdivoYM+EQp5zcoKwmrVfWLOEfZAgz4RKnnNygrzrlfeoVpZLDxfSuRqc5JIQIMSPM+UTXO\numEOn4g888r9sERhmHIuDa9JmwsfYMAnIh+T4yO466HDcRcjkQRw7LoKxNs464YpHaI+0jiAqhtz\ntEyM5rBiyHB8rzFtEcXsmUmiQOQToHWCAZ+oT7S62HgrNn9gjeP2sm0YT1SzZyZN1BOgdYIBn6hP\nhLnCktvMj3ZJWZkqDp1c5zCeytwwh0/UJ8JcYSnIMXpxZapuauc6u01rASCU/D9r+ER9IswVloIc\nI+jsmQJg4/tWdlymuLjNgtnOdY563VsGfKI+0eqqVp0eu1GQ2TOXD2Yqs26+1jvdPFcMGXVjFD6x\nYbhr1znqdW+Z0iHqE1YKIIy+3/Zj5wtFCJoX8micPfMVWYWT10ziK1t+H19p2PePe6ibZ2GhhPkv\n3FC3bew9K12vcyv9793GOIQ1IpezZRJRy2bn8559851m2rRzm6EzTk43McD/32LXmJMHKrV/t+Uk\nW93ftewBZ8tkSoeIWjYxmqsthNJIAN/0xuT4iOeKUG79/jsh1eNaqZk7Nwx3PVXTak6+1bWGO8WU\nDhG1ZXJ8pKl2KgA+sWHYN2BNjOY8nxDmv3BDW08BRkZgDAgWGhZhD1pr9krVBNFOTj7KEbkM+ETU\nlk7bDHIu+WvrycHphuKntKQoLSlWDBlQrUxH3Eq5Og2+UefkW8WAT0Rt6yRAOgV0ewql8YZykWng\njbdKCNLseHahBNOo9AiKcj4bv39T3BjwiSg0Xj1WgjwhNN5QLp96LPC5rdx5lAE/zJ5S3cCAT0Sh\nCDKKtNUnhFanao5jLv8kzpJpYS8dIgpFGKNInQaAefX2SUruPClYwyeiUIQxitQtZQLAN3eexAVJ\nosaAT0ShCKvHilfKxGv0a5STlCUVAz4RhSLqHit+NwK39BIDPhFRh5L
UYyXqScqSigGfiEKTlB4r\nSR8QFRX20iGivhfm1NG9hDV8Iup7SUovxYkBn4hSISnppTiFHvBF5EUA/wFgCcBikDmbiYio+6Kq\n4V+vqs4rHBMRUSTYaEtElBJRBHwF8LiIHBKRbRGcj4iIHESR0rlOVfMi8i4A3xeRp1X1R9ab1ZvA\nNgAYHh6OoDhEROkUeg1fVfPV768AeBTAtQ3v36+qY6o6tnr16rCLQ0SUWqEGfBFZLiLvtF4DuAHA\nU2Gek4iInIWd0rkEwKMiYp3rH1X1X0I+JxEROQg14Kvq8wCuCvMcREQUDLtlEhGlBAM+EVFKcC4d\nIko0Lk3YPQz4RJRYrSxN2O6NIU03FKZ0iCixvJYmtLNuDPlCEYrzN4bZ+bzn8dv9XK9iDZ+IEstt\nCcJ8oYjLpx7DRaYBEeDsQqlpnyBr1qZtrVsGfCJKrItMA4ViczAHKpN0ub1n8VuzNm1r3TKlQ0SJ\nVRmz2T6/NWvd3u/XtW5ZwyeiUAVtFHXar+CQqgkqyJq1k+MjdY3CQT/XqxjwiSg0QXvZuO3nldLx\nkgvY2yZta92KqsZdhpqxsTE9ePBg3MUgoi7ZOL0XeYd8eC5rYt/UJgCVYP/ZXUew5BCLVgwZKCyU\nEDRKmUYGO7eux8GXXseDB05iSRUZEdzxocvwJxPrO/mnJJqIHAqyfCwDPhF1lT014xVdBJVG2TfP\nLaK05LynAIGD/YohA6ruDblXvGs5Fs6V20otJb3GHzTgM6VDRF3TmJrxEqSXTXbIwNDgMsenhEZO\nXTPtnnnlzdrrfKGIyYePAAiWWrLv14s3BAsDPhF1jVO/9k788q1FbP7AGjxyKN/V4wJAqazYMXes\nLli79cv/7K4jtZ97+YbAbplE1DVBauKtKJUVD+w/0fVgbykUS7h79mjtZ7f+90uq2L77KO79zjHP\nkb9JH7nLGj4RBeZVe52dz7eUc0+KB/afwAP7T/juVywtud54rBuF3xNC3DV9BnwiCsQvvz2z53jP\nBftusQZq+T0hAPEGfaZ0iCgQv4nM+nU6ArusacA0MnXb7AO1vEboOk36FjUGfCIKxG/emX6djsBO\nBPjYNTnkqv/WjEgtkM/O5zE5PtJ0Q7CL+6bIgE9EgfjNO3P9+1dHWZxYnF0o4ZFDeVz//tUwjUxt\nsJg9vbVz63pkXCYBivumyIFXRBSIUx97Y0DwjguXobBQwoCI42jZNFlhGzfQ2IBtjQIOI4fPgVdE\n1JHGHjnXv381LjQGagHfNAawWNbagKe0B3ug8gRgXQ/F+ZHCQef2CRsDPhE1ceqR09h18a1SObW9\ncoKygr193qA4B2Ux4BNRkx1zzQOMGjHYB5MvFGsDr4KuzxsWBnwiqjM7n29rSmJyN/lwZTbQcsNd\nMurlFBnwiahO3H3F+1GpMdLbRNlVk90yiahOKwGowxUIe9oAgDs3DHd8nCi7ajLgE1GdVgJQWvP4\nAuDjG4Y7XlQl6uUUGfCJqI7TaNE01+SdKIAfPn0GALB80H1krZeMSGj98t2EHvBF5EYROS4iz4rI\nVNjnI6LOTIzmsHPreuSyJgSVboWf2DDsOWVAGuULRVw+9RjePNf61M2mkcGXbr0q8n75oTbaikgG\nwF8C+A0ApwD8WETmVPWnYZ6XiDozMZprCkZj71npuvZsWgW9EvYRyVb/+4MvvV67nlGtuxt2Df9a\nAM+q6vOqeg7AtwDcHPI5iSgEE6M5lFMa7NtNaQkqM2w6BfsH9p+o3TyXtLLQi30xljCEHfBzAE7a\nfj5V3UZEPSjuyb/iYk2T0IqsaeDLt12NtxfLOLtQqlsB65suC648eOCk4/Zuib3RVkS2ichBETl4\n5syZuItDRB4mx0dgDKSvCTcj0lKPJGNAsGPLla5rCLgdK+x0WdgBPw/gMtvPl1a31ajq/ao6pqpj\nq1f3//SqRL1sYjSHmVuuSlWvHWO
gtVlAc1kTM7dUGmRbHVTlNq1yt4Q90vbHAK4QkctRCfS3A/h4\nyOckoi5rnPQrTZn8mVuuwsye44EWaLdPlAZUUmBOn1s+mHHs3XPHhy5r2tZNodbwVXURwB8C2APg\nZwB2qeqxMM9JRN1lzZyZLxRreei0yGVNTIzmAi3uYmSkaRCV05gG08jgTz+6HnduGK7V6DMiuLML\nA7n8cAEUIvK0cXpvqoK83YohA5s/sAYPHjjpm9bJmgYO33ND0/YopkTmAihE1BVB8tDWAh/3fudY\nbQGQfnB2odS0DoCbN1xmGHUa0xCX2HvpEFGy+XXFFAD7pjZhYjSHQh8F+1b1QpdVBnwi8uSUh7az\nB7peCHphiHoStHYx4BORJ2tunaxpNL3XGOj8bg5JlsuayLV4w7LmGop6ErR2MYdPRL6sPLRfA6T1\nutdy+fYbl30ZQi+NXTB7AQM+EQUWpAGy8eaQLxSRkcrgpaxp4M1ziygtJad3YM7hxmW/qa272MS/\nPfd63diDXknhNGK3TCKKVONTwutvvo1iqdzycQTA4LIBvL3Y+meByghaa0Ssnyi6VnYiaLdMBnwi\nitXsfB6TDx/xXPe1kWlkannz2fk8/tfuJ7HQxk2jF9MyToIGfDbaElGsrPl57Auu3Llh2PNneyPp\nxGgOP/3ib9aNXA3KWsRk4/RezM7n/T/Q45jDJ6LYdTo4aXY+j0cO5duabdI+bbFVln7FGj4R9Tyn\naYiByhw11iIkRsa79l8sLWFmz/GQSpgMrOETUc9zm/6hrIoXpjcDqG94dXsOaHU6417DgE9EPc9t\nGmL7yF972shtQrh+HynMlA4R9Ty3aYjd+sq3un+/YA2fiHqeVXMP2le+1f37BfvhExH1OPbDJyKi\nOgz4REQpwYBPRJQSDPhERCnBgE9ElBIM+EREKcGAT0SUEgz4REQpwYBPRJQSDPhERCnBgE9ElBIM\n+EREKcGAT0SUEgz4REQpEVrAF5EdIpIXkcPVr5vCOhcREfkLewGUL6vq/wn5HEREFABTOkREKRF2\nwP8jEXlSRL4mIiucdhCRbSJyUEQOnjlzJuTiEBGlV0dLHIrI4wDe7fDW5wHsB/AqAAXwRQBrVPVT\nXsfjEodERK0LusRhRzl8Vf1IwML8LYDvdnIuIiLqTJi9dNbYfvwogKfCOhcREfkLs5fOfSJyNSop\nnRcB/H6I5yIiIh+hBXxV/Z2wjk1ERK1jt0wiopRgwCciSgkGfCKilGDAJyJKCQZ8IqKUYMAnIkoJ\nBnwiopRgwCciSgkGfCKilGDAJyJKCQZ8IqKUCHuJw54wO5/HzJ7jOF0oYm3WxOT4CCZGc109x92z\nR/GPB06gXF1+wDQGsHPrB1o6TxTlJKL+lfqAPzufx/bdR1EsLQEA8oUitu8+CgCewbQx+K672MT+\n589iybagTK4alA++9Doe2H+i7vPFUhn/46HDTedxC+pByskbAhF56WjFq26LY8WrjdN7kS8Um7bn\nsib2TW1y/Exj8PUiqMwP7cZ+ntn5PCYfPoJS+fwnjAHBzC1XYWbPcddyTo6PYMfcMRSKpbr3TCOD\nnVvXM+gT9blIVrzqB6cdgqh9u1VrzheKyIhgSbX2PQi/vexBfMfcsbpgDwClsmLH3DG80RDM7Z93\nu/kUS0uY2XOcAZ+IADDgY23WdKw5Z4cMXH3vv9bVmq0gHzTYByGo3FQmRnNNNXRLoVhCzqWcGRHP\nJw23GxoRpU/qe+lMjo/ANDJ124yM4JdvLboG4G5SAJMPH8a6qcc895scH3H8ZfndfNZmzfYLR0R9\nJTU1fLcGTSvdYX/vzbejCfaWUtn7/QEB7qo28LbCNDKYHB9ps1RE1G9SEfD9erhYX9ZNIcpgH0S5\nzQySlcMHUPfvYy8eonRKRS8dt544K4YMDA0uw+lCERcaAyj6VbV7lGlk8LFrcnjkUL4u32/vxcOb\
nAVHvSnUvncbg5RTsAeDsQglnFyq1+X4N9kClpv/ggZNN+X77E0A7YxGIqLf0XaOtlb7JF4pQVIKX\nxF2oBHBr3D1dKGJmz/Gmnj72mwER9Ye+q+E7BS+F/wCofuc2dmBt1vQci8BUD1H/6LsavlvwUlRG\npaaRaWRwx4cua+5+OiBYOLfoeiO8yDSanpa27z6K2fl86GUmou7ru4Dv1u/cmsIgjekdqYb0C43z\nv27TGAAEtTaMRqaRgQiY6iHqI30X8J0GUtn7o6dxINJCqYwH9p+oC+5vlcooLTnX7XNZEzu3rkfB\n5WbQ+BQ1O5/Hxum9uHzqMWyc3ssnAKKE6ouAbw84M3uO42PX5JDLmhCcD15W3tnphpBGbmkcAbBv\nahMmRnOuN0f7dqdGcqZ9iJKp5wO+U8B55FAek+MjeGF6MybHRzCz53it9gkAO7eur90QqJ49mPs9\nLQHOjeRM+xAlU8/30vELOI39y+1TFJjGAN4qlVPde8dOULlGG6f31vXG8eql4zfbKBElR0cBX0Ru\nAbADwK8AuFZVD9re2w7g0wCWAHxGVfd0ci43XgHH6WZg18+Drdph3ficpp5w4zawLY1tJURJ12lK\n5ykAWwH8yL5RRH4VwO0ArgRwI4C/EpFQEudeeWa3EbbkL2haJkjah4iSoaOAr6o/U1WnqHAzgG+p\n6tuq+gKAZwFc28m53HgFnIwwS9+JfHXglZeJ0Vxdm0hjIzkRJUdYOfwcgP22n09VtzURkW0AtgHA\n8PBwyyey55mtVams2mk3FyrpF8sHMzAyA4FnBA0yp45f2oeIksG3hi8ij4vIUw5fN3ejAKp6v6qO\nqerY6tWr2zrGxGiuVtO3gjzn0GmWEcGx/30jDt9zQ+BRx+xxQ9Q/fGv4qvqRNo6bB3CZ7edLq9tC\n4zaHDp234b0raq8nx0cCL6rCHjdE/SGsfvhzAG4XkQtE5HIAVwD4vyGdCwCDUhAvvnb+Gk2M5pA1\njUCfY48bov7QUcAXkY+KyCkA/wnAYyKyBwBU9RiAXQB+CuBfAPw3VXXvH9kFbkGJDbfnNTbC7thy\npe+oY/a4IeofnfbSeVRVL1XVC1T1ElUdt733p6r6PlUdUdXvdV5Ub269dZxmiUwz+7QHTj1s7tww\nzB43RH2q50faWrxGhY69Z2WtF0/aWY2w1vViDxui9EjFmraW2fk8/vihw6lvzBUAL0xvjrsYRNQl\nQde07fnJ01oxMZrDJzYMp767JhthidIpVQEfAP5kYj2+fNvVqW3MNTLCRliilEpdwAcqNf0v3XpV\nKhtzlw8uY86eKKX6ptG2VVbQCzr4qF+8EXBKBSLqP6ms4VsmRnOeUwwIgI3vW1nrptgPaSDm74nS\nK9UBH/Be8lBRGZ26b2oTXpjejC/dehWMgWQF/VZLs3BuMfDyg1yrlqi/pDalY/FL7dinbLD23TF3\nLPBsk2FTVJ48nGYGXTFkQBV1ZT27UAo0A6a1dKR9tbAgnyOi5Ep9DR/wTu00pkAmRnM4fM8NeHF6\ncyK6d+ayJsouYykKCyUsv6D5nh5kBkyuVUvUfxjwq4Ks3NSY4sgOBZt8zE0ua/pOYLZiyIBbFskq\nn9eqX+2uOcu1aon6DwN+ld/KTVaKI18oQlFJcfzyrUUYmfpobBoZ3Llh2DVIWwTAvqlN2LHlSsd2\ngQEB/uK2qzE0uAxlhwp8RqRWPq+bldfNwEu7nyOi5Ep9Dt/Oa14ZpxRHqazImgaWX7DMcf4eew68\nkRU4J0ZzuPc7x3B2ob5NoKzAZ3cdcV21q6xaNx+OVcbGcgBoKkeQGTAnx0fa+hwRJRcDfkBuqYw3\niiUcvueGpu1WsHUK5o2Bs7Dg3AC8pAqB80IuTm0LTjcrv5uBm3Y/R0TJxYAf0Nqs6TjbpleKwwrC\ns/N5z8DpdmygEuwbg36rNe12Z8TkTJpE/YUBP6BOUhx+gdPp2
HaKSpsCa9pE1AkG/IDCTHFYx3DL\n2eeyJvZNber4PESUbgz4LQgzxdFJAysRURAM+AnChlIiChP74RMRpQRr+AnC+WuIKEys4ScI568h\nojAx4CcI568hojAx4CcI568hojAx4CdIkBk7iYjaxUbbBGG3TCIKEwN+wnD+GiIKC1M6REQpwYBP\nRJQSDPhERCnBgE9ElBIM+EREKSHqsmZqHETkDICXQjr8KgCvhnTsbmEZu4Nl7J5eKCfLCLxHVVf7\n7ZSogB8mETmoqmNxl8MLy9gdLGP39EI5WcbgmNIhIkoJBnwiopRIU8C/P+4CBMAydgfL2D29UE6W\nMaDU5PCJiNIuTTV8IqJU69uALyIPicjh6teLInLYZb8XReRodb+DEZdxh4jkbeW8yWW/G0XkuIg8\nKyJTEZdxRkSeFpEnReRREcm67Bf5dfS7LlLxler7T4rIB6Mol+38l4nID0XkpyJyTET+u8M+HxaR\nN2z/B74QZRmrZfD83cV9HatlGLFdo8Mi8u8iclfDPpFfSxH5moi8IiJP2batFJHvi8gz1e8rXD4b\n/d+1qvb9F4AvAfiCy3svAlgVU7l2APifPvtkADwH4L0ABgEcAfCrEZbxBgDLqq//DMCfJeE6Brku\nAG4C8D0AAmADgAMR/37XAPhg9fU7Afw/hzJ+GMB34/j/F/R3F/d1dPnd/xyVvuexXksA/wXABwE8\nZdt2H4Cp6uspp7+ZuP6u+7aGbxERAXArgAfjLkubrgXwrKo+r6rnAHwLwM1RnVxV/1VVF6s/7gdw\naVTn9hHkutwM4O+1Yj+ArIisiaqAqvqyqv6k+vo/APwMQC/OfR3rdXTw6wCeU9WwBmkGpqo/AvB6\nw+abAXyj+vobACYcPhrL33XfB3wA/xnAL1T1GZf3FcDjInJIRLZFWC7LH1Ufk7/m8uiXA3DS9vMp\nxBc0PoVKTc9J1NcxyHVJzLUTkXUARgEccHj716r/B74nIldGWrAKv99dYq5j1e1wr8DFfS0B4BJV\nfbn6+ucALnHYJ5Zr2tMLoIjI4wDe7fDW51X129XXd8C7dn+dquZF5F0Avi8iT1fv2qGXEcBfA/gi\nKn9wX0Ql9fSpbp07qCDXUUQ+D2ARwDddDhPqdexlIvIOAI8AuEtV/73h7Z8AGFbVX1bbcGYBXBFx\nEXvmdycigwC2ANju8HYSrmUdVVURSUxXyJ4O+Kr6Ea/3RWQZgK0ArvE4Rr76/RUReRSVR62u/Wf3\nK6NFRP4WwHcd3soDuMz286XVbV0T4Dr+VwC/BeDXtZqAdDhGqNfRQZDrEvq18yMiBirB/puqurvx\nffsNQFX/WUT+SkRWqWpkc8ME+N3Ffh1tfhPAT1T1F41vJOFaVv1CRNao6svV1NcrDvvEck37PaXz\nEQBPq+oppzdFZLmIvNN6jUoD5VNO+4ahIQ/6UZdz/xjAFSJyebV2czuAuSjKB1R6EgD4HIAtqrrg\nsk8c1zHIdZkD8LvVXiYbALxhe9QOXbX96O8A/ExV/9xln3dX94OIXIvK3+RrEZYxyO8u1uvYwPWJ\nPe5raTMH4JPV158E8G2HfeL5u46yRTvqLwBfB/AHDdvWAvjn6uv3otI6fgTAMVRSGFGW7x8AHAXw\nZPWXvaaxjNWfb0Klh8dzMZTxWVRyjYerX3+TlOvodF0A/IH1O0elV8lfVt8/CmAs4mt3HSrpuidt\n1++mhjL+YfWaHUGlUfzXIi6j4+8uSdfRVtblqATwi2zbYr2WqNx8XgZQQiUP/2kAFwP4AYBnADwO\nYGV139j/rjnSlogoJfo9pUNERFUM+EREKcGAT0SUEgz4REQpwYBPRJQSDPhERCnBgE9ElBIM+ERE\nKfH/AfwP7JMUAAAAA0lEQVQZms2ERgrAAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, 
"output_type": "display_data" } ], "source": [ "plt.scatter(X[:, 0], X[:, 1])\n", "plt.scatter(centroids[:, 0], centroids[:, 1])\n", "plt.show()\n", "plt.gcf().clear()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we start using Spark. The first operation is the creation of a \"Resilient Distributed Dataset\" (RDD) which means distributing the data to the available nodes. This could be any size of underlying cluster - the user doesn't see the details.\n", "\n", "Note that this is just a demo and normally the dataset would be available on the compute nodes." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "ParallelCollectionRDD[0] at parallelize at PythonRDD.scala:475" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rdd = sc.parallelize(X)\n", "rdd.cache()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We tell Spark to cache this dataset because k-means is an iterative algorithm and we will scan through the data many times. (In this case, the dataset is so small that it will be easily kept in memory. However, if necessary, Spark can spill to disk if the dataset grows too large.)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's see how many records are in the dataset: Call the function count." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1500" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rdd.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For the k-means algorithm we need to define a cost function. We will use the Euclidean distance between two points, i.e. the square root of the sum of the squared coordinate differences."
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from math import sqrt\n", "def cost_function(x, y):\n", " return sqrt((x[0]-y[0])**2 + (x[1]-y[1])**2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can test the cost function on two random points of the dataset:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.9590304461225516" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cost_function(X[205], X[978])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we start the first k-means iteration: For each point in the dataset we calculate the distances to each of the centroids. Then we assign the point to the centroid to which it has the smallest distance.\n", "\n", "Spark allows us to do this quite easily: \"For each\" means we apply the \"map\" function, and define the output as a lambda. So the following means that each point x is mapped to a tuple with itself (x) as the first element, and the second element is a list of distances to all the centroids." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d = rdd.map(lambda x: (x, [cost_function(x, centroids[i]) for i in range(k)]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "At this point, Spark has not actually \"materialised\" the dataset so no computations have actually happened because we only applied a \"transformation\". To kick off the calculation we need to use an \"action\", for example \"take\" which returns the first n elements of a dataset. Let's have a look at the first 2 entries from our mapping."
] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(array([ 5.86749807, 8.17715188]),\n", " [0.0, 1.7740516007978733, 2.646451758613209]),\n", " (array([ 5.61369982, 9.93295527]),\n", " [1.7740516007978733, 0.0, 1.6919573226760787])]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.take(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we know all the distances we can pick the centroid that has the smallest distance to our point. To do this we map each point to a tuple of the index of its closest centroid and the point itself." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(0, array([ 5.86749807, 8.17715188])),\n", " (1, array([ 5.61369982, 9.93295527])),\n", " (2, array([ 7.22508428, 10.44886194])),\n", " (0, array([ 6.76282255, 0.60514535])),\n", " (0, array([ 8.0161824 , 1.54314701])),\n", " (0, array([ 8.40185356, -0.37348132])),\n", " (1, array([ 6.51192277, 9.81342902])),\n", " (0, array([ 7.3996796 , 0.91258881])),\n", " (0, array([ -4.98436335, -11.42227525])),\n", " (0, array([ 9.88825096, 0.90241392]))]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "assignments = d.map(lambda (x, c): (c.index(min(c)), x))\n", "assignments.take(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can plot the assignments of points to their centroids."
] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2MHPd5H/Dvs0veOUsKjLikSNXyzZk1I9ipktY8qKHj\nFgyOSB0itmwDFUQsadaSexbZRHIAwVC0QKLCOMNR1cYUEEk9KZIZ3laOgFqy7NJ2zHNcAYHshjJk\nUTZNST7f0aLF44ssyuRZJG/36R+/ndvZvXnb252X3fl+gMXuzszO/DQnPjPz/N5EVUFERP0vl3QB\niIgoHgz4REQZwYBPRJQRDPhERBnBgE9ElBEM+EREGcGAT0SUEQz4REQZwYBPRJQRK5IugNO6det0\neHg46WIQEfWU559//qyqrg/aLlUBf3h4GEeOHEm6GEREPUVEZsNsx5QOEVFGMOATEWUEAz4RUUYw\n4BMRZQQDPhFRRjDgExFlBAM+EVFGMOATEWUEAz4Rpc7cXAXPPTeM7343h+eeG8bcXCXpIvWFVPW0\nJSKam6vg+PEx1GrzAIBLl2Zx/PgYAGDDhlKSRet5vMMnolSZni4vBntbrTaP6ely2/vik0IzBnwi\nSoRXML506YTr9vbysEHcflK4dGkWgC4+Kbhtn5ULAwM+ES1ROVrB8BeHkfuvOQx/cRiVo90NgH7B\neHBwyPU3g4NDoYP43FwFx47tCfWk0M6FodeJqiZdhkUjIyPK0TKJklU5WsHY18Ywf6URLAsrC5j4\n8ARKN3Qnh/7cc8P1ANsqD6AKQAA0YlMuV8DGjXvwi19M1Nc3Gxy0sHXrDICldQBuBgctXLp0AoOD\nQ1hYuIBq9ZzrNps2jWN6ury47aZN46msRxCR51V1JHA7Bnwichr+4jBmzy8NxtYaCzOfmVn2fufm\nKovB0xnMvZmgPzhooVjcgVOnDvgEccF733uwvv+gkYKbLyZ+crlC0zFzuQKuv34idUE/bMBnSoeI\nmpw4755D91oeRmvaJBwFkMemTeM4ffpJ3zv2fH6tY/9+wgd7IN+1yuO0YMAnoiZDa9xz6F7Lg3jl\n08Op4tixXVhYWJpycarV3g6x/xzaubN3Sx0Bpplor1bsMuATUZPx0XEUVhaalhVWFjA+Ot72vuw7\ne6/g2S2qFwO2WAmRsN2O8ti4cQ8GBy3PLXq1YpcBn4ialG4oYeLDE7DWWBAIrDXWsits3drUx0+Q\nzw9C9XLI7at4/fW/RbG4o36n764X0zsM+ES0ROmGEmY+M4ODHz8IANj9ld2hmme2Nud8OzCnHgdF\ntXqhvV/oZZw+/SSuv34i8E6/l+7yGfCJyJXdPHP2/CwUitnzsxj72phn0Hfb/vTbEnOpu2dh4Rw2\nbChh69YZ36B/7NhuvPzyvhhLtnwM+ER9rt1OVPb2u76yq6ktPgDMX5lHeaocevtHphUpavm9bMXi\nDpgWPm4Uv/jFw013+mntucvB04j6WGsnKvsuHYBrTr5ytIJPPv1JXKld8dyns3mmWyctp6kzwD3v\n9Q6VQa45DGx6FBg8DVy6Bpj+FHB6+zJ31rYc5uYqePnlO107ZjVTTE+XsWFDKdWDv7HjFVEfa7cT\n1br71uHcr/2DW17yqGkNQ2uGcOHyhcDtn/i3wMZ3tFVsACbYX38/kL/UWFYdBI7fFV/QFxloo7JX\nsG1bzbMXsbM3cLex4xURtd2JKih4A0BVq4s5+jDbPzoNvN3SKjPMfeamR5uDPWC+b3o0+LfdEj7Y\nY3EMoKDB35LUlYAvIo+JyGkRecmxbK2IfFtEXqm/X92NYxFReN3uRLUcU2eA+48Dp94Gamrewxg8\n3d
7yJOVyBWzaZPop+A3+lrRu3eF/CcCHWpbdDWBKVTcDmKp/J6IYtduJavXA6lD73fki8LO/Bqr3\nmvedL/pvP3UG2Pl9YPRZ837eu4pg0aVr2luenHzT+DqbNo1DZKBpC5GBxQtCkroS8FX1WQBvtCy+\nCcCB+ucDAD7ajWMRUXjtdqIazA8G7nPni8AjXwOGz5sAMnzefA8K+k7/eDo4rTP9KZOzd6oOmuXp\nUltSGdtaN6q6gFdeuTPxVjtRttLZoKqv1z+fArDBbSMRGQMwBgBDQ8k/8hD1m9INpdC9ZN/4det9\n21KfnwJWtdyhr7pilj/xO+HK9AfXABLQdMeumE2ulU44raka0/u29RGmtjgeUJKtdmKptFVzuXO9\nnqvqhKqOqOrI+vXr4ygOEXkIk9sfOt/e8laj64E1K8Nte3o78L0vA//3O+Y9bcEeAC5fPtt0xx48\nYmdywzJEGfDnRORaAKi/p7CqhSjbWjtl7di8Y0nOX1pa0Z9Y474vr+WtPrUp+O6+l6hexLFju/Dd\n7+bxwgvbEbbXQRKtdqIM+M8A2FP/vAfAVyM8FhG1yW0ohAM/PIA9v7unKed/8OMHoX+psNaY4QXu\nGQUuttyhX1xplodxTXA1QY+q4c03pxB2COYkWu10JYcvIk8A2AZgnYi8BuAvAXwBwJMichuAWQA3\nd+NYRNQd5amy69AJh145tKRTVuVoBRcumwHI7Dz956dMGufEGhPsw+bvT19aXkesfuJsxhmnrgR8\nVd3psSrkNZ+I4hamU1alAtz5aAXnPjAGDDQuDk/8TvgA3+rRaeCu64F35Jf3+160YkUR+fzqxOfG\n5Vg6RBk1tGbIddgFu+K2UgHGxoD5sXJTsO/U1Bnz/qlNwIbB/srne9m8eX/i4+gAHFqBKLOCOmWV\ny8D8PIA1ISsXawg9XazdESs9I3lFJ58vpiLYAwz4RJkV1CnrhB3nz4eoXLxcAJ6aBL4yCVwsBkdy\nBXCxiNO/LHbyn5AyOQDNeapcroDf+q39yRTHBUfLJCJXw8PA7CyAGyrAh8fc0zoKYL4IfHM/cNRx\nF3tDBfjj24GBC43trqwGBi6aC8jUOHC0hNHRCu6++xNYsaLWvNuWsJTWtE8uV2gaVmFuroLp6XLs\nufqwo2Uy4BORq0oF2LWr/uWGCjBaBtbMArU8kKsC563FwN2J0dEK/uzPPo1CwUxEXqvl8Mwzn8YD\nDzy4uL5c3pWyoC+JVr4uKQ0DPhF1at064JzLCMirVgEXLza+70QFn0cZQziBExjCPRjHE+heILzv\nvu0YGZlKRdCPclz75eJ4+EQZUqmYFEwuZ94rXRqb62aP3jOtwf4RjGEYs8hBMYxZPIIx7ET3Bgj7\n7GcP4+mn90I13Fj6UVpYuJD4IGjLxTt8oh632HzSkWIvFICJCaDU4U32Yh7fx88wjGEs3WgGFt6N\nmc4K4OKOO/bhIx/5n8jlakvWxf0E0JrDD6vbuX6mdIgywisoWxYwM9PZvnO54DvqKnLIuTTLqUGQ\nx9KgHJXR0QruumsM73iHf58B1e5eGNpN8bTOeQss/8JhY0qHKCNOeDST91rejjAjlp+A+0Zuy1et\n6rRE3qamSrj//gmcOmWhVgMWFvJQBd58s4g33yyiVhOcOmV1PSXU7iBo09PlpmAPxDd6JnvaEvW4\noSH3O/xuTC8xPr40XdTqHozjEYxhFRobXUQB96B5rJiBAeATnwAefji6PPzUVAlTU/53yXfcsQ8f\n/ehDnnf5Cws5iFyNfP4cTLv6KgYHLVSrFxbHtHdqdxC0JOe85R0+UY8bHzc5e6dCwSzvVKlk6gIs\ny3ubJ1DCf8YEZmChBsFreQt/PzqBfyg2Am+xCDz2GHDoUPKVrg888CCefnrv4hNAtZrDxYurFp8A\nvvCFv8Ntt53Ftm2KbdsWsG2bYuvWGWzevB+5XPOJXs4gaEnOecsc
PlEfqFTMUAgnTpg7+/Hxzits\n3QTlvoPqDcLUCURJJNzxRYCaS/WDV2VrO+c/yRw+Az4RhebXaidMy6CgVj/Fonu7/+USAdauBd54\nwwTiHTuAAwf8U1RAexXey2kllVQrHahqal5btmxRIkqvyUnVQsFuDd94FYtmXZjft/7W+VJVtSz/\nbVpfo6Oqe/eqijQvLxTcyzQ5aY4hYso9MBDud168ymtZ4ffRKQBHNESMTTzIO18M+ETp5wyYltVe\ncFQ1QdYvQHpdVLxedqAvFs2r3XJ1+t/TeqFxlisuYQM+UzpEFKswKRA7Jz47C+TzQLUabt/d6nDW\njij7QYTFdvhElBi/oR6cLX9EzHtrkC6VTLBUBRYWwneUmp83F4o4RdlKqtt4h09EXRXFUA9hhniw\nebWwiVJcraS8sJUOESUiihSH20XES5yplLRgSoeIEhHFUA+tHcDy9YmlWlM9bqmUqEYS7UUM+ETU\nVV5DOnQ61ENrXl8VOHjQvy7AfjKYnTXbz86a71kN+kzpEFFXRTlcc7vS0IImDkzpEFEiwrTCiUuU\nI4n2Io6WSURdVyolE+BbRTmSaC/iHT4R9a1eaiMfBwZ8IupbaUovpQFTOkTU19KSXkqDyAO+iMwA\n+BWAKoCFMDXJRETUfXHd4f+Bqp6N6VhEROSCOXwiooyII+ArgMMi8ryIjMVwPCIichFHSueDqnpS\nRK4B8G0R+YmqPmuvrF8ExgBgKKuNY4mIYhD5Hb6qnqy/nwbwFIAbW9ZPqOqIqo6sX78+6uIQEWVW\npAFfRFaJyFX2ZwB/COClKI9JRETuok7pbADwlJgxTFcA+F+q+s2Ij0lERC4iDfiqOg3gd6M8BhER\nhcNmmUREGcGAT0SUEQz4RJRKnJqw+xjwiSh1wk5N2MlFIYsXFE5xSESpE2Zqwk6mUkzTNIzdEHaK\nQwZ8Ikod05LbnWUBO3aY4Fytuq8Pmq+23+a6DRvwOR4+EaVKpWICvte96Ows8NBD3r8PM19tVue6\nZQ6fiFKlXPYO9mGEGZLLa5t+H86LAZ+IIhG2UrR1O7dUS1hh56vN6ly3TOkQUde1VorarWyA5kpR\nt+380jl+8vnwla72NuWySeMMDZlg34sVtu3gHT4RdV253NwCBjDfy+XG90oF2LNn6XbLCfYipgJ3\n927zWQRYt86/qWWpZCpoazUT7Mvl/m+iyYBPRB0Lm5aZnTXbrFsH3HqreyubsPJ58+58InBeLM6d\nA3btAlas8L8IhG3z3w8Y8ImoI24B04+qCcaXL3tvkwsRmeyLRdATgfOicu6cudA4g3nYpxG/+ohe\n6cTFdvhE1JFOK1qTUCwCZ8+az7mc90XDbvN/4IB3J600dOJixysiioVfwEy7YhH45S9NHt+LVyWy\n3UnL64KXz5sLRRxBP2zAZ0qHiAL5pSzWrk2qVJ07d84/2APeFzO7k5ZXZ61qNX11AQz4ROQrS5Wa\n7bA7afl11mqtC0gaAz4R+Qqq1HzjjfjLlDRnJy23TlxOaRqugR2viMhX0Lgza9ea1Eg/W7ECWLPG\n/Hfm880XPDtHv2ePezPTNA3XwIBPRL6GhtwrJdMUyKK2sGAqd1euBK5cMcvs1NY//RNw6JAJ9q0V\nvGkbroEpHSJq0lpB+573LB2ueOVK4MIFs02/393barVGsLfNzwMPP9y4IKo2zpVlpW98fd7hE9Ei\nt7Ft3O7u7c5TtLQVj2p6x9XnHT4RLXKroHWzsBB9WXqZs94jTb1weYdPRIt6rcdsWuVyjcAeZtTQ\nuDDgExGA4JmmKDx75M6VK5eOGWS38Eki4DOlQ0QAOp9pKiv27vWfc9em6j1AXFJt8xnwiQhAe0Eo\nzGiW/WjvXuDBBzu/MCbVpDWjfzYiatVOEAoaf6YfDQwAv//73dlXUm3zGfCJCID7EAFhUhdZcfmy\n6U2by3V2XorF5NrmRx7wReRD
InJcRF4VkbujPh4RLU+pZDoKWZYJaJYF3H67/zgxWVOtmnTOclM6\nAwPmPakmmpEGfBHJA/gbAH8E4H0AdorI+6I8JhEtn3Oe15kZk6+emGhMJ0jBnBfMvXsb34tFc8E4\nd64x6mjr7FtRi/oO/0YAr6rqtKpeBvBlADdFfEwi6qJSKZs5++WamQEOHjSfH37YvNvfWwdXu3wZ\nuPPO2IoWecB/J4CfO76/Vl9GRD0kSwOldaJY9J4/wGsoijiHqEi80lZExkTkiIgcOXPmTNLFISIX\nQWO+k+lktX+/9/wBaRB1wD8J4F2O79fVly1S1QlVHVHVkfXr10dcHCJqhz0OzO7dwG/8Rrba3xeL\n7W3/+OMm/dVup6p2j9OJqP98/wxgs4i8W0QGANwC4JmIj0lEXdCamggz/2u/sCzg5pvb295uaumV\n/ioWzVOAk/1UEJdIA76qLgD4EwDfAnAMwJOq+qMoj0lE3RF25Mx+Y09a8uST4bYXae5I5Zb+KhRM\nYH/88eZWPPZTQVxEUzR4xsjIiB45ciTpYhARTPomTHgoFEza58c/jrxIkcvlzFNMsdheZWrreapU\nzAXzxAlzxz8+Hm1gF5HnVXUkaLsMZeSIqB1hWubYszpdvBh9eeJgp6zaCfaWtXRZa3+GtMx6xYBP\nRK6CWuaINIJZUqM/dstyh0pI25y1QRjwiciVPdSCVy9b5xNAL7fTz+fbHyrBzsGnbc7aIAz4ROSp\nVAIOHHCvhAyqqOwFhYL573NLy3ixrPSlasJiwCciX26DqrXe2Tq3ARpPBfZ7sdgYOCxp+fzS/w63\nC9YKl/kAey2F04oBn4gChamEtLdRNZOcO9/PngUee6z5opGUsbGl/x1uF7UvfQmYnPS/0PUaNssk\nokSsW9f+ODKFArB1K/Cd7yx/iGLLMsG+n7BZJhGl2v79S3ue2izL3F273WEfPmzu0Ccnlzcsweys\n6TeQ1Jj0SXLJUhERRc9OjQR1UPJLofz61+0fV8QEfaAxkmXQcfoFUzpE1JOGhxuB2ymfN08Aa9cC\nv/qVGXPeJuKeCur1NA9TOkTU17w6e9Vq5uVWUex1f9vrHcfCYsAnop7k1dnLuby1dZFX66Be7jjW\nDgZ8IupJXqNS+rWTX85v+gkDPhH1pDAdwrrxm37CSlsioh7HSlsiImrCgE9ElBEM+EREGcGAT0SU\nEQz4REQZwYBPRJQRDPhERBnBgE9ElBEM+EREGcGAT0SUEQz4REQZwYBPRJQRDPhERBnBgE9ElBGR\nBXwRuVdETorIC/XXjqiORUREwVZEvP+/VtX7Iz4GERGFwJQOEVFGRB3w/1REXhSRx0TkarcNRGRM\nRI6IyJEzZ85EXBwiouzqaIpDETkMYKPLqjKA7wE4C0ABfA7Atap6q9/+OMUhEVH7wk5x2FEOX1W3\nhyzMIwC+3smxiIioM1G20rnW8fVjAF6K6lhERBQsylY694nIv4ZJ6cwA+HSExyIiogCRBXxV3R3V\nvomIqH1slklElBEM+EREGcGAT0SUEQz4REQZwYBPRJQRDPhERBnBgE9ElBEM+EREGcGAT0SUEQz4\nREQZwYAPoFIBhoeBXM68VyrdP8a+fcCKFYCIed+3r/vHICLyk/mAX6kAY2PA7Cygat7HxtoP+n4X\njX37gIceAqpV871aNd+3bw+/jzguSkTU51Q1Na8tW7Zo3CxL1YT65pdl+f9uctJsI6JaLKrm80v3\nsXev2TaXcz8GYPZj769QaF5XKJjlfuu8ymRZS9cTUX8CcERDxNiOZrzqtiRmvMrlTAhtJQLUao3v\nlQpQLgMnTgBr1wJvvQVcuRK8/717zd28l2IROHvW3LXPzi5db1nm3WvdzIwp2513AufONa8vFICJ\nCaBUCi4nEfWusDNeZT7gewXaYhFYvdqsy+Wag3+3TU4Cu3Z5rxfxvigdPGhSUPPz7r+1LwpE1L
/C\nBvzM5/DHx82dsNPAgLmDty8EUQZ7ANjtM3NAPg8MDbmvGxoyTx1ewR4wTyRERAADPkolk/awLHPH\nbFnAVVeFS9d0i99DVrUKXLjgvm521v3pxMnrYkFE2ZOZgO/XyqVUMmmPgwfN99ZceJJEll+eQsE8\nwdjY0oco2zIR8MM0vXRukyadVLGIND4HnQNeDIj6X5STmCfG2aJmaMikRFrz3PPzpmWLvR3QWXBN\no4sXgU9+0nx2y/XPz5vlQHPFr30xANjCh6if9F0rHftO1q8iM2ssy1zUvFr6DA35N/skonTLbCud\noFYrWWQ/6bgZGvJuyTM7yzQPUT/pu4DPZohLDQ25Nz8VafQzcGOv72TICSJKj74L+F53ssVio9dq\n1rznPY0nn3zeLHN25rLH+HFy6+zlzPkTUe/pu4DvdidbKAD795t8dBaD/tRUI0dfrXr33M3nG30R\nvKp2Wp+g2LqHqHf0RcB3Bp1yGdizp7kjlXM8GbcLQtZ4BfNazbz8LozOJ6hujTRKRPHo+YDvFnQO\nHDCBvVYz7+Vy4w4UaPSspWbOYO71pOTsyBXU1JOI0qXn2+G32758925zYbAsk9dPU6/aJNkVtMPD\nJqjbT0TO/gzO5YB3BTkrzonSqaN2+CLyHwHcC+C9AG5U1SOOdX8O4DYAVQB3qOq3gva3nHb4fsMb\nr13LgB5Ga04/7LDKfkM6s/0+UXziaof/EoCPA3i25eDvA3ALgN8G8CEAD4pIvsNjufJqlcNgH95y\nW+OESfsQUXp0FPBV9ZiqHndZdROAL6vqJVX9GYBXAdzYybG8eAUd6kyYtIzbSKOccIUovaKqtH0n\ngJ87vr9WX9Z1zqADmKaF8/O8u+9ULheutY090qjduofBnii9AittReQwgI0uq8qq+tVOCyAiYwDG\nAGBomYO320GGY+j4c87iFaRa5QBqRP0m8A5fVber6r9yefkF+5MA3uX4fl19mdv+J1R1RFVH1q9f\n317pHTiGTrB2O5+xiSVRf4kqpfMMgFtEZFBE3g1gM4D/F9GxALApYBCR5s5nzrHy/fC8EvWPjgK+\niHxMRF4DsBXA/xGRbwGAqv4IwJMAfgzgmwD+i6q6jNjSPZzKz59qIydfKgG33x4u6PO8EvWPTlvp\nPKWq16nqoKpuUNX/4Fg3rqr/UlWvV9VvdF5Uf16tdYrFqI/cO5zDHjz4oJnS0W5hUywCK1c2b88m\nlkT9peeHVrB5NRHcv5/NNG2tOXlnC5uzZ4HHH2cTS6J+1nczXrmxpzxM23y1SRAxAZ6I+kdmZ7xy\nY9/JqjLFw5w8UXZlIuA7ZTnFw5w8UbZlLuDbuf4s3unv2cOcPFGWZS7gAybonT0LTE4mXZJ4HTqU\ndAmIKEmZDPi2Usm/12mxCOzd22i50uvYiYoo2zId8AH/XqerV5v26nbTxV6fJSvsgGg2zldL1F8y\nH/BLpfATdvfSfLhuFzF7QLQwgZvz1RL1n8wHfCDchN3A0s5daaZqhopuFXZANM5XS9R/GPARfuYm\nuwOXPcdrpy197Hl1/eR8/kJ+Q0dYlncHqzC5fM5XS9R/GPARbuYmtxTHW28BAwPN+yoUgNHR4GOK\nmLqBm2/23saygKuvdl+Xz3sPHWFfrLw6WYXpfNXJb4kopVQ1Na8tW7ZoWlmWqgn1za9i0awTMe+T\nk2b7yUnVQsH9N4DZ1m+/9jZe60QaZZucDF+GQqGx3k8nvyWieAE4oiFibOJB3vlKc8AXCQ68rSYn\nzQWh9TfOwOm1X+f+/S4YQbwuBlH/lojiEzbgZ2LwtG4YHnYffM2yTGrGT2vuf3y8kS7y2q+TSHNL\nokKBI1kSUQMHT+uysBW7bvwm+g7T1FOVwxYTUecCJzEnww6wXnfq3div151+mKcIIqIgvMNvg9+d\nejf2Ozm5/KcIIqIgDPgpEqZ5KBHRcjHgExFlBHP4KWJ37r
KHNLDHrwF4l09EneMdfopw/BoiihID\nfopw/BoiihIDfopw/BoiihIDfop00rmLiCgIA36KsFkmEUWJrXRSplRigCeiaPAOn4goIxjwiYgy\nggGfiCgjGPCJiDKCAZ+IKCNSNeOViJwBEDD/07KtA3A2on13Q9rLB7CM3cIydkfayxhn+SxVXR+0\nUaoCfpRE5EiYKcCSkvbyASxjt7CM3ZH2MqaxfEzpEBFlBAM+EVFGZCngTyRdgABpLx/AMnYLy9gd\naS9j6sqXmRw+EVHWZekOn4go0/o24IvI34vIC/XXjIi84LHdjIgcrW93JMby3SsiJx1l3OGx3YdE\n5LiIvCoid8dVvvqx/5uI/EREXhSRp0TkNz22i/0cBp0XMR6or39RRN4fR7kcx3+XiPyjiPxYRH4k\nIne6bLNNRM47/h/4i5jL6Pt3S8E5vN5xbl4QkbdE5DMt28R+DkXkMRE5LSIvOZatFZFvi8gr9fer\nPX6b2L9nAICq9v0LwH8H8Bce62YArEugTPcCuCtgmzyAnwLYBGAAwA8BvC/GMv4hgBX1z38F4K/S\ncA7DnBcAOwB8A4AA+D0A34/573stgPfXP18F4GWXMm4D8PW4/98L+3dL+hy6/M1PwbQ3T/QcAvj3\nAN4P4CXHsvsA3F3/fLfbv5Wk/z2rav/e4dtERADcDOCJpMuyDDcCeFVVp1X1MoAvA7gproOr6j+o\n6kL96/cAXBfXsQOEOS83Afg7Nb4H4DdF5Nq4Cqiqr6vqD+qffwXgGIB3xnX8Lkn0HLYYBfBTVY2q\nY2ZoqvosgDdaFt8E4ED98wEAH3X5aaL/noE+Tuk4/DsAc6r6isd6BXBYRJ4XkbEYywUAf1p/VH7M\n4xHwnQB+7vj+GpILGrfC3O25ifschjkvqTl3IjIM4N8A+L7L6g/U/x/4hoj8dqwFC/67peYcArgF\n3jdtSZ5D2wZVfb3++RSADS7bJH4+e3oCFBE5DGCjy6qyqn61/nkn/O/uP6iqJ0XkGgDfFpGf1K/g\nkZYPwEMAPgfzj+5zMGmnW7tx3HaEOYciUgawAKDisZvIzmGvE5HVAP43gM+o6lstq38AYEhVL9Tr\ncJ4GsDnG4vXE301EBgB8BMCfu6xO+hwuoaoqIqls/tjTAV9Vt/utF5EVAD4OYIvPPk7W30+LyFMw\nj11d+Z8+qHyOcj4C4Osuq04CeJfj+3X1ZV0T4hz+JwB/DGBU64lIl31Edg49hDkvkZ+7ICKyEibY\nV1T1K63rnRcAVT0kIg+KyDpVjWX8lRB/t8TPYd0fAfiBqs61rkj6HDrMici1qvp6Pe112mWbxM9n\nv6d0tgP4iaq+5rZSRFaJyFX2Z5hKypfctu22llzoxzyO+88ANovIu+t3ObcAeCaO8gGmRQGAzwL4\niKrOe2yG9YnnAAABIklEQVSTxDkMc16eAfCJekuT3wNw3vHIHbl63dHfAjimqv/DY5uN9e0gIjfC\n/Hs8F1P5wvzdEj2HDp5P6UmewxbPANhT/7wHwFddtkn03zOA/m6lA+BLAG5vWfYvAByqf94EU1P+\nQwA/gkljxFW2gwCOAnix/ke/trV89e87YFp4/DTO8tWP/SpMzvGF+uvhtJxDt/MC4Hb77w3TsuRv\n6uuPAhiJ+dx9ECZd96Lj/O1oKeOf1M/ZD2EqxT8QY/lc/25pOof1MqyCCeBrHMsSPYcwF5/XAVyB\nycPfBqAIYArAKwAOA1hb3zY1/55VlT1tiYiyot9TOkREVMeAT0SUEQz4REQZwYBPRJQRDPhERBnB\ngE9ElBEM+EREGcGAT0SUEf8fXc2ijczjzvYAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "colours = ['b','g','y']\n", 
"for i in range(k):\n",
"    # Index the (cluster, point) pair instead of unpacking it in the lambda\n",
"    # signature: tuple parameters are a SyntaxError on Python 3.\n",
"    all_assigned = np.array(assignments.filter(lambda pair: pair[0] == i).values().collect())\n",
"    plt.scatter(all_assigned[:,0], all_assigned[:,1], color=colours[i])\n",
"    plt.scatter([centroids[i][0]], [centroids[i][1]], color=['r'])\n",
"plt.show()\n",
"plt.gcf().clear()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"That doesn't look very good. Both of the lower clusters are assigned to the same centroid and the one at the top is split across all three centroids. Let's see how many points are assigned to each cluster. This is a classic \"Map-Reduce\" operation for counting groups."
] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(0, 1124), (1, 57), (2, 319)]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Map every (cluster, point) pair to (cluster, 1), then add up the ones per cluster.\n",
"assignments.map(lambda pair: (pair[0], 1)).reduceByKey(lambda a, b: a + b).collect()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"Actually, Spark has a built-in function that does this directly:"
] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "defaultdict(int, {0: 1124, 1: 57, 2: 319})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [
"assignments.countByKey()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"Now we start the next iteration: For each cluster we calculate a new centroid as the mean of all points in this cluster. So we first need to separate the clusters."
] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [
"[array([ 5.86749807, 8.17715188]),\n",
" array([ 6.76282255, 0.60514535]),\n",
" array([ 8.0161824 , 1.54314701]),\n",
" array([ 8.40185356, -0.37348132]),\n",
" array([ 7.3996796 , 0.91258881]),\n",
" array([ -4.98436335, -11.42227525]),\n",
" array([ 9.88825096, 0.90241392]),\n",
" array([ 7.95311372, 8.36897664]),\n",
" array([ 6.10846066, 8.23343995]),\n",
" array([-6.66843698, -9.35707168])]"
] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# One RDD of points per cluster: keep the pairs whose key matches, then drop the key.\n",
"# (Indexing the pair keeps this valid on Python 3, where tuple parameters are gone.)\n",
"c0 = assignments.filter(lambda pair: pair[0] == 0).values()\n",
"c1 = assignments.filter(lambda pair: pair[0] == 1).values()\n",
"c2 = assignments.filter(lambda pair: pair[0] == 2).values()\n",
"c0.take(10)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"We now calculate the mean for each cluster and that yields the new centroids."
] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"[ 1912.08080524 -3537.79137713]\n",
"1124\n",
"[array([ 1.70113951, -3.14750123]), array([ 5.88201222, 9.8330787 ]), array([ 7.96381989, 9.8000387 ])]\n"
] } ], "source": [
"print(c0.sum())\n",
"print(c0.count())\n",
"new_centroids = [c0.sum()/c0.count(), c1.sum()/c1.count(), c2.sum()/c2.count()]\n",
"print(new_centroids)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"Now we use the same method as before to assign the points to their new centroids."
] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [
"[(1, array([ 5.86749807, 8.17715188])),\n",
" (1, array([ 5.61369982, 9.93295527])),\n",
" (2, array([ 7.22508428, 10.44886194])),\n",
" (0, array([ 6.76282255, 0.60514535])),\n",
" (0, array([ 8.0161824 , 1.54314701])),\n",
" (0, array([ 8.40185356, -0.37348132])),\n",
" (1, array([ 6.51192277, 9.81342902])),\n",
" (0, array([ 7.3996796 , 0.91258881])),\n",
" (0, array([ -4.98436335, -11.42227525])),\n",
" (2, array([ 9.88825096, 0.90241392]))]"
] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Pair every point with its list of costs, one entry per centroid.\n",
"d = rdd.map(lambda x: (x, [cost_function(x, new_centroids[i]) for i in range(k)]))\n",
"# Key each point by the position of its smallest cost. The (point, costs) pair is\n",
"# indexed rather than unpacked in the lambda signature so this also runs on Python 3.\n",
"new_assignments = d.map(lambda pc: (pc[1].index(min(pc[1])), pc[0]))\n",
"new_assignments.take(10)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"And plot the results."
] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9wG+d5J/DvA1CEDP+gLZCinDgAwrOiuKmbXszxRT0n\npx51baJp68TT81gDK2wcl7V97sl/+Dzucaa1J8NO6qipdZOzM4xjRxVxTj1zUW0nSn0Rc5q0M0ov\ndM6x7MiSHZZULFmkJJ/lSIxJkXjujxdLLMBdYEFggQX2+5lhQOwusJul9ey7zz7v+4qqgoiI2l+k\n2QdARESNwYBPRBQSDPhERCHBgE9EFBIM+EREIcGAT0QUEgz4REQhwYBPRBQSDPhERCHR0ewDsOvu\n7tZ0Ot3swyAiaikvvvjiGVXtqbRdoAJ+Op3GxMREsw+DiKiliMi0l+2Y0iEiCgkGfCKikGDAJyIK\nCQZ8IqKQYMAnIgoJBnwiopBgwCciCgkGfCKikGDAJ6LAmZnJ4tChNA4ejODQoTRmZrLNPqS2EKie\ntkREMzNZHD06hFxuDgAwPz+No0eHAAC9vZlmHlrLYwufiAJlcnJ4Odhbcrk5TE4OV/1dvFMoxoBP\nRE2RPZxF+tE0Ig9HkH40jexhE4zn5487bm8t9xrErTuF+flpALp8p+C0fVguDAz4RLSCWzCu5/cP\nPT+E6XPTUCimz01j6PkhZA9nEYslHT8TiyU9B/GZmSyOHBn0dKdQzYWh1YmqNvsYlvX39ytHyyRq\nLisYz10sBMv4mjhGf38Umevrk0NPP5rG9LmVAzxGJYot3Uv4Lx8WxCKF2BSJxLFhwyBOnhwFsLTi\nc7FYCps3TwFY+QzASSyWwvz8ccRiSSwunsfS0lnHbfr6RjA5Oby8bV/fSCCfI4jIi6raX3E7Bnwi\nsnMLxqmuFKbum1r192YPZzE8Pozj545DUT7uDPQAf9wnWL9WsTaWQiKxDadO7SkTxAXXXbc3H5wr\njRQsQIX9WyKReNE+I5E4Nm0aDVzQ9xrwmdIhoiLHzznn0N2We1GawilnoAe4sw/oiSnOLkTR1zeC\n2dlnyrbYo9F1trRMOd6DPRCt28PjoGDAJ6IiyS7nHLrb8kqyh7MY3DdYlCJyM9AD3L8J2LAWiAjQ\n3bmEI0dux+LiypSLXS73XtkLghFBNS17p9QRYMpEW/XBLgM+ERUZGRhBfE28aFl8TRwjAyNVf5fV\nsl9S5+BZ6s4+YG206t1A9UKFLdZAxGu3oyg2bBhELJZy3aJVH+wy4BNRkcz1GYz+/ihSXSkIBKmu\n1Kof2A6PD3tq2VvWx6rehQeCaDQG1QWP2y/hrbe+gURiW76l76wV0zsM+ES0Qub6DKbum8LeW/YC\nAHZ8e4en8szSck6nh7/lzM6v+pDLUCwtna/uE7qA2dlnsGnTaMWWfiu18hnwichRuVp5r9sLpKp9\nPjEJvOct++O7xcWz6O3NYPPmqbJB/8iRHTh27J4GHtnqMeATtblqO1FZ29/+7dtXpGPmLs5heHzY\n8/aVKnJKjZ8Gdh0Fzi14r6VphERiG+B68VKcPPm1opZ+UHvucvA0ojZW2onKaqUDcMzJZw9n8fm/\n/zwu5i66fqe9PNOpk1Y9qPU/1d0g1FkEMzNZHDu207FjVjHF5OQwenszgR78jR2viNpYtZ2ouh/p\nxtlflQ9uUYkipzkku5I4v3C+4vZeDfQA914LdK0BpKmBvkCks4qHvYItW3I4dCjt2B/A3hu43rx2\nvGILn6iNVduJykvwtkosq30gW45Vf7+akkw/eQ/2WB4DqNLgb81Ulxy+iDwpIrMi8opt2ToR+b6I\nvJ5/vaoe+yIi7+rdicovq62/D4pIJI6+PtNPodzgb81Wr4e23wTwqZJlDwIYV9WNAMbz74mogart\nRHVZ52WNOKwV/Km/b5Ro0fg6fX0jEOks2kKkc/mC0Ex1Cfiq+
kMAb5csvhnAnvzvewB8ph77IiLv\nqu1EFYuuLvJufxn4l78Blh4yr9tfru7z9aq/X38A+PhtwL/79+Z1/YH6fG95uRUPY0ufjaou4vXX\ndza9asfPHH6vqr6V//0UgF6njURkCMAQACSTzb/lIWo3mesznnvJvv2r0nZbZdtfBr7+PHBpvrAn\nfc68B4Cnf8PbdzwxCQxfV9vD2vUHgE27gGj+4rF2xrwHgNmtq//eSkpTNab3bWmVU255PKBmVu00\npA5fzeXOsRxIVUdVtV9V+3t6ehpxOETkYjW5/b8cLwR7y6UXzXKvxk/XXnff90Qh2Fui82a5nxYW\nzhS12CuP2Nm8YRn8DPgzInI1AORfZ33cFxGtQmmnrG0bt63I+VfqLZs8V91yN7UG/JhLhHFbXi+q\nF3DkyO04eDCKl17aCq+dB5pRteNnwH8OwGD+90EAz/q4LyKqktNQCHt+ugeDHx0syvnvvWUv9C8U\nqS7n4QWOdzl/v9tyN7UGo/n11S2vvxzeeWccXi9dzajaqUsOX0SeBrAFQLeIvAngLwB8CcAzIvIF\nANMAbq3HvoioPpxGspy7OIf9r+9f0SkreziL8wvOA5D914HiHD4AXFhjljfS5J3FOXwAWIqZ5UFj\nL+NspLoEfFXd7rKqwX9yIvLKS6es7OEsdn5vZ9kOWdaD2b8cN2mc410m2Ht9YGupdSQF68Fs3xMm\njTO/3gR7Px/YetXRkUA0elnT58ZlT1uikEp2JR17y1oPbqsZJ+fp36g+wNsN9NRn2JzZrcEI8KU2\nbtzd9HF0AI6WSRRalTplVTt5SS3u7AvO+Dn1Fo0mAhHsAQZ8otCq1CmrlknLq9Xb0j1tLREAxeND\nRCJxfOhDu5tzOA6Y0iEKsXKdstxSPlQQicSLhlWYmclicnK46bl6Nwz4RORoZGAEt3/79mYfRkCJ\nY0Dv7c0EKsCXYkqHiBxlrs8gcUnCcd2lay6t676WgjMtR0WxWApbtuSwefNUoIO7EwZ8ojZQ7TSG\nXt36EefuMxcuXqjL91v+7/8DAjQXU1mLi+ebPgjaajGlQ9Tiqp3GsBr7X99f8/F5sfHy1qnSsaY7\nrGUQtGbl+tnCJ2pxbj1mSycbX41GVep0rWnIbupuNYOgWXPemkHWdPnC0Yi7BQZ8ohZX7TSG1aj3\nzFj1zv0HQbWDoE1ODi9PcG5p1OiZDPhELc7PaQydOmetVme0E5/76Ocqjr4ZBCLWhcnU1cdiKXR0\nOD/ArnYQtGbOecuAT9Tiqp3GsBr2zllepbpSuLv/7qIKn8QlCTx585PY//p+qMNokudK5wtpss7O\nbmzZotiyZRFbtig2b57Cxo27EYkUn+fVDILWzDlvpXQqrmbq7+/XiYmJZh8GUcvJHs5ieHwYx88d\nR7IriZGBkZof2DqRh8u3zlNdqRUjbdpFHo44BvyBHuCBDwOdvjdBBd6GLxZs2ZJbsdTtYWs1D2Gt\nHL49rVPagavq/1ciL6pqf6XtWKVD1AaqmcawFqmulGvvWy93FW69d8dPm9cHfz2Bjpz7yJzVE3R0\nrMPi4tuIxZJIJLbh1Kk9K3Lopdxa204dq0oDeKXqHWsZq3SIKNDccvqJSxJlJ0e3f97N+Glg6yfP\nIBbznj4CgCuvHMD73nc3SsfbjETiuO66vbjppjPLHaU+9KHHsGnTaH4fgmg0AZHOFZ+rJk2zmoew\nvb0ZbN481fAOXAz4ROSZ04BrY7eM4cwDZzzdYZTrvWs9J+jrG1mRKy/nnXd+gJMnH0dHxzpEowmY\nYQ9SrikSe7D9xCfO4MMffnL5AlDuc26a+RC2WkzpEFFVak0f7f707hXj7NvTQcUpj2mYSpmlMt9o\ncvKLi2eXW/XVBOxax7+JxZKOE5c3YwrDStjCJ6K6KzfUQ6VhmQF7K9xUynidHqVR9ex2TnckzZrC\nsBK28ImorrwM9VDtXYJbK
9pJo1MpzXwIWy2WZRJRXaUfTTtW4lQq2SzHqZTRTSyWwubNq9tPq/Ja\nlsmUDhHVlR9DPfT2ZmzVNUBhZqmVlTmlqZSZmSwOHUq37AiX9cSUDhHVVaXJ0VfLrQa+XCql2hr5\ndseAT0R1NTIwUrYKp54qVdiUq5EPY8BnSoeI6spLFU6jtFKNfCOwhU9EddeooR4qaaUa+UZgC5+I\n2lYr1cg3AgM+EbWt4uqe1Q2d0E6Y0iGitlbr0AntxPeALyJTAH4JMxjGopfOAUREVH+NauH/tqqe\nadC+iIjIAXP4REQh0YiArwAOiMiLIjLUgP0REZGDRqR0blLVEyKyHsD3ReQ1Vf2htTJ/ERgCgGQy\nnLWxRESN4HsLX1VP5F9nAewDcGPJ+lFV7VfV/p6eHr8Ph4gotHwN+CJyqYhcbv0O4HcAvOLnPomI\nyJnfKZ1eAPtExNrX/1DVf/B5n0RE5MDXgK+qkwA+6uc+iIjIG5ZlEhGFBAM+EVFIMOATUSBxasL6\nY8AnosCxpiY0Y9nr8tSEpUG/lotCGC8oDPhEFDjlpia0eL0oOKnls62MAZ+IAsdplipr+aFDaRw7\ndg+OHBmseFFw4+WC0o44Hj4RBYppZQvMMFwrzc9P4+TJx10/72W+2rDOdcsWPhEFimllOwd7L7zM\nV+u2TbvPdcuAT0S+yB7OIv1oGpGHI0g/mkb2sHN+vHS791zSOV54na82rHPdMqVDRHWXPZzF0PND\nmLto8uTT56Yx9LwZHT1zfabsdrPvCXrXrqaFH/U8X621zeTkMObnjyMWS6Kvb6Ttp0IU1dXfOtVb\nf3+/TkxMNPswiKhG6UfTmD63sqWe6kph6r4pACbYD+4bxJIuFW0z0AMMXweYIbi8snL+hdx/R0cC\nGzfu9hTEZ2ayLR38ReRFL9PHMqVDRDXLZoF0GohEzOv0OeeHn9PvHEckAnT/dhZ37BtaEewBYPy0\n1wx+NP9qf8Bb+OTi4lkcOXI7Dh7swMGDgoMHBf/0T92OtfxhKdFkwCeimmSzwNAQMD0NqJpXvOPy\n8PNcEqrA2d8cxoIWyiIHeoCn/w0w/knzemHJS2iyLhaVLg+Fi8ri4lm89todRcHca81/uU5ardKJ\niwGfiGoyPAzMzZUsHB8BFoofimIhbpYDQFfhDmCgB7h/E7BhLRAR83qJCBYWOn05XtUFHDu2c/m9\ne4lmoea/3B1AK90hMOATUU2OO8XLwxng+VHgnRSgYl6fHzXLAeBc4Q7gzj5gbbT44x3RJczNXY5T\np1JQNXcO9bS0dBYHDwr+8R+7YVJCzkzN/9fK3gG43SEcOTIYuKDPgE9EFZXm6LO2OLZuncuHDmeA\nR6eAh3Pm9bDtIajtDmB9zPnjV1zxNrZvnwJQ7QNc75aWzgLIVdjKrQPY8aJXh28PXEufAZ+IynLK\n0Q8NFQf9qtnuAGbnnTeZnQ12Jyirk1a5zlpBG66BAZ+IynLK0c/NmeUA8Pbbq/zi/B3AE7vG8N57\nxfn+996L44kngtsJyt5Jy6kTl12QhmtgwCeishxz9Lblrikdj8bHM9i1axSnTqWQywlOnUph165R\njI+bFNDExIBjDt+P3L4bkQ5Eo4n8u+hyy31mJove3gw2bRpFoUy0WJCGa2DHKyIqK53Ol1qWSKWA\nqSmguxs4e9bfY3jkka3o7x9ffr+wEMOXv/wN3HvvTlx5pc87XxaBCeoXC0sicWzYMIizZ/fnq3SK\nB32LROKee//WwmvHKw6tQERFslmTrjl+HEgmgWuvNb/b24Zr1gDnz5uHuI1oMz7wwAHXdQ8++Efo\n6Fj0/yCQQ+kD3lxuDidPfg3FHb9M0I/FUoHrscuAT0TLrAe0Vs5+etq5da/qf6veCyvtc++9O9HV\nZQ5obu5SXLy4Fldc8TZyuQg6Olb25rVTrbUKqPSKZ4L95s1TtXypL5jSIaJlbumbVjUwkMX
99w9h\n7drSnmHGwkInvvvdL+DTn95TtE3tFwHBli3mbqAR4/QwpUNEVWunYA8U7gDuvHMY69cfx7vvmifM\nV1zxNmZnk3jiiRGMj2fw6qv/dnmb2dkkDh3ahs985vEagn5kuf7+6NGh5Y5ZVi9cAE1J9bCFT0QA\nTDpnx47GVb4E3b593Y4PhHM5gYhWvBjMz8exZo0gErmwYl29Uz4cLZOIqjI8zGBv99Wv7sbCwpqi\nZQsLa/DOO3tx+nSq4udjsTmIrAz2QPNq8xnwiQiAe729k0gIIsf4eAaPPPJUUf+A8+efwi23ZDA6\nOrKis5gTt7uAZtXmM4dPRABMCabXHH6u0vAzbWJ8PLP8HKCzE3jyycJyoPBswEs1kF2zplIMwXWa\niLwYGQHiJY1WvwYta0ULC8DgoLm7ETFBf/v2KQwM5PClL+1Z0eLP5ZxPXkdHomm1+b4HfBH5lIgc\nFZE3RORBv/dHRKuTyQCjo6YHrYh5veuulReBMFtach7SwWl4iGefvcvhIhDHV7+623HU0UbwtUpH\nRKIAjgH4DwDeBPBjANtV9WdO27NKhyh4slnTsl3ynrEItVSq0Ev5K1/J4uqrTQ3+4mISu3aN4IUX\nCq17K02UqbHBH5QqnRsBvKGqk6q6AOBbAG72eZ9EVEeZTHhy9vUwNQXs3Wt+/8M/NGmfEydyuO22\nqaJgD5g00c6dK7/DL34H/PcD+IXt/Zv5ZUTUQpLBGfAx0BIJ9/kD3IaiaOQQFU1/aCsiQyIyISIT\np0+fbvbhEJEDpwe6VGzNGmD3bvf5A4LA74B/AsAHbO+vyS9bpqqjqtqvqv09PT0+Hw4RVcOa2nDH\nDuCSS8JRf29JJCpvY/fUUyb9VU1/htXspxZ+//l+DGCjiHxQRDoB3AbgOZ/3SUR1UJqaOHs2PLn8\nVAq49dbqtrcevLqlvxIJcxdgZ90VNIqvAV9VFwHcC+AFAEcAPKOqr/q5TyKqD6fURBjE4yaF9cwz\n3rYXMdtbnNJf8bgJ7E89VVz2at0VNAoHTyMiR14nN4nHTdrnZ47F1q0lEjF3MYlEdQ9TS89T6SQy\nIyP+BvaglGUSUYvyUpmTSpnOWhecxwhrOVbKqppgn3IYRy2TMeWZuZx5bWQrvhwGfCJyVKkyR6QQ\nzKp9UBk0qx1Cwkr/tAoGfCJyZA21EI06r7ffAbRynX40Wv2w0FYOfnQ0OK13LxjwichVJgPs2eP8\nELLSg8pWEI+b/39OaRk3qVTwUjVeMeATUVlOg6qVtmzt2wCFuwLrNZEw48YEQTS68v+H0wWrw2Hw\n+FZL4ZRiwCeiirw8hLS2UQUWF4tfz5wxg4TZLxrNMjS08v+H00Xtm98ExsbKX+haDcsyiagpurur\nH0cmHgc2bwZ+8IPVT8eYSplg305YlklEgbZ798qep5ZUyrSunVrYBw6YFvrY2OqGJZieNv0GmjUm\nfTNxikMiagorNVKpg1K5FMqvflX9fkUKUzlaI1lW2k+7YEqHiFpSOu08B280au4A1q0DfvlLM+a8\nRcQ5FdTqaR6mdIiorbl19srlzI/Tg2K39m2rdxzzigGfiFqSW2cv+/LS6iK36qBW7jhWDQZ8ImpJ\nbqNSlquTX81n2gkDPhG1JC8dwurxmXbCh7ZERC2OD22J/GTN/RfGYm5qWazDJ6qWNfefNR1U2Iq5\nqWWxhU9ULae5/+bmzHKiAGPAJ6qWW9F2WIq5qWUx4BNVy0sBOFEAMeATVSvsxdzUshjwiaoV9mJu\nalms0iFajUyGAZ5aDlv4REQhwYBPRBQSDPhERCHBgE9EFBIM+EREIcGAT0QUEr4FfBF5SEROiMhL\n+Z9tfu2LiIgq87sO/29UdZfP+yAiIg+Y0iEiCgm/A/6fisjLIvKkiFzltIGIDInIhIhMnD592ufD\nISIKr5qmOBSRAwA2OKwaBvAjAGcAKIAvArhaVe8o932
c4pCIqHpepzisKYevqls9HszXAXynln0R\nEVFt/KzSudr29rMAXvFrX0REVJmfVTqPiMhvwqR0pgD8iY/7IiKiCnwL+Kq6w6/vJiKi6rEsk4go\nJBjwiYhCggGfiCgkGPCJiEKCAZ+IKCQY8ImIQoIBn4goJBjwiYhCggGfiCgkGPCJiEKCAR9ANguk\n00AkYl6z2frv4557gI4OQMS83nNP/fdBRFRO6AN+NgsMDQHT04CqeR0aqj7ol7to3HMP8PjjwNKS\neb+0ZN5v3er9OxpxUSKiNqeqgfm54YYbtNFSKVUT6ot/UqnynxsbM9uIqCYSqtHoyu+4+26zbSTi\nvA/AfI/1ffF48bp43Cwvt87tmFKpleuJqD0BmFAPMbamGa/qrRkzXkUiJoSWEgFyucL7bBYYHgaO\nHwfWrQPefRe4eLHy9999t2nNu0kkgDNnTKt9enrl+lTKvLqtm5oyx7ZzJ3D2bPH6eBwYHQUymcrH\nSUSty+uMV6EP+G6BNpEALrvMrItEioN/vY2NAbff7r5exP2itHevSUHNzTl/1rooEFH78hrwQ5/D\nHxkxLWG7zk7TgrcuBH4GewDYUWbmgGgUSCad1yWT5q7DLdgD5o6EiAhgwEcmY9IeqZRpMadSwOWX\ne0vX1Eu5m6ylJeD8eed109POdyd2bhcLIgqf0AT8clUumYxJe+zda96X5sKbSWT1xxOPmzsYCyt9\niMItFAHfS+mlfZsgqeURi0jh90rngBcDovbn5yTmTWOvqEkmTUqkNM89N2cqW6ztgNqCaxBduAB8\n/vPmd6dc/9ycWQ4UP/i1LgYAK3yI2knbVelYLdlyDzLDJpUyFzW3Sp9ksnzZJxEFW2irdCpVrYSR\ndafjJJl0r+SZnmaah6idtF3AZxniSsmkc/mpSKGfgRNrfS1DThBRcLRdwHdrySYShV6rYXPttYU7\nn2jULLN35rLG+LFz6uxlz/kTUetpu4Dv1JKNx4Hdu00+OoxBf3y8kKNfWnLvuRuNFvoiuD3aKb2D\nYnUPUetoi4BvDzrDw8DgYHFHKvt4Mk4XhLBxC+a5nPkpd2G030HVa6RRImqMlg/4TkFnzx4T2HM5\n8zo8XGiBAoWetVTMHszd7pTsHbkqlXoSUbC0fB1+tfXlO3aYC0MqZfL6QepV20zWA9p02gR1647I\n3p/Bvhxwf0DOB+dEwVRTHb6I/EcADwG4DsCNqjphW/dnAL4AYAnAf1bVFyp932rq8MsNb7xuHQO6\nF6U5fa/DKpcb0pn1+0SN06g6/FcA3ALghyU7/zUAtwH4CIBPAXhMRKI17suRW1UOg713q63G8ZL2\nIaLgqCngq+oRVT3qsOpmAN9S1XlV/RcAbwC4sZZ9uXELOlQbL2kZp5FGOeEKUXD59dD2/QB+YXv/\nZn5Z3dmDDmBKC+fm2LqvVSTirdrGGmnUqu5hsCcKrooPbUXkAIANDquGVfXZWg9ARIYADAFAcpWD\nt1tBhmPolGefxauSpSUOoEbUbiq28FV1q6r+usNPuWB/AsAHbO+vyS9z+v5RVe1X1f6enp7qjt6G\nY+hUVm3nM5ZYErUXv1I6zwG4TURiIvJBABsB/B+f9gWApYCViBR3PrOPlV8OzytR+6gp4IvIZ0Xk\nTQCbAXxXRF4AAFV9FcAzAH4G4B8A/CdVdRixpX44lV95qoWcfCYD3HWXt6DP80rUPmqt0tmnqteo\nakxVe1X1d23rRlT1X6nqJlX9Xu2HWp5btU4i4feeW4d92IPHHjNTOloVNokEsGZN8fYssSRqLy0/\ntILFrURw926WaVpKc/L2CpszZ4CnnmKJJVE7a7sZr5xYUx4Gbb7aZhAxAZ6I2kdoZ7xyYrVkVZni\nYU6eKLxCEfDtwpziYU6eKNxafrTMalk56cFB55me2tngIHPyRGEWuhY+YILenj3ha+nv39/sIyCi\nZgplwAcKVT3RMmN
4JhLA3XcXKldaHTtREYVbaAM+UGjpuwXzyy4z9epW6WKrz5LldUA0C+erJWov\noQ74gAn6XifsbqX5cJ0uYtaAaF4CN+erJWo/oQ/4gLcJu4GVnbuCTNU5XeV1QDTOV0vUfhjw4X3m\nJqsDlzXHa601/da8uuVEyvyFyg0dkUq5d7DyksvnfLVE7YcBH95mbnJKcbz7LtDZWfxd8TgwMFB5\nnyLm2cCtt7pvk0oBV13lvC4adR86wrpYuXWy8tL5qpbPElFAqWpgfm644QYNqlRK1YT64p9EwqwT\nMa9jY2b7sTHVeNz5M4DZttz3Wtu4rRMpHNvYmPdjiMcL68up5bNE1FgAJtRDjG16kLf/BDngi1QO\nvKXGxswFofQz9sDp9r327y93wajE7WLg92eJqHG8BvxQDJ5WD+m08+BrqZRJzZRTmvsfGSmki9y+\n106kuJIoHudIlkRUwMHT6szrg10n5Sb69lLqqcphi4modqEbS2e1rADr1lKvx/e6tfS93EUQEVXC\nFn4VyrXU6/G9Y2Orv4sgIqqEAT9AvJSHEhGtFgM+EVFIMIcfIFbnLmtIA2v8GoCtfCKqHVv4AcLx\na4jITwz4AcLxa4jITwz4AcLxa4jITwz4AVJL5y4iokoY8AOEZZlE5CdW6QRMJsMAT0T+YAufiCgk\nGPCJiEKCAZ+IKCQY8ImIQoIBn4goJAI145WInAZQYf6nVesGcMan766HoB8fwGOsFx5jfQT9GBt5\nfClV7am0UaACvp9EZMLLFGDNEvTjA3iM9cJjrI+gH2MQj48pHSKikGDAJyIKiTAF/NFmH0AFQT8+\ngMdYLzzG+gj6MQbu+EKTwyciCrswtfCJiEKtbQO+iPydiLyU/5kSkZdctpsSkcP57SYaeHwPicgJ\n2zFuc9nuUyJyVETeEJEHG3V8+X1/WUReE5GXRWSfiFzpsl3Dz2Gl8yLGf8uvf1lEPtaI47Lt/wMi\n8r9F5Gci8qqI7HTYZouInLP9N/DnDT7Gsn+3AJzDTbZz85KIvCsi95Vs0/BzKCJPisisiLxiW7ZO\nRL4vIq/nX69y+WzT/j0DAFS17X8A/DWAP3dZNwWguwnH9BCA+ytsEwXwcwB9ADoB/BTArzXwGH8H\nQEf+978C8FdBOIdezguAbQC+B0AAfBzAPzf473s1gI/lf78cwDGHY9wC4DuN/m/P69+t2efQ4W9+\nCqbevKnnEMAnAXwMwCu2ZY8AeDD/+4NO/1aa/e9ZVdu3hW8REQFwK4Cnm30sq3AjgDdUdVJVFwB8\nC8DNjdocujESAAADMUlEQVS5qv4vVV3Mv/0RgGsate8KvJyXmwH8rRo/AnCliFzdqANU1bdU9Sf5\n338J4AiA9zdq/3XS1HNYYgDAz1XVr46ZnqnqDwG8XbL4ZgB78r/vAfAZh4829d8z0MYpHZtPAJhR\n1ddd1iuAAyLyoogMNfC4AOBP87fKT7rcAr4fwC9s799E84LGHTCtPSeNPodezktgzp2IpAH8awD/\n7LD6t/L/DXxPRD7S0AOr/HcLzDkEcBvcG23NPIeWXlV9K//7KQC9Dts0/Xy29AQoInIAwAaHVcOq\n+mz+9+0o37q/SVVPiMh6AN8XkdfyV3Bfjw/A4wC+CPOP7oswaac76rHfang5hyIyDGARQNbla3w7\nh61ORC4D8D8B3Keq75as/gmApKqezz/D+XsAGxt4eC3xdxORTgB/AODPHFY3+xyuoKoqIoEsf2zp\ngK+qW8utF5EOALcAuKHMd5zIv86KyD6Y2666/Edf6fhsx/l1AN9xWHUCwAds76/JL6sbD+fwjwD8\nHoABzSciHb7Dt3Powst58f3cVSIia2CCfVZVv1263n4BUNX9IvKYiHSrakPGX/Hwd2v6Ocz7NICf\nqOpM6Ypmn0ObGRG5WlXfyqe9Zh22afr5bPeUzlYAr6nqm04rReRSEbnc+h3mIeUrT
tvWW0ku9LMu\n+/0xgI0i8sF8K+c2AM814vgAU1EA4AEAf6Cqcy7bNOMcejkvzwH4XL7S5OMAztluuX2Xf3b0DQBH\nVPUrLttsyG8HEbkR5t/j2QYdn5e/W1PPoY3rXXozz2GJ5wAM5n8fBPCswzZN/fcMoL2rdAB8E8Bd\nJcveB2B//vc+mCflPwXwKkwao1HHthfAYQAv5//oV5ceX/79NpgKj5838vjy+34DJuf4Uv7na0E5\nh07nBcBd1t8bprLkv+fXHwbQ3+BzdxNMuu5l2/nbVnKM9+bP2U9hHor/VgOPz/HvFqRzmD+GS2EC\neJdtWVPPIczF5y0AF2Hy8F8AkAAwDuB1AAcArMtvG5h/z6rKnrZERGHR7ikdIiLKY8AnIgoJBnwi\nopBgwCciCgkGfCKikGDAJyIKCQZ8IqKQYMAnIgqJ/w9fqTIgdluVOQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "colours = ['b','g','y']\n", "for i in range(k):\n", " all_assigned = np.array(new_assignments.filter(lambda (a, x): a==i).map(lambda (c, x): x).collect())\n", " plt.scatter(all_assigned[:,0], all_assigned[:,1], color=colours[i])\n", " plt.scatter([new_centroids[i][0]], [new_centroids[i][1]], color=['r'])\n", "plt.show()\n", "plt.gcf().clear()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's make this more efficient: We create functions for assigning points to centroids, and for recalculating the centroids as the mean of the points in each cluster. The combination of those functions gives us an iteration." 
] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [
"def assign(rdd, centroids):\n",
"    \"\"\"Label each point in rdd with the index of its cheapest centroid.\n",
"\n",
"    Costs are computed with cost_function against the first k centroids\n",
"    (k and cost_function are read from the notebook's global scope); ties go\n",
"    to the lowest index. Returns an RDD of (centroid_index, point) pairs.\n",
"    \"\"\"\n",
"    def label(x):\n",
"        costs = [cost_function(x, centroids[i]) for i in range(k)]\n",
"        return (costs.index(min(costs)), x)\n",
"\n",
"    return rdd.map(label)\n",
"\n",
"def recalculate_centroids(assignments):\n",
"    \"\"\"Return a list holding the mean point of each of the k clusters.\n",
"\n",
"    assignments is an RDD of (centroid_index, point) pairs. Raises\n",
"    ZeroDivisionError if a cluster has no points, because its mean is undefined.\n",
"    \"\"\"\n",
"    new_centroids = []\n",
"    for i in range(k):\n",
"        # count() and sum() are actions that run right here, so the lazily\n",
"        # evaluated filter still sees this turn's value of i.\n",
"        members = assignments.filter(lambda pair: pair[0] == i).values()\n",
"        size = members.count()\n",
"        if size == 0:\n",
"            # Fail with a message that names the cluster instead of a bare division error.\n",
"            raise ZeroDivisionError(\"cluster %d has no points assigned\" % i)\n",
"        new_centroids.append(members.sum() / size)\n",
"    return new_centroids\n",
"\n",
"def iteration(rdd, centroids):\n",
"    \"\"\"Run one k-means step: assign the points, then recompute the centroids.\n",
"\n",
"    Returns the tuple (assignments, new_centroids).\n",
"    \"\"\"\n",
"    assigned = assign(rdd, centroids)\n",
"    new_centroids = recalculate_centroids(assigned)\n",
"    return assigned, new_centroids\n",
"\n",
"assigned, new_centroids = iteration(rdd, new_centroids)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"To visually check the results let's define a function for plotting the current cluster assignments."
] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def draw_clusters(assignments, centroids):\n",
"    \"\"\"Scatter-plot each cluster in its own colour with its centroid in red.\n",
"\n",
"    assignments is an RDD of (centroid_index, point) pairs; the first two\n",
"    coordinates of every point and centroid are drawn.\n",
"    \"\"\"\n",
"    colours = ['b','g','y']\n",
"    # Bring the data to the driver once instead of running one Spark job per cluster.\n",
"    labelled = assignments.collect()\n",
"    fig, ax = plt.subplots()\n",
"    for i in range(k):\n",
"        members = np.array([x for (a, x) in labelled if a == i])\n",
"        # An empty cluster gives a 1-D array that cannot be column-sliced, so skip it.\n",
"        if len(members) > 0:\n",
"            # Cycle through the palette so more than three clusters still get a colour.\n",
"            ax.scatter(members[:,0], members[:,1], color=colours[i % len(colours)])\n",
"        ax.scatter([centroids[i][0]], [centroids[i][1]], color='r')\n",
"    plt.show()\n",
"    # Release the figure so repeated calls do not accumulate open figures.\n",
"    plt.close(fig)"
] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9wHOWZJ/DvMyNpzPBD4JEskxCNosNx2CyXXFBx670k\n5S359hLX7pJQexSuwdEm4XTAsWf+4Cj2VLULtaWtLGGz+CoHKcWBeK255Ki6eIHEWS4W58tulbMX\nkSMYYmyIVlKwsWSbw8RWsCzNc3+805qeUfdMj2Z6umf6+6kSM9Pd0/3Ssp5+++n3h6gqiIio9cWC\nLgARETUGAz4RUUQw4BMRRQQDPhFRRDDgExFFBAM+EVFEMOATEUUEAz4RUUQw4BMRRURb0AWw6+rq\n0r6+vqCLQUTUVF588cUzqtpdabtQBfy+vj5MTk4GXQwioqYiIjNetmNKh4goIhjwiYgiggGfiCgi\nGPCJiCKCAZ+IKCIY8ImIIoIBn4goIhjwiYgiggGfiEJnbi6Lw4f7cOhQDIcP92FuLht0kVpCqHra\nEhHNzWVx7NgwcrkFAMDFizM4dmwYANDTkwmyaE2PNXwiCpWpqZGVYG/J5RYwNTVS9b54p1CMAZ+I\nApE9kkXfY32IPRxD32N9yB4xwfjixVnH7a3lXoO4dadw8eIMAF25U3DaPioXBgZ8IlrFLRjXc//D\nzw1j5twMFIqZczMYfm4Y2SNZJBK9jt9JJHo9B/G5uSyOHh3ydKdQzYWh2YmqBl2GFQMDA8rRMomC\nZQXjhUuFYJlsT2Ls98eQubE+OfS+x/owc271AI9xiWNr1zL+04cFiVghNsViSWzcOISTJ8cALK/6\nXiKRxpYt0wBWPwNwkkikcfHiLBKJXiwtncfy8lnHbfr7RzE1NbKybX//aCifI4jIi6o6UHE7Bnwi\nsnMLxunONKbvm17zfrNHshiZGMHsuVkoysedwW7g3/ULNqxTrEukkUptx6lTe8sEccENN+zLB+dK\nIwULUOH4llgsWXTMWCyJzZvHQhf0vQZ8pnSIqMjsOeccuttyL0pTOOUMdgN39gPdCcXZxTj6+0cx\nP/902Rp7PL7elpYpx3uwB+J1e3gcFgz4RFSkt9M5h+62vJLskSyG9g8VpYjcDHYD928GNq4DYgJ0\ndSzj6NE7sLS0OuVil8u9V/aCYMRQTc3eKXUEmGaizfpglwGfiIqMDo4i2Z4sWpZsT2J0cLTqfVk1\n+2V1Dp6l7uwH1sWrPgxUL1TYoh0iXrsdxbFx4xASibTrFs36YJcBn4iKZG7MYOz3x5DuTEMgSHem\n1/zAdmRixFPN3rIhUfUhPBDE4wmoLnrcfhlvvfVNpFLb8zV9Z82Y3mHAJ6JVMjdmMH3fNPbdug8A\nsPO7Oz01zyxtzun08Lec+YtrLnIZiuXl89V9QxcxP/80Nm8eq1jTb6ZaPgM+ETkq11be6/YCqeqY\ne6aA97xlf3y3tHQWPT0ZbNkyXTboHz26E8eP39PAkq0dAz5Ri6u2E5W1/R3fvWNVOmbh0gJGJkY8\nb1+pRU6pidPAo8eAc4ve29I0Qiq1HXC9eClOnvx6UU0/rD13OXgaUQsr7URl1dIBOObks0ey+MLf\nfgGXcpdc92lvnunUSase1PpPdTcIdRbD3FwWx4/vcuyYVUwxNTWCnp5MqAd/Y8crohZWbSeqrke6\ncPbX5YNbXOLIaQ69nb04v3i+4vZeDXYD914PdLYDEmigLxDpqOJhr2Dr1hwOH+5z7A9g7w1cb147\nXrGGT9TCqu1E5SV4W00sq30gW47V/n4tTTL95D3YY2UMoEqDvwWpLjl8EXlSROZF5BXbsvUi8kMR\neT3/ek09jkVE3tW7E5Vf1tr+PixisST6+00/hXKDvwWtXg9tvwXg0yXLHgQwoaqbAEzkPxNRA1Xb\nieqKjisaUaxV/Gl/3yjxovF1+vtHIdJRtIVIx8oFIUh1Cfiq+
iMAb5csvgXA3vz7vQA+W49jEZF3\n1XaiSsSDibz+tL9vlNyqh7Glz0ZVl/D667sCb7XjZw6/R1Xfyr8/BaDHaSMRGQYwDAC9vcHf8hC1\nmsyNGc+9ZN/+dWm9rTH2TAEjN4TnYW01SlM1pvdtaSun3Mp4QEG22mlIO3w1lzvH5kCqOqaqA6o6\n0N3d3YjiEJGLoHL7E6fD1e6+GouLZ4pq7JVH7AxuWAY/A/6ciFwLAPnXeR+PRURrUNopa/um7aty\n/tX2ll2rZg34qhdw9OgdOHQojpde2gavnQeCaLXjZ8B/FsBQ/v0QgGd8PBYRVclpKIS9P9uLoY8O\nFeX89926D/pninSn+/ACTna8DPzTXwPLD5nXHS+X3775u/3n8M47E/B66Qqi1U5dcvgi8m0AWwF0\nicibAP4MwJcBPC0iXwIwA+C2ehyLiOrDaSTLhUsLOPD6gVWdsrJHsji/6H0Ash0vA994Drg8n8ru\nO2c+A8C3/3ktpW4N9macjVSXgK+qO1xWDdZj/0RUf146ZWWPZLHrB7uq7k37FxOFYG+5/JJZ7hbw\nAx9JwUdtbSnE41cEPjcue9oSRVRvZ69jb1nrwW0t4+T0nqtu+WB36wZ7ANi0aXfg4+gArZA2I6I1\nqdQpq9rJS+xmO6tbfmd/czbJ9CIeT4Ui2AMM+ESRValTVi2Tlv/nQeBCe/GyC+1muZOepu5pa4kB\nKB4fIhZL4kMf2h1McRwwpUMUYeU6ZbmlfLyw8vR/MWHSOLOdJtg34oHthoNA/x4gMQ9c3ABM3QnM\nb/PnWLFYsmhYhbm5LKamRgLP1bvh8MhE5Ch7JIs7vntHQ471wqfqk9LZcBDY/CgQtw3VsJwAjt1f\n76AvoQroXodHZkqHiBxlbswgdVnKcd3l7ZfX9VjLdap39u8pDvaA+dy/pz77B8y49lu35rBly3Qo\ngn01GPCJWkC10xh6ddtHnLvPXLh0oS77t/zf/wfUI9mQcOnP77Z8LZaWzgc+CNpaMeATNblqJxuv\nxoHXD9ShhJVturI+KZ2LG6pbvhZmukNdGQRtLUE/qDlvGfCJmpxbj9nSycbXopaWOtXobK+8jRdT\nd5qcvd1ywiz3w1oGQbPmvDWDrNV24agWAz5Rk6t2GsNq1Hv0zHrn/kvNbzMPaN/rAVTMa/0f2Bar\ndhC0qamRlQnOLY0aPZMBn6jJ+TmNoVPnrLXqiHfg8x/9vO+jb85vA378HeB/v2Be1xLsRawLk2lX\nn0ik0dbm/AC72kHQgpzzlgGfqMlVO41hNeyds7xKd6Zx98DdRS18Upel8OQtT+LA6wegDqNJniud\nLyRgHR1d2LpVsXXrErZuVWzZMo1Nm3YjFis+z2sZBC3IOW/ZDp+oBWSPZDEyMYLZc7Po7ezF6OCo\n51muqiEPl6+dpzvTq0batIs9HHMM+IPdwAMfBjp8r4IKvA1fLNi6NbdqqVvHqmo6XFk5fHtap7QD\nV9X/Vx7b4bOnLVELqGYaw1qkO9OuvW+93FW49d6dOG1eH/zNFNpy1Y3MWZ6grW09lpbeRiLRi1Rq\nO06d2rsqh17Krbbd05NZFZRLA3ilKQytZUH0yGVKh4g8c8vppy5LlZ0c3f59NxOngW2fOoNEorqJ\nVq6+ehDve9/dKB1vMxZL4oYb9uETnziz0lHqQx96HJs3j+WPIYjHUxDpWPW9atI0a3kI29OTwZYt\n0w3vwMWAT0SeOQ24Nn7rOM48cMbTHUa53rvWc4L+/tFVufJy3nnnBZw8+QTa2tYjHk/BDHuQdk2R\n2IPtJz95Bh/+8JMrF4By33MT5EPYajGlQ0RVqTV9tPszu1eNs29PBxWnPGZgWsosl9mjyckvLZ1d\nqdVXE7Cd0jTVSCR6HScuD2IKw0pYwyeiuis31EOlYZkBey3ctJTxOj1Ko9qz2zndkQQ1hWElrOET\nUV2VzpRlDfUAYCWoV3uX4
FaLdtLoVEqQD2GrxWaZRFRXfY/1ObbEqdRksxynpoxuEok0tmxZ23Ga\nFYdHJqJA+DHUQ09Pxta6BijMLLW6ZU5pKiWogcrCiCkdIqqrSpOjr5VbG/hyqZRq28i3OgZ8Iqqr\n0cHRsq1w6qlSC5tybeSjGPCZ0iGiuvLSCqdRmqmNfCOwhk9EddeooR4qaaY28o3AGj4RtaxmaiPf\nCAz4RNSyilv3rG3ohFbClA4RtbRah05oJb4HfBGZBvArmMEwlrx0DiAiovprVA3/d1T1TIOORURE\nDpjDJyKKiEYEfAVwUEReFJHhBhyPiIgcNCKl8wlVPSEiGwD8UEReU9UfWSvzF4FhAOjtjWbbWCKi\nRvC9hq+qJ/Kv8wD2A7i5ZP2Yqg6o6kB3d7ffxSEiiixfA76IXC4iV1rvAfwugFf8PCYRETnzO6XT\nA2C/iFjH+m+q+nc+H5OIiBz4GvBVdQrAR/08BhERecNmmUREEcGAT0QUEQz4RBRKnJqw/hjwiSh0\nrKkJzVj2ujI1YWnQr+WiEMULCgM+EYVOuakJLV4vCk5q+W4zY8AnotBxmqXKWn74cB+OH78HR48O\nVbwouPFyQWlFHA+fiELF1LIFZhiu1S5enMHJk0+4ft/LfLVRneuWNXwiChVTy3YO9l54ma/WbZtW\nn+uWAZ+IfJE9kkXfY32IPRxD32N9yB5xzo+XbveeSzrHC6/z1UZ1rlumdIio7rJHshh+bhgLl0ye\nfObcDIafM6OjZ27MlN1u/j1Bz7q11PDjnuertbaZmhrBxYuzSCR60d8/2vJTIYrq2m+d6m1gYEAn\nJyeDLgYR1ajvsT7MnFtdU093pjF93zQAE+yH9g9hWZeLthnsBkZuAMwQXF5ZOf9C7r+tLYVNm3Z7\nCuJzc9mmDv4i8qKX6WOZ0iGimmWzQF8fEIuZ15lzzg8/Z96ZRSwGdP1OFl/cP7wq2APAxGmvGfx4\n/tX+gLfwzaWlszh69A4cOtSGQ4cEhw4J/uEfuhzb8keliSYDPhHVJJsFhoeBmRlA1bziHZeHn+d6\noQqc/dgIFrXQLHKwG/j2vwQmPmVeLyx7CU3WxaLS5aFwUVlaOovXXvtiUTD32ua/XCetZunExYBP\nRDUZGQEWFkoWTowCi8UPRbGYNMsBoLNwBzDYDdy/Gdi4DoiJeb1MBIuLHb6UV3URx4/vWvns3kSz\n0Oa/3B1AM90hMOATUU1mneLlkQzw3BjwThpQMa/PjZnlAHCucAdwZz+wLl789bb4MhYWrsSpU2mo\nmjuHelpePotDhwR///ddMCkhZ6bN/9fL3gG43SEcPToUuqDPgE9EFZXm6LO2OLZ+vcuXjmSAx6aB\nh3Pm9YjtIajtDmBDwvnrV131NnbsmAZQ7QNc75aXzwLIVdjKrQPYbNGrw95DV9NnwCeispxy9MPD\nxUG/arY7gPmLzpvMz4e7E5TVSatcZ62wDdfAgE9EZTnl6BcWzHIAePvtNe44fwew59FxvPdecb7/\nvfeS2LMnvJ2g7J20nDpx2YVpuAYGfCIqyzFHb1vumtLxaGIig0cfHcOpU2nkcoJTp9J49NExTEyY\nFNDk5KBjDt+P3L4bkTbE46n8p/hKzX1uLouengw2bx5DoZlosTAN18COV0RUVl9fvqlliXQamJ4G\nurqAs2f9LcMjj2zDwMDEyufFxQS+8pVv4t57d+Hqq30++IoYTFC/VFgSS2LjxiGcPXsg30qneNC3\nWCzpufdvLbx2vOLQCkRUJJs16ZrZWaC3F7j+evPeXjdsbwfOnzcPcRtRZ3zggYOu6x588I/Q1rbk\nfyGQQ+kD3lxuASdPfh3FHb9M0E8k0qHrscuAT0QrrAe0Vs5+Zsa5dq/qf63eCyvtc++9u9DZaQq0\nsHA5Ll1ah6uuehu5XAxtbat789qp1toKqPSKZ4L9li3TtezUF0zpENEKt/RNsxoczOL++4e
xbl1p\nzzBjcbED3//+l/CZz+wt2qb2i4Bg61ZzN9CIcXqY0iGiqrVSsAcKdwB33jmCDRtm8e675gnzVVe9\njfn5XuzZM4qJiQxeffVfrWwzP9+Lw4e347OffaKGoB9baX9/7NjwSscsqxcugEBSPazhExEAk87Z\nubNxLV/Cbv/+LscHwrmcQEQrXgwuXkyivV0Qi11Yta7eKR+OlklEVRkZYbC3+9rXdmNxsb1o2eJi\nO955Zx9On05X/H4isQCR1cEeCK5tPgM+EQFwb2/vJBaByDExkcEjjzxV1D/g/PmncOutGYyNja7q\nLObE7S4gqLb5zOETEQDTBNNrDj9XafiZFjExkVl5DtDRATz5ZGE5UHg24KU1kF1QUylG4DpNRF6M\njgLJkkqrX4OWNaPFRWBoyNzdiJigv2PHNAYHc/jyl/euqvHncs4nr60tFVjbfN8Dvoh8WkSOicgb\nIvKg38cjorXJZICxMdODVsS83nXX6otAlC0vOw/p4DQ8xDPP3OVwEUjia1/b7TjqaCP42kpHROIA\njgP41wDeBPATADtU9edO27OVDlH4ZLOmZrvsPWMRael0oZfyV7+axbXXmjb4S0u9ePTRUTz/fKF2\nb6WJMjVW+MPSSudmAG+o6pSqLgL4DoBbfD4mEdVRJhOdnH09TE8D+/aZ93/4hybtc+JEDrffPl0U\n7AGTJtq1a/U+/OJ3wH8/gF/aPr+ZX0ZETaQ3PAM+hloq5T5/gNtQFI0coiLwh7YiMiwikyIyefr0\n6aCLQ0QOnB7oUrH2dmD3bvf5A8LA74B/AsAHbJ+vyy9boapjqjqgqgPd3d0+F4eIqmFNbbhzJ3DZ\nZdFof29JpSpvY/fUUyb9VU1/hrUcpxZ+//p+AmCTiHxQRDoA3A7gWZ+PSUR1UJqaOHs2Orn8dBq4\n7bbqtrcevLqlv1IpcxdgZ90VNIqvAV9VlwDcC+B5AEcBPK2qr/p5TCKqD6fURBQkkyaF9fTT3rYX\nMdtbnNJfyaQJ7E89Vdzs1boraBQOnkZEjrxObpJMmrTPzx0bWzeXWMzcxaRS1T1MLT1PpZPIjI76\nG9jD0iyTiJqUl5Y56bTprHXBeYywpmOlrKoJ9mmHcdQyGdM8M5czr42sxZfDgE9Ejiq1zBEpBLNq\nH1SGzVqHkLDSP82CAZ+IHFlDLcTjzuvtdwDN3E4/Hq9+WGgrBz82Fp7auxcM+ETkKpMB9u51fghZ\n6UFlM0gmzf+fU1rGTTodvlSNVwz4RFSW06BqpTVb+zZA4a7Aek2lzLgxYRCPr/7/cLpgtTkMHt9s\nKZxSDPhEVJGXh5DWNqrA0lLx65kzZpAw+0UjKMPDq/8/nC5q3/oWMD5e/kLXbNgsk4gC0dVV/Tgy\nySSwZQvwwgtrn44xnTbBvpWwWSYRhdru3at7nlrSaVO7dqphHzxoaujj42sblmBmxvQbCGpM+iBx\nikMiCoSVGqnUQalcCuXXv67+uCKFqRytkSwrHadVMKVDRE2pr895Dt543NwBrF8P/OpXZsx5i4hz\nKqjZ0zxM6RBRS3Pr7JXLmR+nB8Vu9dtm7zjmFQM+ETUlt85e9uWlrYvcWgc1c8exajDgE1FTchuV\nslw7+bV8p5Uw4BNRU/LSIawe32klfGhLRNTk+NCWiIiKMOATEUUEAz4RUUQw4BMRRQQDPhFRRDDg\nExFFBAM+EVFEMOATEUUEAz4RUUQw4BMRRQQDPhFRRDDgExFFBAM+EVFEMOATEUWEbwFfRB4SkRMi\n8lL+Z7tfxyLyJJs1E6HGYuY1mw26REQN1ebz/v9aVR/1+RhElWWzwPAwsLBgPs/MmM9AdGa/oMhj\nSoeiYWSkEOwtCwtmOVFE+B3w/1hEXhaRJ0XkGqcNRGRYRCZFZPL06dM+F4cia3a2uuVELaimgC8i\nB0XkFYefWwA8AaAfwMcAvAXgr5z2oapjqjqgqgPd3d2
1FIfIXW9vdcuJWlBNOXxV3eZlOxH5BoDv\n1XIsopqMjhbn8AEgmTTLiSLCz1Y619o+fg7AK34di6iiTAYYGwPSaUDEvI6N8YEtRYqfrXQeEZGP\nAVAA0wD+vY/HIqosk2GAp0jzLeCr6k6/9k1ERNVjs0wioohgwCciiggGfCKiiGDAJyKKCAZ8IqKI\nYMAnIooIBnwioohgwCciiggGfCKiiGDAJyKKCAZ8NGbmu3vuAdrazLhdbW3mMxFRI0U+4Fsz383M\nAKqFme+qDfrlLhr33AM88QSwvGw+Ly+bz9u2ed8Hp2Mlopqpamh+brrpJm20dFrVhPrin3S6/PfG\nx802IqqplGo8vnofd99tto3FnI8BmP1Y+0smi9clk2Z5uXVuZUqnV68notYEYFI9xFgx24bDwMCA\nTk5ONvSYsZgJoaVEgFyu8DmbNdOfzs4C69cD774LXLpUef93321q825SKeDMGVNrn5lZvT6dNq9u\n66anTdl27QLOni1en0xyyHeiKBCRF1V1oOJ2UQ/4boE2lQKuuMKsi8WKg3+9jY8Dd9zhvl7E/aK0\nb9/qiZzsrIsCEbUurwE/8jn80VFTE7br6DA1eOtC4GewB4CdZWYOiMfLT8c6MuIe7AHO0U1EBZEP\n+E4z3115pbd0Tb2Uu8laXgbOn3deNzPjfHdixzm6icgSmYBfrpVLJmPSHvv2mc+lufAgiay9PKVz\ndLOlD1G0RSLge2l6ad8mTGp5xCJSeF/pHPBiQNT6/JzEPDD2FjW9vSYlUprnXlgwLVus7YDagmsY\nXbgAfOEL5r1Trn9hwSwHih/8WhcDgC18iFpJy7XSsWqy5R5kRk06bS5qbi19envLN/skonCLbCud\nSq1Wosi603HS2+vekmdmhmkeolbScgGfzRBX6+11bn4qUuhn4MRaX8uQE0QUHi0X8N1qsqlUoddq\n1Fx/feHOJx43y+yduawxfuycOnvZc/5E1HxaLuA71WSTSWD3bpOPjmLQn5go5OiXl9177sbjhb4I\nbo92Su+g2LqHqHm0RMC3B52REWBoqLgjlX08GacLQtS4BfNczvyUuzDa76DqNdIoETVG0wd8p6Cz\nd68J7LmceR0ZKdRAgULPWipmD+Zud0r2jlyVmnoSUbg0fTv8atuX79xpLgzptMnrh6lXbZCsB7R9\nfSaoW3dE9v4M9uWA+wNyPjgnCqea2uGLyL8F8BCAGwDcrKqTtnV/AuBLAJYB/EdVfb7S/tbSDr/c\n8Mbr1zOge1Ga0/c6rHK5IZ3Zfp+ocRrVDv8VALcC+FHJwX8DwO0APgLg0wAeF5F4jcdy5NYqh8He\nu7W2xvGS9iGi8Kgp4KvqUVU95rDqFgDfUdWLqvpPAN4AcHMtx3LjFnSoNl7SMk4jjXLCFaLw8uuh\n7fsB/NL2+c38srqzBx3ANC1cWGDtvlaxmLfWNtZIo1brHgZ7ovCq+NBWRA4C2OiwakRVn6m1ACIy\nDGAYAHrXOHi7FWQ4hk559lm8Klle5gBqRK2mYg1fVbep6m86/JQL9icAfMD2+br8Mqf9j6nqgKoO\ndHd3V1d6G46hU1m1nc/YxJKotfiV0nkWwO0ikhCRDwLYBOD/+HQsAGwKWIlIcecz+1j55fC8ErWO\nmgK+iHxORN4EsAXA90XkeQBQ1VcBPA3g5wD+DsB/UFWHEVvqh1P5ladayMlnMsBdd3kL+jyvRK2j\n1lY6+1X1OlVNqGqPqv4b27pRVf1nqrpZVX9Qe1HLc2utk0r5feTmYR/24PHHzZSOVgubVApoby/e\nnk0siVpL0w+tYHFrIrh7N5tpWkpz8vYWNmfOAE89xSaWRK2s5Wa8cmJNeRi2+WqDIGICPBG1jsjO\neOXEqsmqMsXDnDxRdEUi4NtFOcXDnDxRtDX9aJnVsnLSQ0POMz21sqEh5uSJoixyNXzABL29e6NX\n0z9wIOgSEFGQIhn
wgUKrnniZMTxTKeDuuwstV5odO1ERRVtkAz5QqOm7BfMrrjDt1a2mi80+S5bX\nAdEsnK+WqLVEOuADJuh7nbC7mebDdbqIWQOieQncnK+WqPVEPuAD3ibsBlZ37gozVed0ldcB0Thf\nLVHrYcCH95mbrA5c1hyvtbbpt+bVLSdW5jdUbuiIdNq9g5WXXD7nqyVqPQz48DZzk1OK4913gY6O\n4n0lk8DgYOVjiphnA7fd5r5NOg1cc43zunjcfegI62Ll1snKS+erWr5LRCGlqqH5uemmmzSs0mlV\nE+qLf1Ips07EvI6Pm+3Hx1WTSefvAGbbcvu1tnFbJ1Io2/i49zIkk4X15dTyXSJqLACT6iHGBh7k\n7T9hDvgilQNvqfFxc0Eo/Y49cLrt177/cheMStwuBn5/l4gax2vAj8TgafXQ1+c8+Fo6bVIz5ZTm\n/kdHC+kit/3aiRS3JEomOZIlERVw8LQ68/pg10m5ib69NPVU5bDFRFS7yI2ls1ZWgHWrqddjv241\nfS93EURElbCGX4VyNfV67Hd8fO13EURElTDgh4iX5qFERGvFgE9EFBHM4YeI1bnLGtLAGr8GYC2f\niGrHGn6IcPwaIvITA36IcPwaIvITA36IcPwaIvITA36I1NK5i4ioEgb8EGGzTCLyE1vphEwmwwBP\nRP5gDZ+IKCIY8ImIIoIBn4goIhjwiYgiggGfiCgiQjXjlYicBlBh/qc16wJwxqd910PYywewjPXC\nMtZH2MvYyPKlVbW70kahCvh+EpFJL1OABSXs5QNYxnphGesj7GUMY/mY0iEiiggGfCKiiIhSwB8L\nugAVhL18AMtYLyxjfYS9jKErX2Ry+EREURelGj4RUaS1bMAXkf8uIi/lf6ZF5CWX7aZF5Eh+u8kG\nlu8hETlhK+N2l+0+LSLHROQNEXmwUeXLH/srIvKaiLwsIvtF5GqX7Rp+DiudFzH+S379yyLy8UaU\ny3b8D4jI/xKRn4vIqyKyy2GbrSJyzvZv4E8bXMayv7cQnMPNtnPzkoi8KyL3lWzT8HMoIk+KyLyI\nvGJbtl5Efigir+dfr3H5bmB/zwAAVW35HwB/BeBPXdZNA+gKoEwPAbi/wjZxAL8A0A+gA8DPAPxG\nA8v4uwDa8u//EsBfhuEcejkvALYD+AEAAfBbAP6xwb/fawF8PP/+SgDHHcq4FcD3Gv1vz+vvLehz\n6PA7PwXT3jzQcwjgUwA+DuAV27JHADyYf/+g099K0H/Pqtq6NXyLiAiA2wB8O+iyrMHNAN5Q1SlV\nXQTwHQCs+O/JAAADMElEQVS3NOrgqvo/VXUp//HHAK5r1LEr8HJebgHwN2r8GMDVInJtowqoqm+p\n6k/z738F4CiA9zfq+HUS6DksMQjgF6rqV8dMz1T1RwDeLll8C4C9+fd7AXzW4auB/j0DLZzSsfkk\ngDlVfd1lvQI4KCIvishwA8sFAH+cv1V+0uUW8P0Afmn7/CaCCxpfhKntOWn0OfRyXkJz7kSkD8C/\nAPCPDqt/O/9v4Aci8pGGFqzy7y005xDA7XCvtAV5Di09qvpW/v0pAD0O2wR+Ppt6AhQROQhgo8Oq\nEVV9Jv9+B8rX7j+hqidEZAOAH4rIa/kruK/lA/AEgD+H+aP7c5i00xfrcdxqeDmHIjICYAlA1mU3\nvp3DZiciVwD4HwDuU9V3S1b/FECvqp7PP8P5WwCbGli8pvi9iUgHgD8A8CcOq4M+h6uoqopIKJs/\nNnXAV9Vt5daLSBuAWwHcVGYfJ/Kv8yKyH+a2qy7/6CuVz1bObwD4nsOqEwA+YPt8XX5Z3Xg4h38E\n4PcADGo+EemwD9/OoQsv58X3c1eJiLTDBPusqn63dL39AqCqB0TkcRHpUtWGjL/i4fcW+DnM+wyA\nn6rqXOmKoM+hzZyIXKuqb+XTXvMO2wR+Pls9pbMNwGuq+qbTShG5XESutN7DPKR8x
WnbeivJhX7O\n5bg/AbBJRD6Yr+XcDuDZRpQPMC0KADwA4A9UdcFlmyDOoZfz8iyAz+dbmvwWgHO2W27f5Z8dfRPA\nUVX9qss2G/PbQURuhvl7PNug8nn5vQV6Dm1c79KDPIclngUwlH8/BOAZh20C/XsG0NqtdAB8C8Bd\nJcveB+BA/n0/zJPynwF4FSaN0aiy7QNwBMDL+V/6taXly3/eDtPC4xeNLF/+2G/A5Bxfyv98PSzn\n0Om8ALjL+n3DtCz5r/n1RwAMNPjcfQImXfey7fxtLynjvflz9jOYh+K/3cDyOf7ewnQO82W4HCaA\nd9qWBXoOYS4+bwG4BJOH/xKAFIAJAK8DOAhgfX7b0Pw9qyp72hIRRUWrp3SIiCiPAZ+IKCIY8ImI\nIoIBn4goIhjwiYgiggGfiCgiGPCJiCKCAZ+IKCL+P6cuTLOy9efGAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "draw_clusters(assigned, new_centroids)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And now repeat the following iteration as often as you like... You will see how the centroids move and the algorithm slowly finds the correct clustering. Eventually, when there is no reassignment of points any more, the algorithm stops." ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9wHGd5B/DvcyfpmrM9AkmOiAm6i4tjXCaFEg2DW9K6\ndVpALYRkpplkzsEUipqYdsgfHSagGRLKiLYpbXFnmmQU6qBY11BmmkAAA8VuPXQYQ1GYECcYJ8GR\nRH45tjMoieWRZd3TP/ZW2jvt7u3e7a+7/X5mNNLtrnbfrOJn333f531fUVUQEVHny8RdACIiigYD\nPhFRSjDgExGlBAM+EVFKMOATEaUEAz4RUUow4BMRpQQDPhFRSjDgExGlRFfcBbAaGBjQYrEYdzGI\niNrKI488clpVNzY6LlEBv1gsYnp6Ou5iEBG1FRGZ9XIcm3SIiFKCAZ+IKCUY8ImIUoIBn4goJRjw\niYhSggGfiCglGPCJKJHKR8sofrGIzGczKH6xiPLRctxFanuJysMnIgKMYD/6jVEsLC0AAGbnZzH6\njVEAQOmKUpxFa2us4RNRrOxq8mOHxlaCvWlhaQFjh8YCu0YaMeATUWzMmvzs/CwUulKTn523Hzg6\nNz9X87tegrjTNeyO7/QHAwM+ETkKOwA61eSdDPUOoXy0jIE7B7DrwV0Ng3j5aBm7H9rt6W3Bz4Oh\nXTHgE5GtKAKgtcbeSL47j5EtIxj9xijOnDuzZn99EDfLv6zLtuebnZ/13IzUKTV/UdW4y7BieHhY\nOXkaUTIUv1i0bVop9BYwc+tMU+c0A+vc/ByGeofw2vnXbIN3vaxkMXntJMYOjTk29wCAQLD/uv0N\nj6uX7867vlnU78935zHx/onEdCCLyCOqOtzoONbwiciWU+3bT63c5NQM88riK+jKNE4WXNZl/GDu\nBw2DeN9Ffa59AE4WlhaQlaztvqxkA+1AjhMDPhHZGuod8rXdidm0YleTX6os4ULlgqfz3D19t+v+\n7kw3APc+ADfLugyB1GzLd+ddm4TarXmHAZ+IbI3vHEe+O1+zLd+dx/jOcV/nsWsbD4OINGweMh8K\nThS1TdzbL92OQm/B8fh269hlwCciW6UrSph4/wQKvQUIBIXeQlPt1s00ATXj/PL5NTV0K4Eg15Xz\ndc5DzxzCm/vevObBZ9VOzTsM+ETkqHRFCeM7xzHUO4S5+bmVjBU39RktfRf1RVTatTX0+n2vnX/N\n9zkPzxxeefA5mZ2fbYtaPgM+ETnym5ppd7zXjtmkWtZllK4oYebWGdegf9ODN2HPt/ZEWDL/GPCJ\nUsBvHrl5/K4Hd3ketOR0/FJlCcsV+47PdjOyZcSx2UihuGf6npV7m8Tc/fZ97BKRJ34nIisfLePP\nvvZnWKosOZ6zfooD6/ntuDW1tIPy0TI+8e1PNOwUVujKwzCJk79x4BVRh/M7gGrgzoGGga3/on6s\n71mPufk5ZCTjmLrYCXoyPYAYncJeCARDvUOBD1pzvabHgVes4RN1OL8DqLyMfD1z7szKcZ0c7AHg\nfMVboDeZHdx2ospYchJIG76I7BORl0Tkccu2PhH5nog8Vf3++iCuRUT+BDWAihozxykk9Z4H1Wn7\nZQDvrdt2G4BDqroFwKHqZyKKmN8BVOt71kdRrI6TlezKOAW7ey4QjGwZial0hkACvqp+H8DLdZuv\nATBZ/XkSwAeDuBYR+eN3AFUu629wEhkqWlm5p6UrStj9tt01GT0KxZd+8iUM3DkQW+ZOmG34g6r6\nQvXnFwEM2h0kIqMARgFgaIivmERhKF1R8pwd8vK5+robeVHfXHPgqQNrspOWKksrfR9xZO5Ekoev\nRiqQbTqQqk6o6rCqDm/cuDGK4hCRi7jbmdvV6YXTNTn4XmbsjHpahjAD/kkRuQQAqt9fCvFaRNSk\n+gFCI1tG1rQ/d2e60X9Rf0wlXOvGx4Bn/hlYvsP4fuNjcZcIO
Lt0Frse3AX5rODDX/uw59+LMnMn\nzID/MIDd1Z93A/h6iNcioibYTYUw+dNJ7H7b7po2//s+eB9Of/K069QCUbnxMeDebwDFeSOAFeeN\nz0kI+iavUz4D0b5RBdKGLyIPANgBYEBEngVwO4C/A/BVEfkogFkA1wdxLSIKjtOyfgeeOrBmgFD5\naLmpyceC9vlDwLq6QcDrloztD/xmPGXyqifbUzOAq5npplsRSMBX1Rsddu0M4vxEFA6vA4T2fGsP\n7pm+JxFTJAzN+9ueFFnJYt81+2qWeBzfOR7pVAscaUuUYk5TAFibGcpHy4kJ9gAw12s049htTzJz\n1s0459LhbJlEKeZlUNbYobHEBHsA+PRO4GzdwlVnu43tSZaE/g8GfKIU8zIoy++C4GF74DeBj70f\nmOkFKjC+f+z9yWm/z0hmzYLoUbfVO+FsmUTkqutvujp+grRW5bvzNQ/K8tFypG31nC2TiALBYO/M\nnAq5PqDH3VbvhAGfiFwVeguJa9ZJgrDmtg8T2/CJOkRYS+o5zfCY9lk1Z+dnE7N0oVcM+EQdwO9i\n434ceOqA7fYkDMKKW6v3Oep1bxnwiTqA04jZICbminuVpqRr9j6H+ZB2woBP1AHCXFKPs2c21sx9\nDvMh7YQBn6gDhLmknt3grFb0ZHsCO1fYzBlCzbz6+vx6UzP3OY51bxnwiTqA32UM/agfnOUU9Oqt\n71m/JmAWegvY0LOh5TJFZX3PeujtigufuQC9XTF57WRg9zmOdW8Z8Ik6gN9lDJs5/8ytM6jcXsHk\ntZM1S/fVEwj0dsWrn3oVpz95uiZgztw601YratXXthvdZz+dsGE+pJ1wpC0R+SafdQ74jfLTi18s\nuub1ZySDilZ8lykrWc+DxLoz3RCRmqmK7fjJtTc7Ya3t8vUjcO1+J4gRuV5H2rKGT0S+OU0EJpCG\nNdRG+++/9n7o7YqMeA9PAsHolaO4ZfiWNW8f+e48bhm+Zc2CLvuu2beyrf+i/jV9C35r2810wlrf\nnGZunQl9dC5r+ETkm11tViC4efhm3PXHdzX8/YE7B1YW87bqv6gfpz95GoAxB//d03d7LpNAoNCV\nfoOXz73sq9bcam0789mM7ayiAkHldv9vLH5wLh0iCo0ZCJsNkHvft9e2+WPv+/aufDYfHBOPTNQ0\n1ZiBvZ657cy5M8h357H/uv2+Anar8994WVsgbqzhE1EsmqlRN2r/t4p6rptm2vCDwho+EcWqUUBv\npkbtJ0c96hHCrb71RIEBn4gCV1/bNacNABBKs4nTsVFL6rTIJmbpEFHgwpo2wOuoX7sMm6gnKksi\nBnwiClxY0wbYDXyaum4KU9dNuQ46i2OisiRipy0RBc6pczWuRUOSVp6gceAVEcUmjmkD3MQxUVkS\nMeATUeDCntvHrzgmKksiZukQUSiSlLEyvnPcNkc+rjeOuLCGT0QdL2lvHHFhpy0RUZtjpy0REdUI\nvQ1fRGYAvApgGcAFL08hIiIKXlSdtr+vqqcjuhYREdlgkw4RUUpEEfAVwEEReURERut3isioiEyL\nyPSpU6ciKA4RUTpFEfDfrapvB/A+AB8Xkd+17lTVCVUdVtXhjRs3RlAcIqJ0Cj3gq+pz1e8vAXgI\nwDvDviYREa0VasAXkXUissH8GcAfAXg8zGsSEZG9sLN0BgE8JCLmtf5dVb8T8jWJiMhGqAFfVU8A\neFuY1yAiIm+YlklElBIM+EREKcGAT0SJdfJkGUeOFHH4cAZHjhRx8qT9koRej0s7zodPRIl08mQZ\nx4+PolIx5rBfXJzF8ePG2M3BwZLv45yuceLEGBYX55DLDWHz5vGGv9POWMMnokQ6cWJsJYibKpUF\nnDgx1tRx9cwHxeLiLABdeVB08tsBa/hElEiLi/brzS4uzuLw4QxyuSH0949UA7b33ze5PSg6tZbP\nGj4RJVI22+ey16iRP//83
Y5H5HLu69U6P1A6d2FzBnwiSiRjvGZzMpk8Nm92X6/W6YHQ6EHRzhjw\niSg0rWTZXLjwctPX3bp1omGzzObN48hk8jXbvDwo2hnb8IkoFK1m2WSzfVhePuP7urlcwVMbvHlM\nmrJ0uIg5EQXGmuZoNCAsrzkmlytg+/aZleOPHdtte1xXV3+1lu8nRknd8Vls2jSK3t7f8RTY2zVN\n0+si5gz4RBSI+pq6O0E224dK5VWong+9bHbX37TpZlx++V0rW+zKn8nkPTUPxc1rwGcbPhEFwi7N\n0ZlieflMg2Av6OrqD6Jottd//vl7avoUvOTzN+qTSPqIX7bhE1Eggk9nVFy48GrA56w9/7FjNwEw\n2vPd8v6PHCmiv38EL7446dgn0cqI36iwhk9EnrnVYLu63PLmmxV2c4/i2LFdOHy4vu2/lpHzf4/r\nG4DTG8KxY7sTU9NnDZ+IPGlUg01Qd2BI7P8DzTcD5zec5cTU9FnDJyJPGrVxLy83nzffzsyBWm4D\ntrzM7RMFBnwi8qTRVATuUyF0JutALbuBXFZJmLKBTTpE5EkuN2Q7UVkuN4STJ8uoVMLsYE0GkS5k\nMr3VAWHZmpq72VzjNK4gCVM2sIZPRJ7Y12AFi4uzOHZsd0z59NFSvbAS7M2gbvZlPPnknmrwX4Yx\nAGxVUqZsYA2fiGxZR51ms30QQbUN3wx21syWtTXazlb731upLNTN3Kkw708uV0jMiF0GfCJaoz4j\np3ZOm/pgT/a0ZhqJJGCTDhGt0XjULIO9F4uLsys5+EkYhcsaPhGt4bSKFPl3/PgoXnjhPvzqV/8N\n80EZ1yhc1vCJqIZR82xh9ZGQXXwQeNcNwO/9gfH94oNxl8hdpbKAX/3qEOrfiuLIzWfAJ6IaRhBK\nZpPNxQeBrV8Afu0kIGp83/qFOIJ+Bps23dLyWaLOzWfAJ6Ia3oNQNtRy2Nn8JSC7WFeKRWN7VLLZ\nfmzbdn91auXWQmjUufkM+ERUw3sQij4VM/eSv+1hUD1n+VRp4UwSeW4+Az4R1Wg0RUCcFi/2tz0M\n5gyYhw9n0HwINRZgiTo3P/SALyLvFZHjIvK0iNwW9vWIqDWDgyVs3TqBXK4AY2Wqfoj0xF0sAMCJ\nPweWc7XblnPG9mgtw+jn8FrD764u5iLI5Qp43ev+AM8/P4HDhwWHD3fhySf3hFdUi1DTMkUkC+Bf\nAfwhgGcB/FhEHlbVn4V5XSJqzeBgac1C405zxETppauN75u/ZDTjLF5sBHtze5LkcoVqemsWwBKy\n2fXYsmUv5ud/UDcqd3nls3XJxTCEuqatiGwHcIeqvqf6+VMAoKp/a3c817QlSi6jCSOZ2TtJtG3b\nlO0auZXKOdjfxyx27LjQ1LWSsqbtGwH80vL52eq2FSIyKiLTIjJ96tSpkItDRM1KwmyP7SKb7Xdc\nP8D5oRn+21PsnbaqOqGqw6o6vHHjxriLQ0QOktyZmyzduPzyvU3k2Ief5hp2wH8OwJssny+tbiOi\nNmLOnOk+v05nMjpbvdu27T4MDpYc34iy2fW22zdtGvVdNr/CDvg/BrBFRC4To5v/BgAPh3xNIgqQ\nOXNmGufXyeUKuPji630db3Z2270RZTJ5XH75PdVRumaNPotNm24JvcMWCDlLR1UviMhfAvgujP+6\nfar6RJjXJKJgpbVmby5a8uSTn/B1vMkM/OaaArnc0Mq8+IODpUgCfL1Qs3T8YpYOUfJ4yc7JZPLI\n5Yo4d65TMq6NRV66uvpx4cKZhkcDRlZOXIucJCVLh4jaXKPsnFyugK1bJ1CpnI2oRGETmBkzXoO9\ntSknyRjwiciV+3wvgu3bZzA4WIp85sdw+F/JKynr1XrBgE9ErgYHS46ZKtbaf7vn6RtTSfgL9ubb\nTTvU7gEGfCLyYMuWvbYZJ9aabbvm6WcyeWzbNoXt22eqQd8bc73adgn2AAM+EXlQP6GaXc2
29hjA\nmnYIIFGTsJnq/zvsHloia5MZ26kZx4pZOkQUGXMAl5mmGGduv1Pue30ZzcBul16ZFF6zdBjwiSg2\n//u/A1he9pIJk0HtVMTdyGZzWF5+relrm00ynYBpmUSUeJdfvhdAd93WTM3c8du2TWHbtvtrmpO2\nbbsPV131KnbsUGzbNuU4XYGbxcVZHD6cwZEjxerC7Z0v1JG2RERu3EajOh1rR7XZpQYVi4uzOH58\ntOE1OgGbdIiorR05UnToC8gCqCCb7UOl8ipUz7uep52beNikQ0Sp4Dzgq4IdOyq46qrTeMtb9q00\nCfk/T+dgwCeituY04Mu6fXCwhO3bZ7BjR8Ux177dB455wYBPRG3NaRpipzx5v8d3EgZ8ImprXgaF\ntXJ8J2GnLRFRm2OnLRER1WDAJyJKCQZ8IqKUYMAnIkoJBnwiopRgwCciSgkGfCKilGDAJyJKCQZ8\nIqKUYMAnIkoJBnwiopRgwCciSgkGfCKilGDAJyJKidACvojcISLPicij1a+RsK5FRESNdYV8/n9W\n1S+EfA0iIvKATTpERCkRdsD/KxF5TET2icjr7Q4QkVERmRaR6VOnToVcHCKi9GppiUMROQjgDTa7\nxgD8EMBpAArgcwAuUdWPuJ2PSxwSEfnndYnDltrwVfVqj4W5F8A3W7kWERG1JswsnUssH68F8HhY\n1yIiosbCzNK5U0TeDqNJZwbAX4R4LSIiaiC0gK+qN4V1biIi8o9pmUREKcGAT0SUEgz4REQpwYBP\nRJQSDPhERCnBgE9ElBIM+EREKcGAT0SUEgz4REQpwYBPRJQSDPhERCnBgA+gXAaKRSCTMb6Xy8Ff\nY88eIJsFRIyv9ev9XyeKchJR50p9wC+XgdFRYHYWUDW+j476D6Z79hiB2AzoGzasnmPPHuDuu4FK\nZfX4s2eBD31o7XWcgrqXcvKBQESuVDUxX1deeaVGrVBQNUJo7Veh4P57U1PGMSKq69fbnwNQ7e83\njnHab73O1JRqd3ft/u7u1Ws5/f7UlHGd+n35vLGPiDobgGn1EGNbWuIwaHEscZjJGOGxnkhtjbxc\nBsbGgLk5oK8PeOUVYGkpmDKY1x8YAM6cWbu/vx94+WX7cgJAPg8sLNjvKxSAmZlAiklECeV1icPU\nN+kMDdlv7+szmkVEjLb3XbtWm1POnAku2IusNr3YBXtzu1M5s1nnYA8YDygiIoABH+PjRg3ZqqfH\nqMHPzhqfrTX9oKkaDxMR9+PGx423EatMBlhedv89pwcFEaVPagK+U4dmqQRMTBhNHyLG9w0bgqvB\nByGTMR4K9Q+eRg+ifN54UBARASkJ+I0yXEolo517/37js1PTSlyafcNYWDD6HayZPsziIUqvVHTa\nFourzTNW/f1GPvzcnFEbPns28EsnQj4P7N4NTE7Wtvfn88bbTalU2yk9NGS8GZRK8ZWZiLzz2mkb\n2iLmcaoPXnbBHjBq8mZtvlODPWAE+YmJte395hsAYLzxmA8D8w0IYNAn6iQdV8M3m2+sNVkR55TG\ntBNxfigypZOoPaQ2LXNsbG2aomrjLJhOl83abx8ack7dnJ1lmz9RJ+m4gO8UvFSNGmsa5fPGW099\n+ml3N/Daa85vPyKtTzlBRMnRcQHfKe/cbJ5IY03f/G++6KLVbevWGdudMpLsmsGsbf5E1H46LuDb\nDaSy5qOncSDS2bPG5G3W4L6wAJw/b398oeBc67d7g2K6J1F76IiAbw04Y2NGCqJ1IJWZegjYPxDS\nyK0ZZ2bGufmr/oEZ1GyjRBS+tg/4dgFnctII7JWK8X1sbLX2CdSOrKVaZkBv9KZksuskZ9MPUTK1\nfcB3Czh2D4Ndu1YnQoujpn8jyngGRSwjg2dQxI1ITlXY7KS1ezDWvymZnDrJOWkbUfK0FPBF5E9F\n5AkRqYjIcN2+T4nI0yJyXETe01oxnbkFHLuHgVXUg61
uRBn3YhRFzCIDRRGzuBejiQn6ZjOPdeDV\nzIzxpjQzYz8Iy6lPJI19JURJ12oN/3EA1wH4vnWjiPwGgBsAvBXAewHcJSIOmeCtcQs4TiNs4/J5\njGEdap9A67CAzyN57R9em2W8Nv0QUfxaCviqekxVj9vsugbAV1R1UVWfAfA0gHe2ci0nbgHHabBR\nXIZg/zritD1us7ONO1/tZhu1a/ohoviF1Yb/RgC/tHx+trptDREZFZFpEZk+deqU7wtZAw6wuiDI\n2FjjueKjNgf71xGn7WHYudPfADQvGTfmbKNuTT9EFL+GAV9EDorI4zZf1wRRAFWdUNVhVR3euHFj\nU+colVZr+maQn51NXhbOpzGOs6h9HTmLPD6NaNo/slng4EH3tMt6zLgh6hwNZ8tU1aubOO9zAN5k\n+XxpdVtonObQSZIHYFR9P48xDGEOcxjCpzG+sj1sO3as/jw+bmQrecGMG6LOEFaTzsMAbhCRnIhc\nBmALgP8L6VoA2icoPYASLsMMsqjgMsxEFuwB4OmnV38ulYz1ALxgxg1RZ2g1LfNaEXkWwHYA3xKR\n7wKAqj4B4KsAfgbgOwA+rqqhtqi7LfJNhvqspb17G49FYMYNUedoNUvnIVW9VFVzqjqoqu+x7BtX\n1V9X1a2q+u3Wi+rOKVvHbpbItBKp7YC1y7C55RZm3BB1qo5aAMVpmT5ze9Ly8uPARU2IOo/XBVA6\nKuA3Ui4DN92UvM7cKIk0vyg6ESVTale8clMqATffnLx0zSixA5YovVIV8AHgrruA/fvT25k7MhJ3\nCYgoLqkL+IBR05+cTGdn7oEDcZeAiOLScOBVpzIzT7wOPuoU7TJegYiCl8oavqlUcp9iQGR17hmR\nzmgGYhs+UXqlOuAD7kseqhqjU82JwSYnge7uSIvXkJ8OaBF/bfhcq5aos6Q+4JuDj5xYm0BKJeC+\n+7xPSRAFVec3j3Xrah8IqsZDy0vg5lq1RJ0nVXn4bopF+4FZbgOVMpn4c/oLBeOh5KccXgZfNXM/\niCgezMP3ycvKTfVNHH19rV2zUHB/W+jvN6Y66Omx32+Wz2+7vJeOW65VS9R5GPCrGq3cZNfE8cor\na4NxPm8E6UyDOyti1JSvv95+/86dwOnTRhrl+fNr92ezq+Vzelg5PUy8PCC4Vi1RB1LVxHxdeeWV\nmlSFgqoR6mu/+vuNfSLG96kp4/ipKdV83v53AONYt/OaxzjtE6kt39TU2nLYlSGfXy2jm1Z+l4ii\nBWBaPcTY2IO89SvJAV/EW+C1mpoyHgj1v2MNnE7ntZ7f7YHRiN2DwKtWfpeIouM14LPT1qNWOjGd\nZvF0O6+VSG2nbD7PaYuJaBU7bQPmpVPXidsi327jAEyqnKOeiFqX2qkV/DIDrFNNPYjzOtX0mQpJ\nREFgDd8Ht5p6EOedmmr+LYKIqBEG/ARplBpKRNQKBnwiopRgG36CmIO7FhaMz+b8NQBr+UTUOtbw\nE2RsbDXYmxYWjO1ERK1iwE8Qzl9DRGFiwE8Qzl9DRGFiwE+QVgZ3ERE1woCfIEzLJKIwMUsnYUol\nBngiCgdr+EREKcGAT0SUEgz4REQpwYBPRJQSDPhERCmRqBWvROQUgAbrPzVtAMDpkM4dFJYxGCxj\nMFjGYERRxoKqbmx0UKICfphEZNrLEmBxYhmDwTIGg2UMRpLKyCYdIqKUYMAnIkqJNAX8ibgL4AHL\nGAyWMRgsYzASU8bUtOETEaVdmmr4RESp1rEBX0T+Q0QerX7NiMijDsfNiMjR6nHTEZfxDhF5zlLO\nEYfj3isix0XkaRG5LeIy/oOI/FxEHhORh0TkdQ7HRX4fG90XMfxLdf9jIvKOKMpluf6bROR/RORn\nIvKEiHzC5pgdIjJv+X/gM1GWsVoG179dAu7jVsv9eVREXhGRW+uOifw+isg+EXlJRB63bOsTke+J\nyFPV7693+N14/k2
rasd/AfhHAJ9x2DcDYCCmct0B4K8bHJMF8AsAmwH0APgpgN+IsIx/BKCr+vPf\nA/j7JNxHL/cFwAiAbwMQAO8C8KOI/76XAHhH9ecNAJ60KeMOAN+M4/8/r3+7uO+jzd/9RRh557He\nRwC/C+AdAB63bLsTwG3Vn2+z+/cS57/pjq3hm0REAFwP4IG4y9KkdwJ4WlVPqOp5AF8BcE1UF1fV\n/1LVC9WPPwRwaVTXbsDLfbkGwP1q+CGA14nIJVEVUFVfUNWfVH9+FcAxAG+M6voBivU+1tkJ4Beq\nGtYATc9U9fsAXq7bfA2AyerPkwA+aPOrsf2b7viAD+AqACdV9SmH/QrgoIg8IiKjEZbL9FfV1+R9\nDq9/bwTwS8vnZxFf0PgIjJqenajvo5f7kph7JyJFAL8F4Ec2u3+7+v/At0XkrZEWzNDob5eY+wjg\nBjhX3uK+jwAwqKovVH9+EcCgzTGx3c+2XgBFRA4CeIPNrjFV/Xr15xvhXrt/t6o+JyIXA/ieiPy8\n+uQOvYwA7gbwORj/4D4Ho+npI0Fd2ysv91FExgBcAFB2OE2o97Gdich6AP8J4FZVfaVu908ADKnq\na9U+nK8B2BJxEdvibyciPQA+AOBTNruTcB9rqKqKSKLSINs64Kvq1W77RaQLwHUArnQ5x3PV7y+J\nyEMwXrcC+5+9URlNInIvgG/a7HoOwJssny+tbguMh/v4YQB/AmCnVhshbc4R6n204eW+hH7vGhGR\nbhjBvqyqD9bvtz4AVPWAiNwlIgOqGtn8MB7+drHfx6r3AfiJqp6s35GE+1h1UkQuUdUXqs1eL9kc\nE9v97PQmnasB/FxVn7XbKSLrRGSD+TOMDsrH7Y4NQ1076LUO1/4xgC0iclm1hnMDgIejKB9gZBMA\n+CSAD6jqgsMxcdxHL/flYQAfqmaZvAvAvOV1O3TV/qN/A3BMVf/J4Zg3VI+DiLwTxr/JMxGW0cvf\nLtb7aOH4th73fbR4GMDu6s+7AXzd5pj4/k1H2asd9ReALwO4uW7bJgAHqj9vhtFD/lMAT8Bowoiy\nfPsBHAXwWPUPfkl9GaufR2BkePwihjI+DaO98dHq1z1JuY929wXAzebfHEZWyb9W9x8FMBzxvXs3\njOa6xyz3b6SujH9ZvWc/hdEp/tsRl9H2b5ek+1gtwzoYAbzXsi3W+wjj4fMCgCUY7fAfBdAP4BCA\npwAcBNBXPTYR/6Y50paIKCU6vUmHiIiqGPCJiFKCAZ+IKCUY8ImIUoIBn4goJRjwiYhSggGfiCgl\nGPCJiFI/A26/AAAAB0lEQVTi/wE9WmGBgf6MLAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "assigned, new_centroids = iteration(rdd, new_centroids)\n", "draw_clusters(assigned, new_centroids)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }