{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5 minute append to state column \n",
    "\n",
    "After taking some time to look around I found usaddress for parsing states.  This notebook shows how easy it is to parse addresses with usaddress.  I feel that the list of tuples is a bit clunky, but it works and I only spotted one error in the results... Indianna??  really??"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import usaddress\n",
    "% matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('https://query.data.world/s/78ou6jcu4jfseul53lu1w3nio')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>location</th>\n",
       "      <th>crowd-low-estimate</th>\n",
       "      <th>crowd-high-estimate</th>\n",
       "      <th>mean-high-low</th>\n",
       "      <th>source</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abilene, TX</td>\n",
       "      <td>200</td>\n",
       "      <td>200</td>\n",
       "      <td>200.0</td>\n",
       "      <td>http://www.reporternews.com/story/news/local/2...</td>\n",
       "      <td>32.576489</td>\n",
       "      <td>-99.665323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Accident, MD</td>\n",
       "      <td>54</td>\n",
       "      <td>54</td>\n",
       "      <td>54.0</td>\n",
       "      <td>Twitter; on-site witness</td>\n",
       "      <td>39.628700</td>\n",
       "      <td>-79.319760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Adak, AK</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>10.0</td>\n",
       "      <td>adn.com</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Adrian, MI</td>\n",
       "      <td>130</td>\n",
       "      <td>150</td>\n",
       "      <td>140.0</td>\n",
       "      <td>https://www.facebook.com/events/847360115406578/</td>\n",
       "      <td>41.889943</td>\n",
       "      <td>-84.065892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ajo, AZ</td>\n",
       "      <td>250</td>\n",
       "      <td>250</td>\n",
       "      <td>250.0</td>\n",
       "      <td>https://www.facebook.com/plugins/post.php?href...</td>\n",
       "      <td>32.384890</td>\n",
       "      <td>-112.890110</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       location crowd-low-estimate crowd-high-estimate  mean-high-low  \\\n",
       "0   Abilene, TX                200                 200          200.0   \n",
       "1  Accident, MD                 54                  54           54.0   \n",
       "2      Adak, AK                 10                  10           10.0   \n",
       "3    Adrian, MI                130                 150          140.0   \n",
       "4       Ajo, AZ                250                 250          250.0   \n",
       "\n",
       "                                              source   Latitude   Longitude  \n",
       "0  http://www.reporternews.com/story/news/local/2...  32.576489  -99.665323  \n",
       "1                          Twitter; on-site witness   39.628700  -79.319760  \n",
       "2                                            adn.com   0.000000    0.000000  \n",
       "3   https://www.facebook.com/events/847360115406578/  41.889943  -84.065892  \n",
       "4  https://www.facebook.com/plugins/post.php?href...  32.384890 -112.890110  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## original method\n",
    "using last 2 characters of location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['State'] = df['location'].str[-2:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>location</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CA</th>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WA</th>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NY</th>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OR</th>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AK</th>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       location\n",
       "State          \n",
       "CA           69\n",
       "WA           27\n",
       "NY           25\n",
       "OR           24\n",
       "AK           22"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('State').count().sort_values('location', ascending=False)[['location']].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Looks Like there are a few errors in State that would need cleansed (at least 12)\n",
    "\n",
    "15 minutes is not enough to fix this by hand, I know there is a package that would do this faster that has been mentioned on talk python to me.  Ill have to look into this package another day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "62"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.groupby('State').count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>location</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>DE</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OK</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LA</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DC</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RI</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ah</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>er</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ge</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>le</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>na</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nd</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>on</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>t)</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>te</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       location\n",
       "State          \n",
       "DE            2\n",
       "OK            2\n",
       "LA            2\n",
       "DC            1\n",
       "RI            1\n",
       "ah            1\n",
       "er            1\n",
       "es            1\n",
       "ge            1\n",
       "le            1\n",
       "na            1\n",
       "nd            1\n",
       "on            1\n",
       "t)            1\n",
       "te            1"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('State').count().sort_values('location', ascending=False)[['location']].tail(15)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usaddress Method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['Address'] = df['location'].apply(usaddress.parse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10         [(Albuquerque,, PlaceName), (NM, StateName)]\n",
       "11    [(Almanor, PlaceName), (West,, PlaceName), (CA...\n",
       "12              [(Alpine,, PlaceName), (TX, StateName)]\n",
       "13            [(Amarillo,, PlaceName), (TX, StateName)]\n",
       "14    [(Amelia, StreetName), (Island,, StreetNamePos...\n",
       "15           [(Anacortes,, PlaceName), (WA, StateName)]\n",
       "16           [(Anchorage,, PlaceName), (AK, StateName)]\n",
       "17    [(Ann, PlaceName), (Arbor,, PlaceName), (MI, S...\n",
       "18           [(Annapolis,, PlaceName), (MD, StateName)]\n",
       "19           [(Arlington,, PlaceName), (VA, StateName)]\n",
       "Name: Address, dtype: object"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Address'].iloc[10:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>(Albuquerque,, PlaceName)</td>\n",
       "      <td>(NM, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>(Almanor, PlaceName)</td>\n",
       "      <td>(West,, PlaceName)</td>\n",
       "      <td>(CA, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>(Alpine,, PlaceName)</td>\n",
       "      <td>(TX, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>(Amarillo,, PlaceName)</td>\n",
       "      <td>(TX, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>(Amelia, StreetName)</td>\n",
       "      <td>(Island,, StreetNamePostType)</td>\n",
       "      <td>(FL, OccupancyType)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>(Anacortes,, PlaceName)</td>\n",
       "      <td>(WA, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>(Anchorage,, PlaceName)</td>\n",
       "      <td>(AK, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>(Ann, PlaceName)</td>\n",
       "      <td>(Arbor,, PlaceName)</td>\n",
       "      <td>(MI, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>(Annapolis,, PlaceName)</td>\n",
       "      <td>(MD, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>(Arlington,, PlaceName)</td>\n",
       "      <td>(VA, StateName)</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            0                              1  \\\n",
       "10  (Albuquerque,, PlaceName)                (NM, StateName)   \n",
       "11       (Almanor, PlaceName)             (West,, PlaceName)   \n",
       "12       (Alpine,, PlaceName)                (TX, StateName)   \n",
       "13     (Amarillo,, PlaceName)                (TX, StateName)   \n",
       "14       (Amelia, StreetName)  (Island,, StreetNamePostType)   \n",
       "15    (Anacortes,, PlaceName)                (WA, StateName)   \n",
       "16    (Anchorage,, PlaceName)                (AK, StateName)   \n",
       "17           (Ann, PlaceName)            (Arbor,, PlaceName)   \n",
       "18    (Annapolis,, PlaceName)                (MD, StateName)   \n",
       "19    (Arlington,, PlaceName)                (VA, StateName)   \n",
       "\n",
       "                      2     3     4     5  \n",
       "10                 None  None  None  None  \n",
       "11      (CA, StateName)  None  None  None  \n",
       "12                 None  None  None  None  \n",
       "13                 None  None  None  None  \n",
       "14  (FL, OccupancyType)  None  None  None  \n",
       "15                 None  None  None  None  \n",
       "16                 None  None  None  None  \n",
       "17      (MI, StateName)  None  None  None  \n",
       "18                 None  None  None  None  \n",
       "19                 None  None  None  None  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame.from_records(df['Address'].values.tolist()).iloc[10:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_state(lst):\n",
    "    for tpl in lst:\n",
    "        if tpl[1] == 'StateName':\n",
    "            return tpl[0]\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['State'] = df['Address'].apply(get_state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "53"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.groupby('State').count())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Errors? 53?\n",
    "Looks like there were 53 states it counted PR (Puerto Rico), and DC which is good. But it also included Indianna?? not sure why, but I am impressed with the reults."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>location</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CA</th>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WA</th>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NY</th>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OR</th>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AK</th>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MI</th>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TX</th>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PA</th>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CO</th>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WI</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NC</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FL</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VA</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NM</th>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OH</th>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AZ</th>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ME</th>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NJ</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MN</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MA</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IL</th>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UT</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IN</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MD</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TN</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NH</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IA</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GA</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HI</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CT</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SD</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SC</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PR</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MS</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MO</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AR</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NV</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VT</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NE</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>KY</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MT</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ND</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WY</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AL</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LA</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>KS</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DE</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OK</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WV</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RI</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Indiana</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DC</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         location\n",
       "State            \n",
       "CA             70\n",
       "WA             27\n",
       "NY             25\n",
       "OR             24\n",
       "AK             22\n",
       "MI             20\n",
       "TX             20\n",
       "PA             19\n",
       "CO             19\n",
       "WI             16\n",
       "NC             15\n",
       "FL             14\n",
       "VA             13\n",
       "NM             11\n",
       "OH             11\n",
       "AZ             11\n",
       "ME             11\n",
       "NJ             10\n",
       "MN             10\n",
       "MA             10\n",
       "IL              9\n",
       "UT              8\n",
       "IN              8\n",
       "ID              8\n",
       "MD              7\n",
       "TN              7\n",
       "NH              7\n",
       "IA              6\n",
       "GA              6\n",
       "HI              5\n",
       "CT              5\n",
       "SD              4\n",
       "SC              4\n",
       "PR              4\n",
       "MS              4\n",
       "MO              4\n",
       "AR              4\n",
       "NV              3\n",
       "VT              3\n",
       "NE              3\n",
       "KY              3\n",
       "MT              3\n",
       "ND              3\n",
       "WY              3\n",
       "AL              2\n",
       "LA              2\n",
       "KS              2\n",
       "DE              2\n",
       "OK              2\n",
       "WV              2\n",
       "RI              1\n",
       "Indiana         1\n",
       "DC              1"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('State').count().sort_values('location', ascending=False)[['location']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Amarillo,', 'PlaceName'), ('TX', 'StateName')]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Address'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}