flask-chartist-example/notebooks/data exploration.ipynb
2017-05-19 16:02:30 -05:00

244 lines
8.7 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Exploration\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import settings"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Nation</th>\n",
" <th>Year</th>\n",
" <th>Pop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>North America</td>\n",
" <td>1980</td>\n",
" <td>320.27638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bermuda</td>\n",
" <td>1980</td>\n",
" <td>0.05473</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Argentina</td>\n",
" <td>1980</td>\n",
" <td>28.3698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Former U.S.S.R.</td>\n",
" <td>1980</td>\n",
" <td>265.92182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Georgia</td>\n",
" <td>1980</td>\n",
" <td>--</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Nation Year Pop\n",
"0 North America 1980 320.27638\n",
"1 Bermuda 1980 0.05473\n",
"2 Argentina 1980 28.3698\n",
"3 Former U.S.S.R. 1980 265.92182\n",
"4 Georgia 1980 --"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read pop_by_country_long_form.csv from settings.data_dir, then drop the first\n",
"# four characters of each Year label and cast the remainder to int.\n",
"df = (\n",
"    pd.read_csv(os.path.join(settings.data_dir, 'pop_by_country_long_form.csv'))\n",
"    .assign(Year=lambda raw: raw['Year'].str.slice(4).astype(int))\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array(['North America', 'Bermuda', 'Argentina', 'Former U.S.S.R.',\n",
" 'Georgia', 'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania',\n",
" 'Moldova', 'Russia', 'Tajikistan', 'Turkmenistan', 'Aruba',\n",
" 'Ukraine', 'Uzbekistan', 'Middle East', 'Bahrain', 'Iran', 'Iraq',\n",
" 'Israel', 'Jordan', 'Kuwait', 'Lebanon', 'Bahamas, The', 'Oman',\n",
" 'Palestine', 'Qatar', 'Saudi Arabia', 'Syria',\n",
" 'United Arab Emirates', 'Yemen', 'Africa', 'Algeria', 'Angola',\n",
" 'Barbados', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi',\n",
" 'Cameroon', 'Cape Verde', 'Central African Republic', 'Chad',\n",
" 'Comoros', 'Congo (Brazzaville)', 'Belize', 'Congo (Kinshasa)',\n",
" 'Cote dIvoire (IvoryCoast)', 'Djibouti', 'Egypt',\n",
" 'Equatorial Guinea', 'Eritrea', 'Ethiopia', 'Gabon', 'Gambia, The',\n",
" 'Ghana', 'Bolivia', 'Guinea', 'Guinea-Bissau', 'Kenya', 'Lesotho',\n",
" 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',\n",
" 'Brazil', 'Mauritius', 'Morocco', 'Mozambique', 'Namibia', 'Niger',\n",
" 'Nigeria', 'Reunion', 'Rwanda', 'Saint Helena',\n",
" 'Sao Tome and Principe', 'Cayman Islands', 'Senegal', 'Seychelles',\n",
" 'Sierra Leone', 'Somalia', 'South Africa', 'Sudan', 'Swaziland',\n",
" 'Tanzania', 'Togo', 'Tunisia', 'Chile', 'Uganda', 'Western Sahara',\n",
" 'Zambia', 'Zimbabwe', 'Asia & Oceania', 'Afghanistan',\n",
" 'American Samoa', 'Australia', 'Bangladesh', 'Bhutan', 'Colombia',\n",
" 'Brunei', 'Burma (Myanmar)', 'Cambodia', 'China', 'Cook Islands',\n",
" 'Fiji', 'French Polynesia', 'Guam', 'Hawaiian Trade Zone',\n",
" 'Hong Kong', 'Canada', 'Costa Rica', 'India', 'Indonesia', 'Japan',\n",
" 'Kiribati', 'Korea, North', 'Korea, South', 'Laos', 'Macau',\n",
" 'Malaysia', 'Maldives', 'Cuba', 'Mongolia', 'Nauru', 'Nepal',\n",
" 'New Caledonia', 'New Zealand', 'Niue', 'Pakistan',\n",
" 'Papua New Guinea', 'Philippines', 'Samoa', 'Dominica', 'Singapore',\n",
" 'Solomon Islands', 'Sri Lanka', 'Taiwan', 'Thailand',\n",
" 'Timor-Leste (East Timor)', 'Tonga', 'U.S. Pacific Islands',\n",
" 'Vanuatu', 'Vietnam', 'Dominican Republic', 'Wake Island', 'World',\n",
" 'Ecuador', 'El Salvador', 'Falkland Islands (Islas Malvinas)',\n",
" 'French Guiana', 'Grenada', 'Guadeloupe', 'Greenland', 'Guatemala',\n",
" 'Guyana', 'Haiti', 'Honduras', 'Jamaica', 'Martinique',\n",
" 'Montserrat', 'Netherlands Antilles', 'Nicaragua', 'Panama',\n",
" 'Mexico', 'Paraguay', 'Peru', 'Puerto Rico',\n",
" 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent/Grenadines',\n",
" 'Suriname', 'Trinidad and Tobago', 'Turks and Caicos Islands',\n",
" 'Uruguay', 'Saint Pierre and Miquelon', 'Venezuela',\n",
" 'Virgin Islands, U.S.', 'Virgin Islands, British', 'Europe',\n",
" 'Albania', 'Austria', 'Belgium', 'Bosnia and Herzegovina',\n",
" 'Bulgaria', 'Croatia', 'United States', 'Cyprus', 'Czech Republic',\n",
" 'Denmark', 'Faroe Islands', 'Finland', 'Former Czechoslovakia',\n",
" 'Former Serbia and Montenegro', 'Former Yugoslavia', 'France',\n",
" 'Germany', 'Central & South America', 'Germany, East',\n",
" 'Germany, West', 'Gibraltar', 'Greece', 'Hungary', 'Iceland',\n",
" 'Ireland', 'Italy', 'Luxembourg', 'Macedonia', 'Antarctica',\n",
" 'Malta', 'Montenegro', 'Netherlands', 'Norway', 'Poland',\n",
" 'Portugal', 'Romania', 'Serbia', 'Slovakia', 'Slovenia',\n",
" 'Antigua and Barbuda', 'Spain', 'Sweden', 'Switzerland', 'Turkey',\n",
" 'United Kingdom', 'Eurasia', 'Armenia', 'Azerbaijan', 'Belarus',\n",
" 'Estonia'], dtype=object)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct Nation values, in order of first appearance.\n",
"df['Nation'].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pivot the long table into one column per Nation, indexed by Year.\n",
"# Selecting 'Pop' before aggregating keeps the unstacked columns single-level,\n",
"# so no droplevel() call is needed afterwards.\n",
"nation = 'United States'\n",
"df2 = df.groupby(['Year', 'Nation'])['Pop'].sum().unstack()\n",
"# Preview the selected nation's column (replaces a leftover `to_json?` help query).\n",
"df2[[nation]].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Serialise the selected nation's column to JSON, keyed as 'Series'.\n",
"# to_json must be called; the bare attribute only displays the bound method.\n",
"# Year is the index of df2, so it needs no entry in the column mapping.\n",
"df2[[nation]].rename(columns={nation: 'Series'}).to_json()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'labels:[1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010]\\nseries: [227.22468, 229.46571, 231.66446, 233.79199, 235.8249, 237.9238, 240.13289, 242.28892, 244.49898, 246.81923, 249.62281, 252.98094, 256.51422, 259.91859, 263.12582, 266.27839, 269.39428, 272.64693, 275.8541, 279.04017, 282.17196, 285.08156, 287.80391, 290.32642, 293.04574, 295.75315, 298.59321, 301.5799, 304.37485, 307.00655, 310.23286]'"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the labels/series text from df2's Year index and the chosen nation's column.\n",
"year_labels = df2.index.astype(int).tolist()\n",
"series_values = df2[nation].astype(float).tolist()\n",
"f'labels:{year_labels}, series: [{series_values}]'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}