{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Exploration\n", "\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import settings" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NationYearPop
0North America1980320.27638
1Bermuda19800.05473
2Argentina198028.3698
3Former U.S.S.R.1980265.92182
4Georgia1980--
\n", "
" ], "text/plain": [ " Nation Year Pop\n", "0 North America 1980 320.27638\n", "1 Bermuda 1980 0.05473\n", "2 Argentina 1980 28.3698\n", "3 Former U.S.S.R. 1980 265.92182\n", "4 Georgia 1980 --" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the long-form population CSV and turn the text Year labels into ints.\n", "df = pd.read_csv(\n", "    os.path.join(settings.data_dir, 'pop_by_country_long_form.csv')\n", ").assign(\n", "    # Keep everything after the first four characters of each label.\n", "    Year=lambda frame: frame['Year'].str[4:].astype(int)\n", ")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array(['North America', 'Bermuda', 'Argentina', 'Former U.S.S.R.',\n", " 'Georgia', 'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania',\n", " 'Moldova', 'Russia', 'Tajikistan', 'Turkmenistan', 'Aruba',\n", " 'Ukraine', 'Uzbekistan', 'Middle East', 'Bahrain', 'Iran', 'Iraq',\n", " 'Israel', 'Jordan', 'Kuwait', 'Lebanon', 'Bahamas, The', 'Oman',\n", " 'Palestine', 'Qatar', 'Saudi Arabia', 'Syria',\n", " 'United Arab Emirates', 'Yemen', 'Africa', 'Algeria', 'Angola',\n", " 'Barbados', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi',\n", " 'Cameroon', 'Cape Verde', 'Central African Republic', 'Chad',\n", " 'Comoros', 'Congo (Brazzaville)', 'Belize', 'Congo (Kinshasa)',\n", " 'Cote dIvoire (IvoryCoast)', 'Djibouti', 'Egypt',\n", " 'Equatorial Guinea', 'Eritrea', 'Ethiopia', 'Gabon', 'Gambia, The',\n", " 'Ghana', 'Bolivia', 'Guinea', 'Guinea-Bissau', 'Kenya', 'Lesotho',\n", " 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',\n", " 'Brazil', 'Mauritius', 'Morocco', 'Mozambique', 'Namibia', 'Niger',\n", " 'Nigeria', 'Reunion', 'Rwanda', 'Saint Helena',\n", " 'Sao Tome and Principe', 'Cayman Islands', 'Senegal', 'Seychelles',\n", " 'Sierra Leone', 'Somalia', 'South Africa', 'Sudan', 'Swaziland',\n", " 'Tanzania', 'Togo', 'Tunisia', 'Chile', 'Uganda', 'Western Sahara',\n", " 'Zambia', 'Zimbabwe', 'Asia & Oceania', 'Afghanistan',\n", " 'American Samoa', 'Australia', 'Bangladesh', 'Bhutan', 'Colombia',\n", " 
'Brunei', 'Burma (Myanmar)', 'Cambodia', 'China', 'Cook Islands',\n", " 'Fiji', 'French Polynesia', 'Guam', 'Hawaiian Trade Zone',\n", " 'Hong Kong', 'Canada', 'Costa Rica', 'India', 'Indonesia', 'Japan',\n", " 'Kiribati', 'Korea, North', 'Korea, South', 'Laos', 'Macau',\n", " 'Malaysia', 'Maldives', 'Cuba', 'Mongolia', 'Nauru', 'Nepal',\n", " 'New Caledonia', 'New Zealand', 'Niue', 'Pakistan',\n", " 'Papua New Guinea', 'Philippines', 'Samoa', 'Dominica', 'Singapore',\n", " 'Solomon Islands', 'Sri Lanka', 'Taiwan', 'Thailand',\n", " 'Timor-Leste (East Timor)', 'Tonga', 'U.S. Pacific Islands',\n", " 'Vanuatu', 'Vietnam', 'Dominican Republic', 'Wake Island', 'World',\n", " 'Ecuador', 'El Salvador', 'Falkland Islands (Islas Malvinas)',\n", " 'French Guiana', 'Grenada', 'Guadeloupe', 'Greenland', 'Guatemala',\n", " 'Guyana', 'Haiti', 'Honduras', 'Jamaica', 'Martinique',\n", " 'Montserrat', 'Netherlands Antilles', 'Nicaragua', 'Panama',\n", " 'Mexico', 'Paraguay', 'Peru', 'Puerto Rico',\n", " 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent/Grenadines',\n", " 'Suriname', 'Trinidad and Tobago', 'Turks and Caicos Islands',\n", " 'Uruguay', 'Saint Pierre and Miquelon', 'Venezuela',\n", " 'Virgin Islands, U.S.', 'Virgin Islands, British', 'Europe',\n", " 'Albania', 'Austria', 'Belgium', 'Bosnia and Herzegovina',\n", " 'Bulgaria', 'Croatia', 'United States', 'Cyprus', 'Czech Republic',\n", " 'Denmark', 'Faroe Islands', 'Finland', 'Former Czechoslovakia',\n", " 'Former Serbia and Montenegro', 'Former Yugoslavia', 'France',\n", " 'Germany', 'Central & South America', 'Germany, East',\n", " 'Germany, West', 'Gibraltar', 'Greece', 'Hungary', 'Iceland',\n", " 'Ireland', 'Italy', 'Luxembourg', 'Macedonia', 'Antarctica',\n", " 'Malta', 'Montenegro', 'Netherlands', 'Norway', 'Poland',\n", " 'Portugal', 'Romania', 'Serbia', 'Slovakia', 'Slovenia',\n", " 'Antigua and Barbuda', 'Spain', 'Sweden', 'Switzerland', 'Turkey',\n", " 'United Kingdom', 'Eurasia', 'Armenia', 'Azerbaijan', 
'Belarus',\n", " 'Estonia'], dtype=object)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.Nation.drop_duplicates().values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Nation whose population series is exported by the cells below.\n", "nation = 'United States'\n", "# Pivot the long table into a Year-indexed frame with one column per nation.\n", "# Selecting 'Pop' before unstacking yields plain nation-name columns directly.\n", "pop_wide = df.groupby(['Year', 'Nation'])['Pop'].sum().unstack()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Serialise the selected nation's column (renamed to 'Series') as a JSON string.\n", "pop_wide[[nation]].rename(columns={nation: 'Series'}).to_json()" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'labels:[1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010]\\nseries: [227.22468, 229.46571, 231.66446, 233.79199, 235.8249, 237.9238, 240.13289, 242.28892, 244.49898, 246.81923, 249.62281, 252.98094, 256.51422, 259.91859, 263.12582, 266.27839, 269.39428, 272.64693, 275.8541, 279.04017, 282.17196, 285.08156, 287.80391, 290.32642, 293.04574, 295.75315, 298.59321, 301.5799, 304.37485, 307.00655, 310.23286]'" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build a 'labels'/'series' text snippet from the year index and the nation's values as floats.\n", "f'labels:{pop_wide.index.astype(int).tolist()}, series: [{pop_wide[nation].astype(float).tolist()}]'" ] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, 
"nbformat_minor": 2 }