{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore Movie Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-06-26 18:57:49 \n",
      "\n",
      "CPython 3.6.1\n",
      "IPython 6.1.0\n",
      "\n",
      "pea 0.0.7\n",
      "pandas 0.20.2\n",
      "\n",
      "compiler   : MSC v.1900 64 bit (AMD64)\n",
      "system     : Windows\n",
      "release    : 7\n",
      "machine    : AMD64\n",
      "processor  : Intel64 Family 6 Model 42 Stepping 7, GenuineIntel\n",
      "CPU cores  : 8\n",
      "interpreter: 64bit\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import settings\n",
    "import etl\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "%load_ext watermark\n",
    "%watermark -d -t -v -m -p pea,pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = etl.Data()\n",
    "data.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Available Columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n",
       "       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n",
       "       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n",
       "       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n",
       "       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n",
       "       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n",
       "       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n",
       "       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.movie.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plotting with IPlotter\n",
    "\n",
    "This example uses my own branch of IPlotter, which builds the chart dictionary directly from a pandas DataFrame. That is much less verbose, but the same charts can also be produced with the current version on PyPI."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from iplotter import C3Plotter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "c3 = C3Plotter()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Timeseries of mean gross"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe srcdoc=\"\n",
       "        <!-- Load c3.css -->\n",
       "        <link href='https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.css' rel='stylesheet' type='text/css'/>\n",
       "\n",
       "        <!-- Load d3.js and c3.js -->\n",
       "        <script src='http://d3js.org/d3.v3.min.js' charset='utf-8'></script>\n",
       "        <script src='http://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.js'></script>\n",
       "\n",
       "    \n",
       "        <h1></h1>\n",
       "        <div id=chart style='width: 100%; height: 100%'></div>\n",
       "        <script>\n",
       "            var chart = document.getElementById('chart');\n",
       "            var data = {\n",
       "    'size': {\n",
       "        'height': 300\n",
       "    },\n",
       "    'data': {\n",
       "        'x': 'x',\n",
       "        'type': 'line',\n",
       "        'axes': {\n",
       "            'gross': 'y',\n",
       "            'x': 'y'\n",
       "        },\n",
       "        'columns': [\n",
       "            [\n",
       "                'gross',\n",
       "                0.0,\n",
       "                3000000.0,\n",
       "                0.0,\n",
       "                26435.0,\n",
       "                1408975.0,\n",
       "                0.0,\n",
       "                0.0,\n",
       "                2300000.0,\n",
       "                0.0,\n",
       "                3000000.0,\n",
       "                163245.0,\n",
       "                184925485.0,\n",
       "                0.0,\n",
       "                110428945.0,\n",
       "                80350000.0,\n",
       "                0.0,\n",
       "                102797150.0,\n",
       "                0.0,\n",
       "                0.0,\n",
       "                0.0,\n",
       "                22025000.0,\n",
       "                7927.0,\n",
       "                2956000.0,\n",
       "                0.0,\n",
       "                8000000.0,\n",
       "                0.0,\n",
       "                36000000.0,\n",
       "                20500000.0,\n",
       "                4934530.5,\n",
       "                0.0,\n",
       "                0.0,\n",
       "                27200000.0,\n",
       "                0.0,\n",
       "                25000000.0,\n",
       "                32000000.0,\n",
       "                43650000.0,\n",
       "                11033517.5,\n",
       "                42950000.0,\n",
       "                38237907.166666664,\n",
       "                69310231.8,\n",
       "                6100000.0,\n",
       "                43100000.0,\n",
       "                36757685.5,\n",
       "                41711931.0,\n",
       "                10450000.0,\n",
       "                27247057.75,\n",
       "                67501217.5,\n",
       "                102919529.0,\n",
       "                55052942.85714286,\n",
       "                124409732.33333333,\n",
       "                71117623.5,\n",
       "                106290809.28571428,\n",
       "                71542234.6,\n",
       "                63579571.428571425,\n",
       "                57697266.733333334,\n",
       "                41460781.52941176,\n",
       "                75037552.1875,\n",
       "                70192386.35714285,\n",
       "                62939598.73913044,\n",
       "                59223134.13333333,\n",
       "                44436464.04,\n",
       "                40233264.77419355,\n",
       "                41190351.833333336,\n",
       "                49678453.15151515,\n",
       "                78203971.25,\n",
       "                53844501.666666664,\n",
       "                63665195.14705882,\n",
       "                45302091.41304348,\n",
       "                59395666.16981132,\n",
       "                44909519.98550725,\n",
       "                42044174.25263158,\n",
       "                44793772.43103448,\n",
       "                38377007.96124031,\n",
       "                38072176.27710843,\n",
       "                42172627.58083832,\n",
       "                43255716.92553192,\n",
       "                43511151.485,\n",
       "                48727746.72327044,\n",
       "                40726529.11,\n",
       "                41159143.29064039,\n",
       "                39237855.9537037,\n",
       "                46267501.02234637,\n",
       "                44573509.378109455,\n",
       "                46207440.2,\n",
       "                49908326.01005025,\n",
       "                45785836.64397906,\n",
       "                62873527.67955801,\n",
       "                56158357.77540107,\n",
       "                62412136.94610778,\n",
       "                66530966.47552448,\n",
       "                76924035.8918919\n",
       "            ],\n",
       "            [\n",
       "                'x',\n",
       "                '1916.0',\n",
       "                '1920.0',\n",
       "                '1925.0',\n",
       "                '1927.0',\n",
       "                '1929.0',\n",
       "                '1930.0',\n",
       "                '1932.0',\n",
       "                '1933.0',\n",
       "                '1934.0',\n",
       "                '1935.0',\n",
       "                '1936.0',\n",
       "                '1937.0',\n",
       "                '1938.0',\n",
       "                '1939.0',\n",
       "                '1940.0',\n",
       "                '1941.0',\n",
       "                '1942.0',\n",
       "                '1943.0',\n",
       "                '1944.0',\n",
       "                '1945.0',\n",
       "                '1946.0',\n",
       "                '1947.0',\n",
       "                '1948.0',\n",
       "                '1949.0',\n",
       "                '1950.0',\n",
       "                '1951.0',\n",
       "                '1952.0',\n",
       "                '1953.0',\n",
       "                '1954.0',\n",
       "                '1955.0',\n",
       "                '1956.0',\n",
       "                '1957.0',\n",
       "                '1958.0',\n",
       "                '1959.0',\n",
       "                '1960.0',\n",
       "                '1961.0',\n",
       "                '1962.0',\n",
       "                '1963.0',\n",
       "                '1964.0',\n",
       "                '1965.0',\n",
       "                '1966.0',\n",
       "                '1967.0',\n",
       "                '1968.0',\n",
       "                '1969.0',\n",
       "                '1970.0',\n",
       "                '1971.0',\n",
       "                '1972.0',\n",
       "                '1973.0',\n",
       "                '1974.0',\n",
       "                '1975.0',\n",
       "                '1976.0',\n",
       "                '1977.0',\n",
       "                '1978.0',\n",
       "                '1979.0',\n",
       "                '1980.0',\n",
       "                '1981.0',\n",
       "                '1982.0',\n",
       "                '1983.0',\n",
       "                '1984.0',\n",
       "                '1985.0',\n",
       "                '1986.0',\n",
       "                '1987.0',\n",
       "                '1988.0',\n",
       "                '1989.0',\n",
       "                '1990.0',\n",
       "                '1991.0',\n",
       "                '1992.0',\n",
       "                '1993.0',\n",
       "                '1994.0',\n",
       "                '1995.0',\n",
       "                '1996.0',\n",
       "                '1997.0',\n",
       "                '1998.0',\n",
       "                '1999.0',\n",
       "                '2000.0',\n",
       "                '2001.0',\n",
       "                '2002.0',\n",
       "                '2003.0',\n",
       "                '2004.0',\n",
       "                '2005.0',\n",
       "                '2006.0',\n",
       "                '2007.0',\n",
       "                '2008.0',\n",
       "                '2009.0',\n",
       "                '2010.0',\n",
       "                '2011.0',\n",
       "                '2012.0',\n",
       "                '2013.0',\n",
       "                '2014.0',\n",
       "                '2015.0',\n",
       "                '2016.0'\n",
       "            ]\n",
       "        ]\n",
       "    },\n",
       "    'subchart': {\n",
       "        'show': false\n",
       "    },\n",
       "    'point': {\n",
       "        'show': false\n",
       "    },\n",
       "    'grid': {\n",
       "        'x': {\n",
       "            'show': false\n",
       "        },\n",
       "        'y': {\n",
       "            'show': false\n",
       "        }\n",
       "    },\n",
       "    'axis': {\n",
       "        'x': {},\n",
       "        'y': {},\n",
       "        'y2': {}\n",
       "    },\n",
       "    'zoom': {\n",
       "        'enabled': true\n",
       "    }\n",
       "};\n",
       "            data['bindto']='#chart'\n",
       "            c3.generate(data);\n",
       "        </script>\n",
       "    \" src=\"\" width=\"800\" height=\"420\" frameborder=\"0\" sandbox=\"allow-scripts\"></iframe>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_data = data.movie.groupby(['title_year']).mean()[['gross']].fillna(0)\n",
    "c3.plot(plot_data, zoom=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of movies per country (Series indexed by country name).\n",
    "# size() counts rows; the previous count()['duration'] counted only rows with a\n",
    "# non-null duration, so movies with a missing duration were left out of the totals.\n",
    "country_group = data.movie.groupby('country').size()\n",
    "\n",
    "# Parallel plain-Python lists consumed by the choropleth cell below.\n",
    "counts = country_group.tolist()\n",
    "countries = country_group.index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "from iplotter import PlotlyPlotter\n",
    "from IPython.display import HTML\n",
    "\n",
    "plotly = PlotlyPlotter()\n",
    "\n",
    "# Choropleth trace: one location per country, shaded by its movie count.\n",
    "# `countries` and `counts` are the parallel lists built in the previous cell.\n",
    "plotly_chart = [{\n",
    "    \"type\": 'choropleth',\n",
    "    \"locationmode\": 'country names',\n",
    "    \"locations\": countries,\n",
    "    \"z\": counts,\n",
    "    \"zmin\": 0,\n",
    "    \"zmax\": max(counts),\n",
    "    \"colorscale\": [\n",
    "        [0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'],\n",
    "        [0.4, 'rgb(188,189,220)'], [0.6, 'rgb(158,154,200)'],\n",
    "        [0.8, 'rgb(117,107,177)'], [1, 'rgb(84,39,143)']\n",
    "    ],\n",
    "    \"colorbar\": {\n",
    "        \"title\": 'Count',\n",
    "        \"thickness\": 10\n",
    "    },\n",
    "    \"marker\": {\n",
    "        \"line\": {\n",
    "            \"color\": 'rgb(255,255,255)',\n",
    "            \"width\": 2\n",
    "        }\n",
    "    }\n",
    "}]\n",
    "\n",
    "plotly_layout = {\n",
    "    \"title\": 'Movie Counts by Country',\n",
    "    \"geo\": {\n",
    "        # 'scope' picks the map region to draw; 'country names' is a\n",
    "        # locationmode value (used in the trace above), not a scope.\n",
    "        \"scope\": 'world',\n",
    "    }\n",
    "}\n",
    "\n",
    "# Pass the layout as well: without it the chart is rendered with a null\n",
    "# layout and the title / geo settings above are never applied.\n",
    "country_plot = plotly.plot(data=plotly_chart, layout=plotly_layout)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "variables": {
     " country_plot ": "<iframe srcdoc=\"\n        <!-- Load d3.js and plotly.js -->\n        <script src='https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.6/d3.min.js'></script>\n        <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script>\n        <script src='https://cdn.plot.ly/plotly-latest.min.js'></script>\n    \n        <div id=chart style='width: 100%; height: 100%' ></div>\n        <script>\n            var chart = document.getElementById('chart');\n            Plotly.plot(chart, [\n    {\n        'type': 'choropleth',\n        'locationmode': 'country names',\n        'locations': [\n            'Afghanistan',\n            'Argentina',\n            'Aruba',\n            'Australia',\n            'Bahamas',\n            'Belgium',\n            'Brazil',\n            'Bulgaria',\n            'Cambodia',\n            'Cameroon',\n            'Canada',\n            'Chile',\n            'China',\n            'Colombia',\n            'Czech Republic',\n            'Denmark',\n            'Dominican Republic',\n            'Egypt',\n            'Finland',\n            'France',\n            'Georgia',\n            'Germany',\n            'Greece',\n            'Hong Kong',\n            'Hungary',\n            'Iceland',\n            'India',\n            'Indonesia',\n            'Iran',\n            'Ireland',\n            'Israel',\n            'Italy',\n            'Japan',\n            'Kenya',\n            'Kyrgyzstan',\n            'Libya',\n            'Mexico',\n            'Netherlands',\n            'New Line',\n            'New Zealand',\n            'Nigeria',\n            'Norway',\n            'Official site',\n            'Pakistan',\n            'Panama',\n            'Peru',\n            'Philippines',\n            'Poland',\n            'Romania',\n            'Russia',\n            'Slovakia',\n            'Slovenia',\n            'South Africa',\n            'South Korea',\n            'Soviet Union',\n            'Spain',\n     
       'Sweden',\n            'Switzerland',\n            'Taiwan',\n            'Thailand',\n            'Turkey',\n            'UK',\n            'USA',\n            'United Arab Emirates',\n            'West Germany'\n        ],\n        'z': [\n            1,\n            4,\n            1,\n            54,\n            1,\n            4,\n            8,\n            1,\n            1,\n            1,\n            126,\n            1,\n            30,\n            1,\n            3,\n            11,\n            1,\n            1,\n            1,\n            154,\n            1,\n            97,\n            2,\n            17,\n            2,\n            3,\n            31,\n            1,\n            4,\n            12,\n            4,\n            23,\n            23,\n            1,\n            1,\n            1,\n            17,\n            5,\n            1,\n            15,\n            1,\n            8,\n            1,\n            0,\n            1,\n            1,\n            1,\n            5,\n            4,\n            11,\n            1,\n            1,\n            8,\n            14,\n            1,\n            33,\n            6,\n            3,\n            2,\n            5,\n            1,\n            445,\n            3801,\n            1,\n            3\n        ],\n        'zmin': 0,\n        'zmax': 3801,\n        'colorscale': [\n            [\n                0,\n                'rgb(242,240,247)'\n            ],\n            [\n                0.2,\n                'rgb(218,218,235)'\n            ],\n            [\n                0.4,\n                'rgb(188,189,220)'\n            ],\n            [\n                0.6,\n                'rgb(158,154,200)'\n            ],\n            [\n                0.8,\n                'rgb(117,107,177)'\n            ],\n            [\n                1,\n                'rgb(84,39,143)'\n            ]\n        ],\n        'colorbar': {\n            'title': 'Count',\n            
'thickness': 10\n        },\n        'marker': {\n            'line': {\n                'color': 'rgb(255,255,255)',\n                'width': 2\n            }\n        }\n    }\n], null);\n        </script>\n    \" src=\"\" width=\"800\" height=\"420\" frameborder=\"0\" sandbox=\"allow-scripts\"></iframe>"
    }
   },
   "source": [
    "### Movies by Country\n",
    "\n",
    "{{ country_plot }}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}