{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore Movie Dataset"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2017-06-26 18:57:49 \n",
"\n",
"CPython 3.6.1\n",
"IPython 6.1.0\n",
"\n",
"pea 0.0.7\n",
"pandas 0.20.2\n",
"\n",
"compiler : MSC v.1900 64 bit (AMD64)\n",
"system : Windows\n",
"release : 7\n",
"machine : AMD64\n",
"processor : Intel64 Family 6 Model 42 Stepping 7, GenuineIntel\n",
"CPU cores : 8\n",
"interpreter: 64bit\n"
]
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"import settings\n",
"import etl\n",
"\n",
"%matplotlib inline\n",
"\n",
"%load_ext watermark\n",
"%watermark -d -t -v -m -p pea,pandas"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = etl.Data()\n",
"data.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Available Columns"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n",
" 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n",
" 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n",
" 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n",
" 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n",
" 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n",
" 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n",
" 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.movie.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## plotting with IPlotter\n",
"\n",
"This example is using my own branch of IPlotter which builds the dictionary from a pandas DataFrame. Much less verbose, but can be done with the current version on PyPI."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from iplotter import C3Plotter"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"c3 = C3Plotter()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Timeseries of mean gross"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"plot_data = data.movie.groupby(['title_year']).mean()[['gross']].fillna(0)\n",
"c3.plot(plot_data, zoom=True)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"country_group = data.movie.groupby('country').count()['duration']\n",
"counts = country_group.values.tolist()\n",
"countries = country_group.index.values.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"from iplotter import PlotlyPlotter\n",
"from IPython.display import HTML\n",
"\n",
"plotly = PlotlyPlotter()\n",
"\n",
"c3_plotter = C3Plotter()\n",
"\n",
"plotly_chart = [{\n",
" \"type\": 'choropleth',\n",
" \"locationmode\": 'country names',\n",
" \"locations\": countries,\n",
" \"z\": counts,\n",
" \"zmin\": 0,\n",
" \"zmax\": max(counts),\n",
" \"colorscale\": [\n",
" [0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'],\n",
" [0.4, 'rgb(188,189,220)'], [0.6, 'rgb(158,154,200)'],\n",
" [0.8, 'rgb(117,107,177)'], [1, 'rgb(84,39,143)']\n",
" ],\n",
" \"colorbar\": {\n",
" \"title\": 'Count',\n",
" \"thickness\": 10\n",
" },\n",
" \"marker\": {\n",
" \"line\": {\n",
" \"color\": 'rgb(255,255,255)',\n",
" \"width\": 2\n",
" }\n",
" }\n",
"}]\n",
"\n",
"plotly_layout = {\n",
" \"title\": 'Movie Counts by Country',\n",
" \"geo\": {\n",
" \"scope\": 'country names',\n",
" }\n",
"}\n",
"\n",
"\n",
"\n",
"country_plot = plotly.plot(data=plotly_chart)"
]
},
{
"cell_type": "markdown",
"metadata": {
"variables": {
" country_plot ": ""
}
},
"source": [
"### Movies by Country\n",
"\n",
"{{ country_plot }}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}