{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore Movie Dataset\n",
    "\n",
    "Exploratory charts for the movie dataset loaded through the local `etl` module: mean `gross` per `title_year` and the number of movies per country, both rendered with IPlotter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2017-06-26 18:57:49 \n",
      "\n",
      "CPython 3.6.1\n",
      "IPython 6.1.0\n",
      "\n",
      "pea 0.0.7\n",
      "pandas 0.20.2\n",
      "\n",
      "compiler : MSC v.1900 64 bit (AMD64)\n",
      "system : Windows\n",
      "release : 7\n",
      "machine : AMD64\n",
      "processor : Intel64 Family 6 Model 42 Stepping 7, GenuineIntel\n",
      "CPU cores : 8\n",
      "interpreter: 64bit\n"
     ]
    }
   ],
   "source": [
    "# All imports live in this cell so a fresh kernel can Run All from the top.\n",
    "import os\n",
    "import pandas as pd\n",
    "import settings\n",
    "import etl\n",
    "from iplotter import C3Plotter, PlotlyPlotter\n",
    "from IPython.display import HTML\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "# Record interpreter, package versions and machine details for provenance.\n",
    "%load_ext watermark\n",
    "%watermark -d -t -v -m -p pea,pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = etl.Data()\n",
    "data.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Available Columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n",
       " 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n",
       " 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n",
       " 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n",
       " 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n",
       " 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n",
       " 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n",
       " 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.movie.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plotting with IPlotter\n",
    "\n",
    "This example uses my own branch of IPlotter, which builds the chart dictionary from a pandas DataFrame.\n",
    "That is much less verbose, but the same charts can also be produced with the current version on PyPI."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "c3 = C3Plotter()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Timeseries of mean gross"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select `gross` before aggregating so that only this column is averaged;\n",
    "# years without any gross figure are plotted as 0.\n",
    "plot_data = data.movie.groupby('title_year')[['gross']].mean().fillna(0)\n",
    "c3.plot(plot_data, zoom=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of movies per country. `size()` counts rows, so movies with a\n",
    "# missing `duration` are still included (rows without a country are not).\n",
    "country_group = data.movie.groupby('country').size()\n",
    "counts = country_group.values.tolist()\n",
    "countries = country_group.index.values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plotly = PlotlyPlotter()\n",
    "\n",
    "# Single choropleth trace: one location per country, shaded by its movie count.\n",
    "plotly_chart = [{\n",
    "    \"type\": 'choropleth',\n",
    "    \"locationmode\": 'country names',\n",
    "    \"locations\": countries,\n",
    "    \"z\": counts,\n",
    "    \"zmin\": 0,\n",
    "    \"zmax\": max(counts),\n",
    "    \"colorscale\": [\n",
    "        [0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'],\n",
    "        [0.4, 'rgb(188,189,220)'], [0.6, 'rgb(158,154,200)'],\n",
    "        [0.8, 'rgb(117,107,177)'], [1, 'rgb(84,39,143)']\n",
    "    ],\n",
    "    \"colorbar\": {\n",
    "        \"title\": 'Count',\n",
    "        \"thickness\": 10\n",
    "    },\n",
    "    \"marker\": {\n",
    "        \"line\": {\n",
    "            \"color\": 'rgb(255,255,255)',\n",
    "            \"width\": 2\n",
    "        }\n",
    "    }\n",
    "}]\n",
    "\n",
    "plotly_layout = {\n",
    "    \"title\": 'Movie Counts by Country',\n",
    "    \"geo\": {\n",
    "        # `scope` selects the map region ('world', 'usa', 'europe', ...);\n",
    "        # 'country names' is a `locationmode` value, not a scope.\n",
    "        \"scope\": 'world',\n",
    "    }\n",
    "}\n",
    "\n",
    "# The layout has to be passed explicitly, otherwise the title and geo\n",
    "# settings above never reach the chart. The result is embedded by the\n",
    "# markdown cell below via {{ country_plot }}.\n",
    "country_plot = plotly.plot(data=plotly_chart, layout=plotly_layout)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "variables": {
     " country_plot ": ""
    }
   },
   "source": [
    "### Movies by Country\n",
    "\n",
    "{{ country_plot }}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}