{ "cells": [ { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 11, "hidden": false, "row": 0, "width": 6 }, "report_default": { "hidden": true } } } } }, "source": [ "## Explore Movie Dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 4, "height": 10, "hidden": false, "row": 11, "width": 4 }, "report_default": { "hidden": true } } } } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2017-06-29 08:29:53 \n", "\n", "CPython 3.6.1\n", "IPython 6.1.0\n", "\n", "pea 0.0.7\n", "pandas 0.20.2\n", "\n", "compiler : MSC v.1900 64 bit (AMD64)\n", "system : Windows\n", "release : 7\n", "machine : AMD64\n", "processor : Intel64 Family 6 Model 42 Stepping 7, GenuineIntel\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "import os\n", "import pandas as pd\n", "import settings\n", "import etl\n", "\n", "%matplotlib inline\n", "\n", "%load_ext watermark\n", "%watermark -d -t -v -m -p pea,pandas" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "hidden": true }, "report_default": { "hidden": true } } } } }, "outputs": [], "source": [ "data = etl.Data()\n", "data.load()" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": { "hidden": true } } } } }, "source": [ "## Available Columns" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 11, "hidden": false, "row": 11, "width": 4 }, "report_default": { "hidden": true } } } } }, "outputs": [ { "data": { "text/plain": [ "Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',\n", " 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',\n", " 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',\n", " 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',\n", " 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',\n", " 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',\n", " 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',\n", " 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.movie.columns" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 4, "width": 4 }, "report_default": { "hidden": true } } } } }, "source": [ "### Add Calulations to etl" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 17, "hidden": false, "row": 8, "width": 4 }, "report_default": { "hidden": true } } } } }, "outputs": [ { "data": { "text/plain": [ "color object\n", "director_name object\n", "num_critic_for_reviews float64\n", "duration float64\n", "director_facebook_likes float64\n", "actor_3_facebook_likes float64\n", "actor_2_name object\n", "actor_1_facebook_likes float64\n", "gross float64\n", "genres object\n", "actor_1_name object\n", "movie_title object\n", "num_voted_users int64\n", "cast_total_facebook_likes int64\n", "actor_3_name object\n", "facenumber_in_poster float64\n", "plot_keywords object\n", "movie_imdb_link object\n", "num_user_for_reviews float64\n", "language object\n", "country object\n", "content_rating object\n", "budget float64\n", "title_year float64\n", "actor_2_facebook_likes float64\n", "imdb_score float64\n", "aspect_ratio float64\n", "movie_facebook_likes int64\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.movie.dtypes" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "hidden": true }, "report_default": { "hidden": true } } } } }, "outputs": [], "source": [ "data.movie['net'] = data.movie['gross'] - data.movie['budget']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 79, "hidden": false, "row": 25, "width": 5 }, "report_default": { "hidden": true } } } } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | movie_title | \n", "title_year | \n", "budget | \n", "gross | \n", "net | \n", "
|---|---|---|---|---|---|
| 2988 | \n", "The Host | \n", "2006.0 | \n", "1.221550e+10 | \n", "2201412.0 | \n", "-1.221330e+10 | \n", "
| 3859 | \n", "Lady Vengeance | \n", "2005.0 | \n", "4.200000e+09 | \n", "211667.0 | \n", "-4.199788e+09 | \n", "
| 3005 | \n", "Fateless | \n", "2005.0 | \n", "2.500000e+09 | \n", "195888.0 | \n", "-2.499804e+09 | \n", "
| 2323 | \n", "Princess Mononoke | \n", "1997.0 | \n", "2.400000e+09 | \n", "2298191.0 | \n", "-2.397702e+09 | \n", "
| 2334 | \n", "Steamboy | \n", "2004.0 | \n", "2.127520e+09 | \n", "410388.0 | \n", "-2.127110e+09 | \n", "
| 3423 | \n", "Akira | \n", "1988.0 | \n", "1.100000e+09 | \n", "439162.0 | \n", "-1.099561e+09 | \n", "
| 4542 | \n", "Godzilla 2000 | \n", "1999.0 | \n", "1.000000e+09 | \n", "10037390.0 | \n", "-9.899626e+08 | \n", "
| 3075 | \n", "Kabhi Alvida Naa Kehna | \n", "2006.0 | \n", "7.000000e+08 | \n", "3275443.0 | \n", "-6.967246e+08 | \n", "
| 3851 | \n", "Tango | \n", "1998.0 | \n", "7.000000e+08 | \n", "1687311.0 | \n", "-6.983127e+08 | \n", "
| 3273 | \n", "Kites | \n", "2010.0 | \n", "6.000000e+08 | \n", "1602466.0 | \n", "-5.983975e+08 | \n", "
| 1338 | \n", "Red Cliff | \n", "2008.0 | \n", "5.536320e+08 | \n", "626809.0 | \n", "-5.530052e+08 | \n", "
| 3311 | \n", "The Legend of Suriyothai | \n", "2001.0 | \n", "4.000000e+08 | \n", "454255.0 | \n", "-3.995457e+08 | \n", "
| 1016 | \n", "The Messenger: The Story of Joan of Arc | \n", "1999.0 | \n", "3.900000e+08 | \n", "14131298.0 | \n", "-3.758687e+08 | \n", "
| 2740 | \n", "Ong-bak 2 | \n", "2008.0 | \n", "3.000000e+08 | \n", "102055.0 | \n", "-2.998979e+08 | \n", "
| 1 | \n", "Pirates of the Caribbean: At World's End | \n", "2007.0 | \n", "3.000000e+08 | \n", "309404152.0 | \n", "9.404152e+06 | \n", "
| 5 | \n", "John Carter | \n", "2012.0 | \n", "2.637000e+08 | \n", "73058679.0 | \n", "-1.906413e+08 | \n", "
| 7 | \n", "Tangled | \n", "2010.0 | \n", "2.600000e+08 | \n", "200807262.0 | \n", "-5.919274e+07 | \n", "
| 6 | \n", "Spider-Man 3 | \n", "2007.0 | \n", "2.580000e+08 | \n", "336530303.0 | \n", "7.853030e+07 | \n", "
| 3461 | \n", "Spider-Man 3 | \n", "2007.0 | \n", "2.580000e+08 | \n", "336530303.0 | \n", "7.853030e+07 | \n", "
| 10 | \n", "Batman v Superman: Dawn of Justice | \n", "2016.0 | \n", "2.500000e+08 | \n", "330249062.0 | \n", "8.024906e+07 | \n", "
| 9 | \n", "Harry Potter and the Half-Blood Prince | \n", "2009.0 | \n", "2.500000e+08 | \n", "301956980.0 | \n", "5.195698e+07 | \n", "
| 8 | \n", "Avengers: Age of Ultron | \n", "2015.0 | \n", "2.500000e+08 | \n", "458991599.0 | \n", "2.089916e+08 | \n", "
| 18 | \n", "Pirates of the Caribbean: On Stranger Tides | \n", "2011.0 | \n", "2.500000e+08 | \n", "241063875.0 | \n", "-8.936125e+06 | \n", "
| 20 | \n", "The Hobbit: The Battle of the Five Armies | \n", "2014.0 | \n", "2.500000e+08 | \n", "255108370.0 | \n", "5.108370e+06 | \n", "
| 3 | \n", "The Dark Knight Rises | \n", "2012.0 | \n", "2.500000e+08 | \n", "448130642.0 | \n", "1.981306e+08 | \n", "
| 27 | \n", "Captain America: Civil War | \n", "2016.0 | \n", "2.500000e+08 | \n", "407197282.0 | \n", "1.571973e+08 | \n", "
| 2 | \n", "Spectre | \n", "2015.0 | \n", "2.450000e+08 | \n", "200074175.0 | \n", "-4.492582e+07 | \n", "
| 0 | \n", "Avatar | \n", "2009.0 | \n", "2.370000e+08 | \n", "760505847.0 | \n", "5.235058e+08 | \n", "
| 21 | \n", "The Amazing Spider-Man | \n", "2012.0 | \n", "2.300000e+08 | \n", "262030663.0 | \n", "3.203066e+07 | \n", "
| 15 | \n", "Man of Steel | \n", "2013.0 | \n", "2.250000e+08 | \n", "291021565.0 | \n", "6.602156e+07 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 4902 | \n", "3 | \n", "2010.0 | \n", "NaN | \n", "59774.0 | \n", "NaN | \n", "
| 4909 | \n", "Baghead | \n", "2008.0 | \n", "NaN | \n", "140016.0 | \n", "NaN | \n", "
| 4910 | \n", "Solitude | \n", "2014.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4912 | \n", "Ordet | \n", "1955.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4913 | \n", "Good Dick | \n", "2008.0 | \n", "NaN | \n", "15542.0 | \n", "NaN | \n", "
| 4919 | \n", "H. | \n", "2014.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4927 | \n", "The Calling | \n", "2014.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4934 | \n", "When the Lights Went Out | \n", "2012.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4935 | \n", "Heroes of Dirt | \n", "2015.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4944 | \n", "Sound of My Voice | \n", "2011.0 | \n", "NaN | \n", "405614.0 | \n", "NaN | \n", "
| 4960 | \n", "The Mighty | \n", "1998.0 | \n", "NaN | \n", "2643689.0 | \n", "NaN | \n", "
| 4967 | \n", "Open Secret | \n", "1948.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4969 | \n", "The Night Visitor | \n", "1971.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4974 | \n", "I Love You, Don't Touch Me! | \n", "1997.0 | \n", "NaN | \n", "33598.0 | \n", "NaN | \n", "
| 4982 | \n", "Supporting Characters | \n", "2012.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4985 | \n", "The Dirties | \n", "2013.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4986 | \n", "Gabriela | \n", "1983.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4989 | \n", "The Naked Ape | \n", "2006.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5001 | \n", "The Last Waltz | \n", "1978.0 | \n", "NaN | \n", "321952.0 | \n", "NaN | \n", "
| 5003 | \n", "The Exploding Girl | \n", "2009.0 | \n", "NaN | \n", "24705.0 | \n", "NaN | \n", "
| 5005 | \n", "Mutual Appreciation | \n", "2005.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5007 | \n", "Down Terrace | \n", "2009.0 | \n", "NaN | \n", "9609.0 | \n", "NaN | \n", "
| 5010 | \n", "Funny Ha Ha | \n", "2002.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5014 | \n", "Rampage | \n", "2009.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5019 | \n", "Exeter | \n", "2015.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5030 | \n", "On the Downlow | \n", "2004.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5032 | \n", "Bang | \n", "1995.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5038 | \n", "Signed Sealed Delivered | \n", "2013.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5039 | \n", "The Following | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 5041 | \n", "Shanghai Calling | \n", "2012.0 | \n", "NaN | \n", "10443.0 | \n", "NaN | \n", "
5043 rows × 5 columns
\n", "