diff --git a/src/pyDataVizDay.py b/src/pyDataVizDay.py index d82508f..02d7c31 100644 --- a/src/pyDataVizDay.py +++ b/src/pyDataVizDay.py @@ -102,8 +102,9 @@ def enthusiast(): @app.route('/Exploritory') def exploritory(): - notebook = render_template('Exploritory_nb.html') - return render_template('Exploritory.html', body=notebook) + title = render_template('Exploratory_Charts-Movie_Data-Latest_title.html') + notebook = render_template('Exploratory_Charts-Movie_Data-Latest.html') + return render_template('Exploritory.html', body=title + notebook) @app.route('/slides') def slides(): diff --git a/src/templates/Exploratory_Charts-Movie_Data-Latest.html b/src/templates/Exploratory_Charts-Movie_Data-Latest.html new file mode 100644 index 0000000..70677dd --- /dev/null +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest.html @@ -0,0 +1,5633 @@ + +
+
+
In [1]:
+
+
+
import numpy as np # linear algebra
+import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
+
+ +
+
+
+ +
+
+
+
In [2]:
+
+
+
from subprocess import check_output
+print(check_output(["ls", "C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw"]).decode("utf8"))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
movie_metadata.csv
+
+
+
+
+ +
+
+ +
+
+
+
In [3]:
+
+
+
import os
+import pandas as pd
+from pandas import DataFrame,Series
+from sklearn import tree
+import matplotlib
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import svm
+from sklearn.preprocessing import StandardScaler
+import statsmodels.formula.api as smf
+import statsmodels.api as sm
+from mpl_toolkits.mplot3d import Axes3D
+import seaborn as sns
+from sklearn import neighbors
+from sklearn import linear_model
+from pandas.core import datetools
+%matplotlib inline
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
C:\Users\alurus\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
+  from pandas.core import datetools
+
+
+
+ +
+
+ +
+
+
+
In [20]:
+
+
+
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
+
+ +
+
+
+ +
+
+
+
In [21]:
+
+
+
data=DataFrame(f)
+data.head()[:2]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[21]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
colordirector_namenum_critic_for_reviewsdurationdirector_facebook_likesactor_3_facebook_likesactor_2_nameactor_1_facebook_likesgrossgenres...num_user_for_reviewslanguagecountrycontent_ratingbudgettitle_yearactor_2_facebook_likesimdb_scoreaspect_ratiomovie_facebook_likes
0ColorJames Cameron723.0178.00.0855.0Joel David Moore1000.0760505847.0Action|Adventure|Fantasy|Sci-Fi...3054.0EnglishUSAPG-13237000000.02009.0936.07.91.7833000
1ColorGore Verbinski302.0169.0563.01000.0Orlando Bloom40000.0309404152.0Action|Adventure|Fantasy...1238.0EnglishUSAPG-13300000000.02007.05000.07.12.350
+

2 rows × 28 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [22]:
+
+
+
X_data=data.dtypes[data.dtypes!='object'].index
+X_train=data[X_data]
+X_train.head()[:2] 
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
num_critic_for_reviewsdurationdirector_facebook_likesactor_3_facebook_likesactor_1_facebook_likesgrossnum_voted_userscast_total_facebook_likesfacenumber_in_posternum_user_for_reviewsbudgettitle_yearactor_2_facebook_likesimdb_scoreaspect_ratiomovie_facebook_likes
0723.0178.00.0855.01000.0760505847.088620448340.03054.0237000000.02009.0936.07.91.7833000
1302.0169.0563.01000.040000.0309404152.0471220483500.01238.0300000000.02007.05000.07.12.350
+
+
+ +
+ +
+
+ +
+
+
+
In [32]:
+
+
+
# GETTING Correlation matrix
+corr_mat=X_train.corr(method='pearson')
+plt.figure(figsize=(20,10))
+sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='cubehelix')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[32]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x106d77f0>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
!jupyter nbconvert Exploratory_Charts-Movie_Data-Copy2.ipynb --template basic
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[NbConvertApp] Converting notebook Exploratory_Charts-Movie_Data-Copy2.ipynb to html
+[NbConvertApp] Writing 206376 bytes to Exploratory_Charts-Movie_Data-Copy2.html
+
+
+
+ +
+
+ +
+
+
+
In [5]:
+
+
+
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv')
+
+ +
+
+
+ +
+
+
+
In [6]:
+
+
+
df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
colordirector_namenum_critic_for_reviewsdurationdirector_facebook_likesactor_3_facebook_likesactor_2_nameactor_1_facebook_likesgrossgenres...num_user_for_reviewslanguagecountrycontent_ratingbudgettitle_yearactor_2_facebook_likesimdb_scoreaspect_ratiomovie_facebook_likes
0ColorJames Cameron723.0178.00.0855.0Joel David Moore1000.0760505847.0Action|Adventure|Fantasy|Sci-Fi...3054.0EnglishUSAPG-13237000000.02009.0936.07.91.7833000
1ColorGore Verbinski302.0169.0563.01000.0Orlando Bloom40000.0309404152.0Action|Adventure|Fantasy...1238.0EnglishUSAPG-13300000000.02007.05000.07.12.350
2ColorSam Mendes602.0148.00.0161.0Rory Kinnear11000.0200074175.0Action|Adventure|Thriller...994.0EnglishUKPG-13245000000.02015.0393.06.82.3585000
3ColorChristopher Nolan813.0164.022000.023000.0Christian Bale27000.0448130642.0Action|Thriller...2701.0EnglishUSAPG-13250000000.02012.023000.08.52.35164000
4NaNDoug WalkerNaNNaN131.0NaNRob Walker131.0NaNDocumentary...NaNNaNNaNNaNNaNNaN12.07.1NaN0
+

5 rows × 28 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [7]:
+
+
+
df['diff_gross'] = df['gross'] - df['budget']
+df_copy = df.copy().dropna()
+director_budge = df_copy.groupby(df_copy['director_name'])['diff_gross'].sum()
+direcotr_budge_indx = director_budge.sort_values(ascending=False)[:20].index
+director_budge_pivot = pd.pivot_table(data = df_copy[df_copy['director_name'].isin(direcotr_budge_indx)],
+                                      index=['title_year'],
+                                      columns=['director_name'],
+                                      values=['diff_gross'],
+                                      aggfunc='sum')
+
+
+fig,ax = plt.subplots(figsize=(8,6))
+sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='PuBu')
+plt.title('Director vs Year and diff_gross')
+plt.ylabel('Year')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + + +
+
<matplotlib.text.Text at 0xd419a58>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [8]:
+
+
+
data = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
+
+ +
+
+
+ +
+
+
+
In [40]:
+
+
+
matplotlib.rcParams['figure.figsize'] = (9.0, 5.0)
+scores = pd.DataFrame({"imdb score":data["imdb_score"]})
+scores.hist(bins=20)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[40]:
+ + + + +
+
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000012C971D0>]], dtype=object)
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [9]:
+
+
+
plt.figure(figsize = (18, 9))
+sns.countplot(x = 'imdb_score', data = data)
+xt = plt.xticks(rotation=56)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [10]:
+
+
+
plt.figure(figsize = (12, 9))
+sns.countplot(x = 'content_rating', data = data)
+xt = plt.xticks(rotation=56)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+
In [13]:
+
+
+
plt.figure(figsize = (11, 4))
+sns.boxplot(x = 'content_rating', y = 'imdb_score',  data = data)
+xt = plt.xticks(rotation=45)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [14]:
+
+
+
plt.figure(figsize = (11, 4))
+sns.violinplot('content_rating', 'imdb_score', data = data)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[14]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0xd1f2828>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+ + diff --git a/src/templates/Exploratory_Charts-Movie_Data-Latest_title.html b/src/templates/Exploratory_Charts-Movie_Data-Latest_title.html new file mode 100644 index 0000000..84a755b --- /dev/null +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest_title.html @@ -0,0 +1,3 @@ +
+

Exploratory Analysis in Python

+This page is a guest post by Suresh Aluru, created directly from a Jupyter notebook. This is a very powerful form of data visualization in Python. It gives the user a platform for quick and easy iterations. While exploring a dataset, the plots and tables instantly come to life without re-running the entire script. It eliminates the need to set up any complex HTML/CSS templates. The notebook can simply be saved as an HTML file, then served from a simple web server or shared as a static HTML file. \ No newline at end of file