diff --git a/src/pyDataVizDay.py b/src/pyDataVizDay.py index d82508f..02d7c31 100644 --- a/src/pyDataVizDay.py +++ b/src/pyDataVizDay.py @@ -102,8 +102,9 @@ def enthusiast(): @app.route('/Exploritory') def exploritory(): - notebook = render_template('Exploritory_nb.html') - return render_template('Exploritory.html', body=notebook) + title = render_template('Exploratory_Charts-Movie_Data-Latest_title.html') + notebook = render_template('Exploratory_Charts-Movie_Data-Latest.html') + return render_template('Exploritory.html', body=title + notebook) @app.route('/slides') def slides(): diff --git a/src/templates/Exploratory_Charts-Movie_Data-Latest.html b/src/templates/Exploratory_Charts-Movie_Data-Latest.html new file mode 100644 index 0000000..70677dd --- /dev/null +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest.html @@ -0,0 +1,5633 @@ + +
import numpy as np # linear algebra
+import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
+from subprocess import check_output
+print(check_output(["ls", "C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw"]).decode("utf8"))
+import os
+import pandas as pd
+from pandas import DataFrame,Series
+from sklearn import tree
+import matplotlib
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import svm
+from sklearn.preprocessing import StandardScaler
+import statsmodels.formula.api as smf
+import statsmodels.api as sm
+from mpl_toolkits.mplot3d import Axes3D
+import seaborn as sns
+from sklearn import neighbors
+from sklearn import linear_model
+from pandas.core import datetools
+%matplotlib inline
+f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")  # load the movie metadata CSV from a hard-coded absolute path
+data=DataFrame(f)  # read_csv already returns a DataFrame; this re-wraps it under the name `data`
+data.head()[:2]  # evaluates to the first two rows (head() gives five, the slice keeps two)
+X_data=data.dtypes[data.dtypes!='object'].index  # labels of the columns whose dtype is not 'object'
+X_train=data[X_data]  # `data` restricted to those columns
+X_train.head()[:2]  # evaluates to the first two rows of the restricted frame
+# Compute the Pearson correlation matrix of the columns in X_train
+corr_mat=X_train.corr(method='pearson')
+plt.figure(figsize=(20,10))  # new 20x10 figure; it becomes pyplot's current figure for the heatmap below
+sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='cubehelix')  # square, value-annotated cells; colour scale capped at 1
+!jupyter nbconvert Exploratory_Charts-Movie_Data-Copy2.ipynb --template basic
+df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv')
+df.head()
+df['diff_gross'] = df['gross'] - df['budget']
+df_copy = df.copy().dropna()
+director_budge = df_copy.groupby(df_copy['director_name'])['diff_gross'].sum()
+direcotr_budge_indx = director_budge.sort_values(ascending=False)[:20].index
+director_budge_pivot = pd.pivot_table(data = df_copy[df_copy['director_name'].isin(direcotr_budge_indx)],
+ index=['title_year'],
+ columns=['director_name'],
+ values=['diff_gross'],
+ aggfunc='sum')
+
+
+fig,ax = plt.subplots(figsize=(8,6))
+sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='PuBu')
+plt.title('Director vs Year and diff_gross')
+plt.ylabel('Year')
+data = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")  # reload the movie metadata CSV, rebinding `data`
+matplotlib.rcParams['figure.figsize'] = (9.0, 5.0)  # sets matplotlib's global default figure size
+scores = pd.DataFrame({"imdb score":data["imdb_score"]})  # one-column frame; the dict key "imdb score" is the column label
+scores.hist(bins=20)  # histogram of that column using 20 bins
+plt.figure(figsize = (18, 9))  # new 18x9 figure; becomes the current figure for the plot below
+sns.countplot(x = 'imdb_score', data = data)  # count of rows for each distinct imdb_score value
+xt = plt.xticks(rotation=56)  # rotate x tick labels by 56 degrees; result bound to `xt` (presumably to keep it out of the notebook output)
+plt.figure(figsize = (12, 9))  # new 12x9 figure; becomes the current figure for the plot below
+sns.countplot(x = 'content_rating', data = data)  # count of rows for each distinct content_rating value
+xt = plt.xticks(rotation=56)  # rotate x tick labels by 56 degrees; rebinds `xt`
+
+plt.figure(figsize = (11, 4))  # new 11x4 figure; becomes the current figure for the plot below
+sns.boxplot(x = 'content_rating', y = 'imdb_score', data = data)  # box plot of imdb_score grouped by content_rating
+xt = plt.xticks(rotation=45)  # rotate x tick labels by 45 degrees; rebinds `xt`
+plt.figure(figsize = (11, 4))
+sns.violinplot('content_rating', 'imdb_score', data = data)
+
+