diff --git a/src/templates/Exploratory_Charts-Movie_Data-Latest.html b/src/templates/Exploratory_Charts-Movie_Data-Latest.html index 0f76e19..9600691 100644 --- a/src/templates/Exploratory_Charts-Movie_Data-Latest.html +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest.html @@ -1,7 +1,7 @@
-
In [3]:
+
In [2]:
import numpy as np # linear algebra
@@ -15,7 +15,7 @@
 
-
In [4]:
+
In [12]:
from subprocess import check_output
@@ -48,7 +48,7 @@
 
-
In [8]:
+
In [14]:
import os
@@ -67,7 +67,6 @@
 from sklearn import neighbors
 from sklearn import linear_model
 from pandas.core import datetools
-from pandas.core import datetools
 %matplotlib inline
 
@@ -75,40 +74,59 @@
-
-
-
-
In [5]:
-
-
-
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
data=DataFrame(f)
-data.head()[:2]
-
- -
-
-
-
-
Out[7]:
+
+ + +
+
C:\Users\alurus\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
+  from pandas.core import datetools
+
+
+
+ +
+
+ +
+
+
+
In [7]:
+
+
+
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
+
+ +
+
+
+ +
+
+
+
In [14]:
+
+
+
data=DataFrame(f)
+data.head()[:5]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[14]:
@@ -203,9 +221,81 @@ 2.35 0 + + 2 + Color + Sam Mendes + 602.0 + 148.0 + 0.0 + 161.0 + Rory Kinnear + 11000.0 + 200074175.0 + Action|Adventure|Thriller + ... + 994.0 + English + UK + PG-13 + 245000000.0 + 2015.0 + 393.0 + 6.8 + 2.35 + 85000 + + + 3 + Color + Christopher Nolan + 813.0 + 164.0 + 22000.0 + 23000.0 + Christian Bale + 27000.0 + 448130642.0 + Action|Thriller + ... + 2701.0 + English + USA + PG-13 + 250000000.0 + 2012.0 + 23000.0 + 8.5 + 2.35 + 164000 + + + 4 + NaN + Doug Walker + NaN + NaN + 131.0 + NaN + Rob Walker + 131.0 + NaN + Documentary + ... + NaN + NaN + NaN + NaN + NaN + NaN + 12.0 + 7.1 + NaN + 0 + -

2 rows × 28 columns

+

5 rows × 28 columns

@@ -217,11 +307,23 @@
-
In [9]:
+
In [10]:
X_data=data.dtypes[data.dtypes!='object'].index
-X_train=data[X_data]
+
+ +
+
+
+ +
+
+
+
In [11]:
+
+
+
X_train=data[X_data]
 X_train.head()[:2] 
 
@@ -235,7 +337,7 @@
-
Out[9]:
+
Out[11]:
@@ -336,12 +438,12 @@
-
In [10]:
+
In [13]:
# GETTING Correllation matrix
 corr_mat=X_train.corr(method='pearson')
-plt.figure(figsize=(20,10))
+plt.figure(figsize=(23,10))
 sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='Oranges');
 
@@ -361,2440 +463,2441 @@
-
-
In [11]:
+
In [33]:
-
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv')
+
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay/data/raw/movie_metadata.csv')
 
@@ -2820,7 +2923,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
-
In [12]:
+
In [34]:
df.head()
@@ -2836,7 +2939,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
 
 
-
Out[12]:
+
Out[34]:
@@ -3025,7 +3128,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
-
In [13]:
+
In [29]:
df['diff_gross'] = df['gross'] - df['budget']
@@ -3039,7 +3142,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
                                       aggfunc='sum')
 
 
-fig,ax = plt.subplots(figsize=(8,6))
+fig,ax = plt.subplots(figsize=(14,8))
 sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='Oranges')
 plt.title('Director vs Year and Share')
 plt.ylabel('Year');
@@ -3061,571 +3164,771 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
 
 
 
-
@@ -3638,7 +3941,7 @@ QqFQKIxTyiBdKBQKhcI4pQzShUKhUCiMU/4/DsC3OtXvG94AAAAASUVORK5CYII=
-
In [14]:
+
In [18]:
data = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
@@ -3659,7 +3962,7 @@ QqFQKIxTyiBdKBQKhcI4pQzShUKhUCiMU/4/DsC3OtXvG94AAAAASUVORK5CYII=
 
-
In [32]:
+
In [50]:
matplotlib.rcParams['figure.figsize'] = (18, 9.0)
@@ -5604,7 +5907,7 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
 
-
In [39]:
+
In [45]:
!jupyter nbconvert Exploratory_Charts-Movie_Data-Latest_a.ipynb --template basic
@@ -5625,7 +5928,7 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
 
 
[NbConvertApp] Converting notebook Exploratory_Charts-Movie_Data-Latest_a.ipynb to html
-[NbConvertApp] Writing 397209 bytes to Exploratory_Charts-Movie_Data-Latest_a.html
+[NbConvertApp] Writing 412667 bytes to Exploratory_Charts-Movie_Data-Latest_a.html
 
@@ -5636,10 +5939,23 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
-
In [ ]:
+
In [11]:
-
 
+
Pf = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay/data/raw/movie_metadata.csv')
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
Pf.head([:5];