From 536afe563e563a6b29012311cfbda51695907336 Mon Sep 17 00:00:00 2001 From: Walker Waylon Scott Date: Mon, 31 Jul 2017 11:32:38 -0500 Subject: [PATCH 01/10] fixed exploritory formatting --- src/templates/Exploritory.html | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/templates/Exploritory.html b/src/templates/Exploritory.html index 4ffd3b3..3475ace 100644 --- a/src/templates/Exploritory.html +++ b/src/templates/Exploritory.html @@ -14,7 +14,9 @@
-
+
+ +
{{ body | safe }}
@@ -22,7 +24,8 @@
+ + + + ''' + + template = ''' +

{{title}}

+
+ + ''' + + def __init__(self): + super(C3Plotter, self).__init__() + + def render(self, + data, + div_id="chart", + custom_css='', + title="", + head="", + y_axis_tick_format='', + secondary_y_axis_tick_format='' + , + **kwargs): + ''' + render the data in HTML template + ''' + try: + data = self.pandas_data(data, **kwargs) + except AttributeError: + pass + + if not self.is_valid_name(div_id): + raise ValueError( + "Name {} is invalid. Only letters, numbers, '_', and '-' are permitted ".format( + div_id)) + + return Template(head + self.template).render( + div_id=div_id.replace(" ", "_"), + custom_css=custom_css, + title=title, + y_axis_tick_format=y_axis_tick_format, + secondary_y_axis_tick_format=secondary_y_axis_tick_format, + data=json.dumps( + data, indent=4).replace("'", "\\'").replace('"', "'")) + + def plot_and_save(self, + data, + w=800, + h=430, + filename='chart', + subplots=False, + subplot_groups=False, + title=False, + overwrite=True): + ''' + save the rendered html to a file and returns an IFrame to display the plot in the notebook + ''' + self.save(data, filename, overwrite,) + return IFrame(filename + '.html', w, h) + + def plot(self, + data, + w=800, + h=430, + div_id='chart', + subplots=False, + subplot_groups=False, + title=False, + **kwargs): + ''' + output an iframe containing the plot in the notebook without saving + ''' + + if subplots: + if title: + if len(title) > 0: + title = title + '
' + body = '' + + if not subplot_groups: + subplot_groups = {col: [col] for col in data.columns} + + for group in subplot_groups: + body = body + (self.render(data=data[subplot_groups[group]], + div_id=str(div_id) + str(group), + head=self.head, + title=str(title) + str(group), + **kwargs + ) + ) + title='' + else: + body = self.render( + data=data, + div_id=div_id, + head=self.head, + **kwargs) + + return HTML(self.iframe.format(source=body, w=w, h=h*len(subplot_groups))) + + def update(): + pass + + def save(self, data, filename='chart', overwrite=True): + ''' + save the rendered html to a file in the same directory as the notebook + ''' + try: + data = self.pandas_data(data, **kwargs) + except AttributeError: + pass + + html = self.render(data=data, div_id=filename, head=self.head) + if overwrite: + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + if not os.path.exists(filename.replace(" ", "_") + '.html'): + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + raise IOError('File Already Exists!') + + def pandas_data(self, + df, + colors=False, + data_label_formats=False, + data_labels=False, + grid=False, + group=False, + height=300, + hue=False, + kind='line', + kinds=None, + legend=True, + mark_right=False, + point=False, + secondary_y=list(), + stacked=False, + subchart=False, + subplots=False, + tick_count=10, + value=False, + value_labels=False, + x_axis_tick_culling=False, + x_axis_type='auto', + x_tick_values=False, + xlabels=False, + xlim=False, + xregions=False, + xy_rotated=False, + ylabels=False, + ylim=False, + yregions=False, + zoom=False, + ): + ''' + create data dictionary from pandas DataFrame + + TODO: + ## Pandas Features + * proper docstring + * subplots + * layout + * height -> figsize + * use_index + * legend + * xlim (axis.x.min, axis.x.max or axis.x.extent) + * ylim + * colorbar + * table + * axis-rotation + ## Seaborn-esque features + * hue - ability to provide long 
form data + ## C3 Features + * interaction: {enabled: false} + * transition: {duration: 500} + * onrendered: function() {...} + * onmouseover/out + * data.empty.label.text + * data.selection.enabled + * data.selection.grouped + * data.selection.multiple + * data.selection.draggable + * axis.x.tick.fit + * axis.x.tick.values + * axis.x.tick.rotate + * axis.x.label + * axis.x.show + * legend.hide + * legend.position + * tooltip.show + * tooltip.grouped + * point.focus.expand.enabled + * subchart.size.height + * point.focus.expand.r + * point.select.r + * line.connectNull + + + param kind: str + * line + * spline + * step + * areas + * area-spline + * area-step + * bar + * scatter + * pie + * donut + * gauge + + param x_axis_type: str + * timeseries + * category + * numeric + + ''' + # kinds = ['line', 'spline', 'step', 'area','area-spline', 'area-step', + # 'bar', 'scatter', 'pie', 'donut', 'gauge'] + + data = { + 'size': { + 'height': height, + }, + "data": { + 'x': 'x', + 'axes': dict() + }, + 'subchart': { + 'show': subchart + }, + 'point': { + 'show': point + }, + 'grid': { + 'x': { + 'show': grid + }, + 'y': { + 'show': grid + } + }, + 'axis': { + 'rotated': xy_rotated, + 'x': {'tick': {'count': tick_count, + 'values': x_tick_values, + 'culling': dict(), + }, + }, + 'y': {'tick': {'format': ''}}, + 'y2': {'tick': {}}, + }, + 'zoom': {} + + } + if kind: + data['data']['type'] = kind + if kinds: + data['data']['types'] = kinds + + if mark_right: + df = df.rename( + columns={col: col + '(right)' for col in secondary_y}) + secondary_y = [y + '(right)' for y in secondary_y] + if hue and value: + df = df.groupby([df.index.name, hue])[value].sum().unstack() + + df = df.copy() + df['x'] = df.index + df['x'] = df['x'].astype('str').values.tolist() + + data['data']['columns'] = [[col] + df[col].values.tolist() + for col in df.columns] + # data['data']['columns'].extend([['x'] + df.index.astype('str').values.tolist()]) + for col in df.columns: + if col in secondary_y: + 
data['data']['axes'][col] = 'y2' + else: + data['data']['axes'][col] = 'y' + if len(secondary_y) > 0: + data['axis']['y2']['show'] = True + + if colors: + # repeat color palette if not long enough + colors = colors*math.ceil(len(df.columns)/len(colors)) + color_data = {} + for col, color in zip(df.columns, colors): + color_data[col] = color + data['data']['colors'] = color_data + + if x_axis_type == 'auto': + index_type = str(df.index.dtype) + + if 'date' in index_type: + data['axis']['x']['type'] = 'timeseries' + data['axis']['x']['tick']['format'] = '%Y-%m-%d' + + if 'object' in index_type or 'category' in index_type: + data['axis']['x']['type'] = 'category' + data['axis']['x']['tick']['culling'][ + 'max'] = x_axis_tick_culling + else: + if 'date' in x_axis_type or 'time' in x_axis_type: + data['axis']['x']['type'] = 'timeseries' + data['axis']['x']['tick']['format'] = '%Y-%m-%d' + + if 'categor' in x_axis_type or 'str' in x_axis_type: + data['axis']['x']['type'] = 'category' + data['axis']['x']['tick']['culling'][ + 'max'] = x_axis_tick_culling + + if xlim: + data['axis']['x']['min'] = xlim[0] + data['axis']['x']['max'] = xlim[1] + + if ylim: + data['axis']['y']['min'] = ylim[0] + data['axis']['y']['max'] = ylim[1] + + if stacked: + group = df.columns.values.tolist() + group.pop(-1) + group = [group] + + if group: + data['data']['groups'] = group + + if zoom: + data['zoom']['enabled'] = True + data['zoom']['rescale'] = True + + if xregions: + data['regions'] = [{'axis': 'x', 'start': region[ + 0], 'end':region[1]} for region in xregions] + + if yregions: + data['regions'] = [{'axis': 'y', 'start': region[ + 0], 'end':region[1]} for region in yregions] + + if xlabels: + data['grid']['x']['lines'] = [ + {'value': label[0], 'text': label[1]} for label in xlabels] + + if ylabels: + data['grid']['y']['lines'] = [ + {'value': label[0], 'text': label[1]} for label in ylabels] + + if data_labels: + if data_labels == True: + data_labels = df.drop('x', axis=1).columns + 
if data_label_formats: + data['data']['labels'] = {} + for column in data_label_formats: + data['data']['labels'][column] = data_label_formats[column] + else: + data['data']['labels'] = True + + return data diff --git a/src/iplotter/chartist_plotter.py b/src/iplotter/chartist_plotter.py new file mode 100644 index 0000000..30d9e92 --- /dev/null +++ b/src/iplotter/chartist_plotter.py @@ -0,0 +1,98 @@ +from jinja2 import Template +from IPython.display import IFrame, HTML +import os +import json +from .base_plotter import IPlotter + + +class ChartistPlotter(IPlotter): + """ + Class for creating chartist.js charts in ipython notebook + """ + + head = ''' + + + + ''' + + template = ''' +
+ + ''' + + def __init__(self): + super(ChartistPlotter, self).__init__() + + def render(self, data, chart_type, options=None, div_id="chart", head=""): + ''' + render the data in HTML template + ''' + if not self.is_valid_name(div_id): + raise ValueError( + "Name {} is invalid. Only letters, numbers, '_', and '-' are permitted ".format( + div_id)) + + return Template(head + self.template).render( + div_id=div_id.replace(" ", "_"), + data=json.dumps( + data, indent=4).replace("'", "\\'").replace('"', "'"), + chart_type=chart_type, + options=json.dumps( + options, indent=4).replace("'", "\\'").replace('"', "'")) + + def plot_and_save(self, + data, + chart_type, + options=None, + w=800, + h=420, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file and return an IFrame to display the plot in the notebook + ''' + self.save(data, chart_type, options, filename, overwrite) + return IFrame(filename + '.html', w, h) + + def plot(self, data, chart_type, options=None, w=800, h=420): + ''' + output an iframe containing the plot in the notebook without saving + ''' + return HTML( + self.iframe.format( + source=self.render( + data=data, + options=options, + chart_type=chart_type, + head=self.head), + w=w, + h=h)) + + def save(self, + data, + chart_type, + options=None, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file in the same directory as the notebook + ''' + html = self.render( + data=data, + chart_type=chart_type, + options=options, + div_id=filename, + head=self.head) + + if overwrite: + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + if not os.path.exists(filename.replace(" ", "_") + '.html'): + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + raise IOError('File Already Exists!') diff --git a/src/iplotter/chartjs_plotter.py b/src/iplotter/chartjs_plotter.py new file mode 100644 index 0000000..e8e3762 --- /dev/null +++ 
b/src/iplotter/chartjs_plotter.py @@ -0,0 +1,115 @@ +from jinja2 import Template +from IPython.display import IFrame, HTML +import os +import json +from .base_plotter import IPlotter + + +class ChartJSPlotter(IPlotter): + """ + Class for creating charts.js charts in ipython notebook + """ + + head = ''' + + + ''' + + template = ''' + + + ''' + + def __init__(self): + super(ChartJSPlotter, self).__init__() + + def render(self, + data, + chart_type, + options=None, + div_id="chart", + head="", + w=800, + h=420): + ''' + render the data in HTML template + ''' + if not self.is_valid_name(div_id): + raise ValueError( + "Name {} is invalid. Only letters, numbers, '_', and '-' are permitted ".format( + div_id)) + + return Template(head + self.template).render( + div_id=div_id.replace(" ", "_"), + data=json.dumps( + data, indent=4).replace("'", "\\'").replace('"', "'"), + chart_type=chart_type, + options=json.dumps( + options, indent=4).replace("'", "\\'").replace('"', "'"), + w=w, + h=h) + + def plot_and_save(self, + data, + chart_type, + options=None, + w=800, + h=420, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file and return an IFrame to display the plot in the notebook + ''' + self.save(data, chart_type, options, filename, w, h, overwrite) + return IFrame(filename + '.html', w, h) + + def plot(self, data, chart_type, options=None, w=800, h=420): + ''' + output an iframe containing the plot in the notebook without saving + ''' + return HTML( + self.iframe.format( + source=self.render( + data=data, + chart_type=chart_type, + options=options, + head=self.head, + w=w, + h=h), + w=w, + h=h)) + + def save(self, + data, + chart_type, + options=None, + filename='chart', + w=800, + h=420, + overwrite=True): + ''' + save the rendered html to a file in the same directory as the notebook + ''' + html = self.render( + data=data, + chart_type=chart_type, + options=options, + div_id=filename, + head=self.head, + w=w, + h=h) + + if overwrite: + with 
open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + if not os.path.exists(filename.replace(" ", "_") + '.html'): + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + raise IOError('File Already Exists!') diff --git a/src/iplotter/export.py b/src/iplotter/export.py new file mode 100644 index 0000000..8c065b7 --- /dev/null +++ b/src/iplotter/export.py @@ -0,0 +1,35 @@ +import time +from selenium import webdriver +import os + + +class VirtualBrowser(object): + """Helper class for converting html charts to png""" + + def __init__(self, driver=webdriver.Chrome): + super(VirtualBrowser, self).__init__() + self.driver = driver() + + def __enter__(self): + return self + + def save_as_png(self, filename, width=300, height=250, render_time=1): + ''' + open saved html file in an virtual browser and save a screen shot to PNG format + ''' + self.driver.set_window_size(width, height) + self.driver.get('file://{path}/{filename}'.format( + path=os.getcwd(), filename=filename + ".html")) + time.sleep(render_time) + self.driver.save_screenshot(filename + ".png") + + def __exit__(self, type, value, traceback): + self.driver.quit() + return True + + def quit(self): + ''' + shutdown virtual browser when finished + ''' + self.driver.quit() + return True \ No newline at end of file diff --git a/src/iplotter/google_plotter.py b/src/iplotter/google_plotter.py new file mode 100644 index 0000000..2dcb163 --- /dev/null +++ b/src/iplotter/google_plotter.py @@ -0,0 +1,124 @@ +from jinja2 import Template +from IPython.display import IFrame, HTML +import os +import json +from .base_plotter import IPlotter + + +class GCPlotter(IPlotter): + """ + Class for creating Google Charts in ipython notebook + """ + head = ''' + + + ''' + + template = ''' +
+ + ''' + + def __init__(self): + super(GCPlotter, self).__init__() + + def render(self, + data, + chart_type, + chart_package='corechart', + options=None, + div_id="chart", + head=""): + ''' + render the data in HTML template + ''' + if not self.is_valid_name(div_id): + raise ValueError( + "Name {} is invalid. Only letters, numbers, '_', and '-' are permitted ".format( + div_id)) + + return Template(head + self.template).render( + div_id=div_id.replace(" ", "_"), + data=json.dumps( + data, indent=4).replace("'", "\\'").replace('"', "'"), + chart_type=chart_type, + chart_package=chart_package, + options=json.dumps( + options, indent=4).replace("'", "\\'").replace('"', "'")) + + def plot_and_save(self, + data, + chart_type, + chart_package='corechart', + options=None, + w=800, + h=420, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file and return an IFrame to display the plot in the notebook + ''' + self.save(data, chart_type, chart_package, options, filename, + overwrite) + return IFrame(filename + '.html', w, h) + + def plot(self, + data, + chart_type, + chart_package='corechart', + options=None, + w=800, + h=420): + ''' + output an iframe containing the plot in the notebook without saving + ''' + return HTML( + self.iframe.format( + source=self.render( + data=data, + options=options, + chart_type=chart_type, + chart_package=chart_package, + head=self.head), + w=w, + h=h)) + + def save(self, + data, + chart_type, + chart_package='corechart', + options=None, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file in the same directory as the notebook + ''' + html = self.render( + data=data, + chart_type=chart_type, + chart_package=chart_package, + options=options, + div_id=filename, + head=self.head) + + if overwrite: + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + if not os.path.exists(filename.replace(" ", "_") + '.html'): + with open(filename.replace(" ", "_") + '.html', 
'w') as f: + f.write(html) + else: + raise IOError('File Already Exists!') diff --git a/src/iplotter/plotly_plotter.py b/src/iplotter/plotly_plotter.py new file mode 100644 index 0000000..dec9677 --- /dev/null +++ b/src/iplotter/plotly_plotter.py @@ -0,0 +1,88 @@ +from jinja2 import Template +from IPython.display import IFrame, HTML +import os +import json +from .base_plotter import IPlotter + + +class PlotlyPlotter(IPlotter): + """ + Class for creating plotly.js charts in ipython notebook + """ + + head = ''' + + + + + ''' + + template = ''' +
+ + ''' + + def __init__(self): + super(PlotlyPlotter, self).__init__() + + def render(self, data, layout=None, div_id="chart", head=""): + ''' + render the data in HTML template + ''' + if not self.is_valid_name(div_id): + raise ValueError( + "Name {} is invalid. Only letters, numbers, '_', and '-' are permitted ".format( + div_id)) + + return Template(head + self.template).render( + div_id=div_id.replace(" ", "_"), + data=json.dumps( + data, indent=4).replace("'", "\\'").replace('"', "'"), + layout=json.dumps( + layout, indent=4).replace("'", "\\'").replace('"', "'")) + + def plot_and_save(self, + data, + layout=None, + w=800, + h=420, + filename='chart', + overwrite=True): + ''' + save the rendered html to a file and return an IFrame to display the plot in the notebook + ''' + self.save(data, layout, filename, overwrite) + return IFrame(filename + '.html', w, h) + + def plot(self, data, layout=None, w=800, h=420): + ''' + output an iframe containing the plot in the notebook without saving + ''' + return HTML( + self.iframe.format( + source=self.render( + data=data, + layout=layout, + head=self.head, ), + w=w, + h=h)) + + def save(self, data, layout=None, filename='chart', overwrite=True): + ''' + save the rendered html to a file in the same directory as the notebook + ''' + html = self.render( + data=data, layout=layout, div_id=filename, head=self.head) + + if overwrite: + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + if not os.path.exists(filename.replace(" ", "_") + '.html'): + with open(filename.replace(" ", "_") + '.html', 'w') as f: + f.write(html) + else: + raise IOError('File Already Exists!') From 96cc6abfa5980c9961182381afc16a42bccaeddb Mon Sep 17 00:00:00 2001 From: Walker Waylon Scott Date: Wed, 2 Aug 2017 14:02:21 -0500 Subject: [PATCH 07/10] Updated Enthusiast content from Suresh --- .../Exploratory_Charts-Movie_Data-Latest.html | 8589 +++++++++-------- 1 file changed, 4310 insertions(+), 4279 deletions(-) diff 
--git a/src/templates/Exploratory_Charts-Movie_Data-Latest.html b/src/templates/Exploratory_Charts-Movie_Data-Latest.html index 70677dd..0f76e19 100644 --- a/src/templates/Exploratory_Charts-Movie_Data-Latest.html +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest.html @@ -1,7 +1,7 @@
-
In [1]:
+
In [3]:
import numpy as np # linear algebra
@@ -15,7 +15,7 @@
 
-
In [2]:
+
In [4]:
from subprocess import check_output
@@ -48,7 +48,7 @@
 
-
In [3]:
+
In [8]:
import os
@@ -67,6 +67,7 @@
 from sklearn import neighbors
 from sklearn import linear_model
 from pandas.core import datetools
+from pandas.core import datetools
 %matplotlib inline
 
@@ -74,29 +75,10 @@
-
-
- - -
- -
- - -
-
C:\Users\alurus\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
-  from pandas.core import datetools
-
-
-
- -
-
-
-
In [20]:
+
In [5]:
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
@@ -109,7 +91,7 @@
 
-
In [21]:
+
In [7]:
data=DataFrame(f)
@@ -126,7 +108,7 @@
 
 
-
Out[21]:
+
Out[7]:
@@ -235,7 +217,7 @@
-
In [22]:
+
In [9]:
X_data=data.dtypes[data.dtypes!='object'].index
@@ -253,7 +235,7 @@
 
 
-
Out[22]:
+
Out[9]:
@@ -343,16 +325,24 @@
+
+
+
+
+
+

GETTING Correlation matrix

+
+
-
In [32]:
+
In [10]:
# GETTING Correllation matrix
 corr_mat=X_train.corr(method='pearson')
 plt.figure(figsize=(20,10))
-sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='cubehelix')
+sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='Oranges');
 
@@ -363,19 +353,6 @@
-
- -
Out[32]:
- - - - -
-
<matplotlib.axes._subplots.AxesSubplot at 0x106d77f0>
-
- -
-
@@ -385,2470 +362,2439 @@
@@ -2861,39 +2807,7 @@ uhbmo2gAAAAASUVORK5CYII=
-
In [19]:
-
-
-
!jupyter nbconvert Exploratory_Charts-Movie_Data-Copy2.ipynb --template basic
-
- -
-
-
- -
-
- - -
- -
- - -
-
[NbConvertApp] Converting notebook Exploratory_Charts-Movie_Data-Copy2.ipynb to html
-[NbConvertApp] Writing 206376 bytes to Exploratory_Charts-Movie_Data-Copy2.html
-
-
-
- -
-
- -
-
-
-
In [5]:
+
In [11]:
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv')
@@ -2906,7 +2820,7 @@ uhbmo2gAAAAASUVORK5CYII=
 
-
In [6]:
+
In [12]:
df.head()
@@ -2922,7 +2836,7 @@ uhbmo2gAAAAASUVORK5CYII=
 
 
-
Out[6]:
+
Out[12]:
@@ -3100,10 +3014,18 @@ uhbmo2gAAAAASUVORK5CYII=
+
+
+
+
+
+

Director Vs Share by Year

+
+
-
In [7]:
+
In [13]:
df['diff_gross'] = df['gross'] - df['budget']
@@ -3118,9 +3040,9 @@ uhbmo2gAAAAASUVORK5CYII=
 
 
 fig,ax = plt.subplots(figsize=(8,6))
-sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='PuBu')
-plt.title('Director vs Year and diff_gross')
-plt.ylabel('Year')
+sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='Oranges')
+plt.title('Director vs Year and Share')
+plt.ylabel('Year');
 
@@ -3131,19 +3053,6 @@ uhbmo2gAAAAASUVORK5CYII=
-
- -
Out[7]:
- - - - -
-
<matplotlib.text.Text at 0xd419a58>
-
- -
-
@@ -3153,582 +3062,570 @@ uhbmo2gAAAAASUVORK5CYII=
@@ -3741,7 +3638,7 @@ OJQeAAAAAElFTkSuQmCC
-
In [8]:
+
In [14]:
data = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
@@ -3751,15 +3648,23 @@ OJQeAAAAAElFTkSuQmCC
 
+
+
+
+
+
+

IMDB Score In Histogram

+
+
-
In [40]:
+
In [32]:
-
matplotlib.rcParams['figure.figsize'] = (9.0, 5.0)
+
matplotlib.rcParams['figure.figsize'] = (18, 9.0)
 scores = pd.DataFrame({"imdb score":data["imdb_score"]})
-scores.hist(bins=20)
+scores.hist(bins=20);
 
@@ -3770,19 +3675,6 @@ OJQeAAAAAElFTkSuQmCC
-
- -
Out[40]:
- - - - -
-
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000012C971D0>]], dtype=object)
-
- -
-
@@ -3791,140 +3683,183 @@ OJQeAAAAAElFTkSuQmCC
-
@@ -3934,10 +3869,18 @@ TkSuQmCC
+
+
+
+
+
+

IMDB Score By Count Plot

+
+
-
In [9]:
+
In [26]:
plt.figure(figsize = (18, 9))
@@ -4191,13 +4134,21 @@ n0ty44m7n1Br/WKLfgCA5fNRDgAAAKAZH+UAAAAAmhFMAAAAAM0IJgAAAIBmBBMAAABAM/8fUHwp
 
+
+
+
+
+
+

Count by Content Rating - Count Plot

+
+
-
In [10]:
+
In [21]:
-
plt.figure(figsize = (12, 9))
+
plt.figure(figsize = (11, 4))
 sns.countplot(x = 'content_rating', data = data)
 xt = plt.xticks(rotation=56)
 
@@ -4218,293 +4169,257 @@ n0ty44m7n1Br/WKLfgCA5fNRDgAAAKAZH+UAAAAAmhFMAAAAAM0IJgAAAIBmBBMAAABAM/8fUHwp
- -
-
In [ ]:
+
+
-
-
 
-
- +
+

Content Rating Vs IMDB Score - Box Plot

-
-
@@ -4886,14 +4796,23 @@ RKRo/x9J7Ry013cL/gAAAABJRU5ErkJggg==
+
+
+
+
+
+

Content Rating Vs IMDB Score - Violin Plot

+
+
-
In [14]:
+
In [33]:
plt.figure(figsize = (11, 4))
 sns.violinplot('content_rating', 'imdb_score', data = data)
+xt = plt.xticks(rotation=45)
 
@@ -4904,19 +4823,6 @@ RKRo/x9J7Ry013cL/gAAAABJRU5ErkJggg==
-
- -
Out[14]:
- - - - -
-
<matplotlib.axes._subplots.AxesSubplot at 0xd1f2828>
-
- -
-
@@ -4925,687 +4831,767 @@ RKRo/x9J7Ry013cL/gAAAABJRU5ErkJggg==
-
@@ -5615,6 +5601,51 @@ KjQVRVEURVGUQPj/AQny1vTwN90tAAAAAElFTkSuQmCC
+
+
+
+
In [39]:
+
+
+
!jupyter nbconvert Exploratory_Charts-Movie_Data-Latest_a.ipynb --template basic
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[NbConvertApp] Converting notebook Exploratory_Charts-Movie_Data-Latest_a.ipynb to html
+[NbConvertApp] Writing 397209 bytes to Exploratory_Charts-Movie_Data-Latest_a.html
+
+
+
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+
From 2e9cdef714a4b13271a60d2f73df5ec0b8390639 Mon Sep 17 00:00:00 2001 From: Walker Waylon Scott Date: Sat, 5 Aug 2017 10:47:05 -0500 Subject: [PATCH 08/10] updated presentation --- src/templates/index.md | 32 +++++++++++++++++++--- src/templates/slides.md | 60 +++++++++++++++++++---------------------- 2 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/templates/index.md b/src/templates/index.md index a6b5098..d3beb08 100644 --- a/src/templates/index.md +++ b/src/templates/index.md @@ -29,23 +29,50 @@ app = Flask(__name__) @app.route('/') def index(): + """ + Index page built by converting index.md to html, then inserting that into the index.html template + + go to your base_url + / to view this page + """ body = markdown.markdown(render_template('index.md'), extensions=['markdown.extensions.fenced_code']) return render_template('index.html', body=body) if __name__ == '__main__': + # if this is ran as the main program run the app + # if the program is imported it will not run allowing us to reuse some components in other projects easily. app.run() - ``` +#### Javascript + +For the Enthusiast page we need to implement some javascript in order to update the page dynamically without a page refresh. We need to implement a bit of javascript that will talk to our api and change the information on the page. Currently javascript is the only client-side scripting language for the browser. There are a number of languages that can be transpiled into browser-ready javascript, but javascript is the only language that runs in the browser. + +I believe that modern javascript (ES6) is a much better language. It looks much closer to python than older javascript. Currently ES6 does not run in many browsers natively and needs to be compiled down to browser-ready javascript. I am trying to keep this project very simple, and did not want a complicated build tool chain. 
If you are using javascript more often, or have a more complicated use case do yourself a favor and look into setting up build tools that compile ES6 to browser ready javascript. For this reason I have chosen to use jquery. It provides a simple interface into the features that I need, and runs natively in the browser. + +```javascript +$('#top').change(function(){update_kpi()}) +// ties the update function to the top div, and triggers an update any time there is a change in the top div that contains our form data + +function update_kpi() +{ + var url = '/api/score_timeseries?' + $('top').val() + var kpi_data = $.get(url); // gets the json response data from our python api + kpi_data.done(function(results) // waits for the response to come back from python + { + $('#gross').html(results.responseJSON.gross) // updates the gross kpi that sits in the gross div + }) +} +``` + ### [Slides](/slides) Since we are serving up a web app we can embrace the power and flexibility that this gives us. The slides for this event will be served along side of the visualization. This will make these slides available anywhere you have a connection to the web. These slides were written in markdown, and are very simple to write.

#### Example Markdown Code for Slides +This file will be placed into a slides.html template that has been set up to render reveal slides from markdown. The reveal.js web page has examples of how to set up the html. It will render typical markdown code as html, and will create a new slide at every ```----``` and a new fragment at every ```---``` ``` markdown - # pyDataVizDay *a python implementation for Data Viz Day* @@ -62,7 +89,6 @@ Since we are serving up a web app we can embrace the power and flexibility that * Tools Used * Other Considerations * Pros/Cons - ``` ### [api](/api/doc/#/pyDataVizDay) diff --git a/src/templates/slides.md b/src/templates/slides.md index 55b1517..d8ae9b8 100644 --- a/src/templates/slides.md +++ b/src/templates/slides.md @@ -1,4 +1,5 @@ # pyDataVizDay +--- *a python implementation for Data Viz Day* ![python](https://s3.amazonaws.com/files.dezyre.com/images/blog/Python+for+Data+Science+vs.+Python+for+Web+Development/Python+for+Data+Science+vs+Web+Devlopment.png) @@ -6,10 +7,9 @@ ---- # Agenda - -1. Viz Walk (3 Views) +--- +1. Viz Walk (2 Views) 1. 
Full Web App - * Simple Web App * Exploritory Notebook * Tools Used * Other Considerations @@ -17,8 +17,8 @@ ---- -## About Me - +# About Me +--- ![profile](/static/profile_photo_sm.jpg) Waylon Walker @@ -28,7 +28,7 @@ Product Engineering ---- # Open The Viz - +--- [pydatavizday.herokuapp.com](pydatavizday.herokuapp.com) --- @@ -39,7 +39,7 @@ Product Engineering ---- -## External Resources +# External Resources --- @@ -90,10 +90,12 @@ Python ---- ## Other Considerations - +--- * Jupyter Notebooks * Jupyter Dashboards * DASH (just released in mid JUNE) +* Bokeh +* Data shader --- @@ -125,7 +127,7 @@ Python ---- ## Pros of Python - +--- * Fast High Level Data Science * reusable * Powerful Web stack @@ -142,20 +144,17 @@ python has a vast ecosysytem for data wrangling ``` python import pandas as pd -import glob, os -path = "C:/reports" -files = glob.glob(path + os.sep + '*_report.csv*') +raw_example = pd.read_csv('example_data.csv') -frames = [] -for file in files: - frames.append(pd.read_csv(file)) - -all_reports = (pd.concat(frames) - .dropna() - .query('DIVISION == ACCOUNTING') - ) +example = (raw_example + .groupby(['Date']) + .sum() + .resample('m') + .fillna(0) + ) +example.plot() ``` --- @@ -176,19 +175,23 @@ data.update() ### Testing -The ability to easily reuse code/datasets/plot gives us the ability to spend time making large projects more . - +Well written tests give us the confidence to push to production without manually spending our own time testing each feature of our end product. 
``` python +import unittest +import etl + class Testdata(unittest.TestCase): """ Test suite for my dataframe data """ + def setUp(self): + self.data = etl.Data() + important_cols = ['DATE', 'PRODUCT', 'QTY'] def test_cols(self): for col in important_cols: - self.assertLess(len(data[data[col].isnull()]), 0, msg=f'column {col} has unexpected null values') - self.assertIn(col, data.columns.tolist(), msg=f'column {col} is missing - check the /data/raw/shipments.csv file to ensure logistics has not changed the data format') + self.assertIn(col, sdata.columns.tolist()) ``` @@ -227,10 +230,9 @@ else: ---- ## Cons on python - +--- * Code * Interactivity -* Speed * ML research --- @@ -244,12 +246,6 @@ else: --- -### Slow - -*slow runtime compared to statically typed languages (c, java)* - ---- - ### Latest ML aglorithms are typically developed in R --- From 5b6a4d52ef84e83e561ad6ab2149ce8f85d7f63d Mon Sep 17 00:00:00 2001 From: Walker Waylon Scott Date: Mon, 7 Aug 2017 12:06:04 -0500 Subject: [PATCH 09/10] updated Suresh's page Signed-off-by: Walker Waylon Scott --- .../Exploratory_Charts-Movie_Data-Latest.html | 6414 +++++++++-------- 1 file changed, 3365 insertions(+), 3049 deletions(-) diff --git a/src/templates/Exploratory_Charts-Movie_Data-Latest.html b/src/templates/Exploratory_Charts-Movie_Data-Latest.html index 0f76e19..9600691 100644 --- a/src/templates/Exploratory_Charts-Movie_Data-Latest.html +++ b/src/templates/Exploratory_Charts-Movie_Data-Latest.html @@ -1,7 +1,7 @@
-
In [3]:
+
In [2]:
import numpy as np # linear algebra
@@ -15,7 +15,7 @@
 
-
In [4]:
+
In [12]:
from subprocess import check_output
@@ -48,7 +48,7 @@
 
-
In [8]:
+
In [14]:
import os
@@ -67,7 +67,6 @@
 from sklearn import neighbors
 from sklearn import linear_model
 from pandas.core import datetools
-from pandas.core import datetools
 %matplotlib inline
 
@@ -75,40 +74,59 @@
-
-
-
-
In [5]:
-
-
-
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
data=DataFrame(f)
-data.head()[:2]
-
- -
-
-
-
-
Out[7]:
+
+ + +
+
C:\Users\alurus\AppData\Local\Continuum\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
+  from pandas.core import datetools
+
+
+
+ +
+
+ +
+
+
+
In [7]:
+
+
+
f = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
+
+ +
+
+
+ +
+
+
+
In [14]:
+
+
+
data=DataFrame(f)
+data.head()[:5]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[14]:
@@ -203,9 +221,81 @@ 2.35 0 + + 2 + Color + Sam Mendes + 602.0 + 148.0 + 0.0 + 161.0 + Rory Kinnear + 11000.0 + 200074175.0 + Action|Adventure|Thriller + ... + 994.0 + English + UK + PG-13 + 245000000.0 + 2015.0 + 393.0 + 6.8 + 2.35 + 85000 + + + 3 + Color + Christopher Nolan + 813.0 + 164.0 + 22000.0 + 23000.0 + Christian Bale + 27000.0 + 448130642.0 + Action|Thriller + ... + 2701.0 + English + USA + PG-13 + 250000000.0 + 2012.0 + 23000.0 + 8.5 + 2.35 + 164000 + + + 4 + NaN + Doug Walker + NaN + NaN + 131.0 + NaN + Rob Walker + 131.0 + NaN + Documentary + ... + NaN + NaN + NaN + NaN + NaN + NaN + 12.0 + 7.1 + NaN + 0 + -

2 rows × 28 columns

+

5 rows × 28 columns

@@ -217,11 +307,23 @@
-
In [9]:
+
In [10]:
X_data=data.dtypes[data.dtypes!='object'].index
-X_train=data[X_data]
+
+ +
+
+
+ +
+
+
+
In [11]:
+
+
+
X_train=data[X_data]
 X_train.head()[:2] 
 
@@ -235,7 +337,7 @@
-
Out[9]:
+
Out[11]:
@@ -336,12 +438,12 @@
-
In [10]:
+
In [13]:
# GETTING Correllation matrix
 corr_mat=X_train.corr(method='pearson')
-plt.figure(figsize=(20,10))
+plt.figure(figsize=(23,10))
 sns.heatmap(corr_mat,vmax=1,square=True,annot=True,cmap='Oranges');
 
@@ -361,2440 +463,2441 @@
-
-
In [11]:
+
In [33]:
-
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv')
+
df = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay/data/raw/movie_metadata.csv')
 
@@ -2820,7 +2923,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
-
In [12]:
+
In [34]:
df.head()
@@ -2836,7 +2939,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
 
 
-
Out[12]:
+
Out[34]:
@@ -3025,7 +3128,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
-
In [13]:
+
In [29]:
df['diff_gross'] = df['gross'] - df['budget']
@@ -3039,7 +3142,7 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
                                       aggfunc='sum')
 
 
-fig,ax = plt.subplots(figsize=(8,6))
+fig,ax = plt.subplots(figsize=(14,8))
 sns.heatmap(director_budge_pivot['diff_gross'],vmin=0,annot=False,linewidth=.5,ax=ax,cmap='Oranges')
 plt.title('Director vs Year and Share')
 plt.ylabel('Year');
@@ -3061,571 +3164,771 @@ jRB/YHtFyzwREbF+SYckIiIiIiKayRqSiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIhoJh2SiIiIiIho
 
 
 
-
@@ -3638,7 +3941,7 @@ QqFQKIxTyiBdKBQKhcI4pQzShUKhUCiMU/4/DsC3OtXvG94AAAAASUVORK5CYII=
-
In [14]:
+
In [18]:
data = pd.read_csv("C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay2/data/raw/movie_metadata.csv")
@@ -3659,7 +3962,7 @@ QqFQKIxTyiBdKBQKhcI4pQzShUKhUCiMU/4/DsC3OtXvG94AAAAASUVORK5CYII=
 
-
In [32]:
+
In [50]:
matplotlib.rcParams['figure.figsize'] = (18, 9.0)
@@ -5604,7 +5907,7 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
 
-
In [39]:
+
In [45]:
!jupyter nbconvert Exploratory_Charts-Movie_Data-Latest_a.ipynb --template basic
@@ -5625,7 +5928,7 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
 
 
[NbConvertApp] Converting notebook Exploratory_Charts-Movie_Data-Latest_a.ipynb to html
-[NbConvertApp] Writing 397209 bytes to Exploratory_Charts-Movie_Data-Latest_a.html
+[NbConvertApp] Writing 412667 bytes to Exploratory_Charts-Movie_Data-Latest_a.html
 
@@ -5636,10 +5939,23 @@ VcKBppmZmZlV4j9rH7MwnBa6QwAAAABJRU5ErkJggg==
-
In [ ]:
+
In [11]:
-
 
+
Pf = pd.read_csv('C:/Users/alurus/GIT-Repository/VIZ Day/pyDataVizDay/data/raw/movie_metadata.csv')
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
Pf.head([:5];
 
From 3441570a5adf7322d4c1ad74ecc2ac73dc306d46 Mon Sep 17 00:00:00 2001 From: Walker Waylon Scott Date: Tue, 8 Aug 2017 11:11:50 -0500 Subject: [PATCH 10/10] removed blur on images. --- src/static/css/custom.css | 6 ------ src/templates/index.html | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/static/css/custom.css b/src/static/css/custom.css index 516d831..6513687 100644 --- a/src/static/css/custom.css +++ b/src/static/css/custom.css @@ -46,12 +46,6 @@ pre { } -img.blur { - width:367; - height:459px; - -webkit-filter: blur(10px); - filter: blur(10px); -} /*@keyframes fadein{ 0% { opacity:0; } diff --git a/src/templates/index.html b/src/templates/index.html index 7e0c507..7b617e1 100644 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -7,7 +7,7 @@ - + {% endblock %}