implemented keyword api

This commit is contained in:
walkews 2017-07-11 08:26:35 -05:00
parent 741faf194d
commit c8d14fd6cc
4 changed files with 104 additions and 18 deletions

View file

@ -74,4 +74,6 @@ This is the wireframe that the team has been given to replicate in python using
* define update method * define update method
* docstrings * docstrings
* add keyword/genre to load method * add keyword/genre to load method
* **1 hr** implemented data.filter() method
* **30 min** api parser/docs/keywords

View file

@ -1,4 +1,5 @@
flask flask
flask-restplus
gunicorn gunicorn
pandas pandas
-e git+https://github.com/WaylonWalker/iplotter.git@master#egg=iplotter -e git+https://github.com/WaylonWalker/iplotter.git@master#egg=iplotter

View file

@ -14,6 +14,10 @@ class Data(object):
""" """
if data == None: if data == None:
self.load() self.load()
else:
self.movie = data.movie
self.genre = data.genre
self.keyword = data.keyword
def __str__(self): def __str__(self):
@ -58,6 +62,65 @@ class Data(object):
keyword = generate_keyword(movie) keyword = generate_keyword(movie)
keyword.to_pickle(os.path.join(settings.processed_data_dir, 'keyword.pkl')) keyword.to_pickle(os.path.join(settings.processed_data_dir, 'keyword.pkl'))
def filter(self, start_year=None, end_year=None,
           genre=None, country=None, language=None,
           top=None, title=None, color=None):
    """
    Return a new ``Data`` narrowed to the movies matching every criterion given.

    Criteria that are falsy (``None``, ``''``, ``0``) are ignored.  The
    filtering is applied to a fresh ``Data`` built from ``self``; the
    ``movie``, ``genre`` and ``keyword`` attributes of ``self`` are not
    reassigned.

    :param start_year: keep rows whose ``title_year`` compares greater than
        ``'<start_year - 1>-01-01'``; must be convertible with ``int()``
    :param end_year: keep rows whose ``title_year`` compares less than or
        equal to ``'<end_year>-01-01'``
    :param genre: keep movies whose index is listed in ``self.genre`` with
        ``genres == genre``
    :param country: exact match on the ``country`` column
    :param language: exact match on the ``language`` column
    :param top: after sorting by ``imdb_score`` (descending) keep only the
        first ``int(top)`` rows
    :param title: exact match on the ``movie_title`` column
    :param color: exact match on the ``color`` column
    :returns: Data whose ``movie`` frame is filtered and sorted, and whose
        ``genre`` / ``keyword`` frames only contain rows whose ``'index'``
        value is one of the remaining movie index values
    :raises ValueError: if ``start_year`` or ``top`` is truthy but not
        convertible with ``int()``
    """
    data = Data(self)
    movie = data.movie

    # Only criteria that were actually supplied contribute a mask, so no
    # placeholder booleans (and no len()/TypeError probing) are needed.
    # The combination order matches the original expression: genre,
    # start_year, end_year, country, language, title, color.
    masks = []
    if genre:
        genre_indexes = data.genre[data.genre.genres == genre]['index'].values
        masks.append(movie.index.isin(genre_indexes))
    # NOTE(review): the year bounds are compared against date strings, which
    # presumably relies on title_year being a datetime-like column - confirm.
    if start_year:
        masks.append(movie.title_year > f'{str(int(start_year)-1)}-01-01')
    if end_year:
        masks.append(movie.title_year <= f'{str(end_year)}-01-01')
    equality_criteria = (
        ('country', country),
        ('language', language),
        ('movie_title', title),
        ('color', color),
    )
    for column, wanted in equality_criteria:
        if wanted:
            masks.append(movie[column] == wanted)

    # With no criteria there is nothing to apply.  Skipping the indexing
    # step also avoids ``frame[[]]`` on an empty frame, which pandas reads
    # as "select no columns" and which would break the sort below.
    if masks:
        combined = masks[0]
        for mask in masks[1:]:
            combined = combined & mask
        movie = movie[combined]

    data.movie = movie.sort_values('imdb_score', ascending=False)
    if top:
        data.movie = data.movie.head(int(top))

    # Restrict the companion frames to the movies that survived.
    kept_indexes = data.movie.index.values.tolist()
    data.genre = data.genre[data.genre['index'].isin(kept_indexes)]
    data.keyword = data.keyword[data.keyword['index'].isin(kept_indexes)]
    return data
def generate_genre(movie): def generate_genre(movie):
""" """

View file

@ -9,9 +9,11 @@ IMDB 5000 Movie Dataset.
import os import os
import io import io
import base64 as b64 import base64 as b64
from collections import Counter
from flask import Flask from flask import Flask
from flask import request, render_template, make_response, jsonify from flask import request, render_template, make_response, jsonify, Blueprint, url_for
from flask_restplus import Resource, Api, fields, reqparse
import settings import settings
import etl import etl
import palettes as pal import palettes as pal
@ -19,26 +21,25 @@ import palettes as pal
from iplotter import C3Plotter from iplotter import C3Plotter
c3 = C3Plotter() c3 = C3Plotter()
def fig_to_html(fig): app = Flask(__name__)
""" api_blueprint = Blueprint('api', __name__, url_prefix='/api')
converts a matplotlib figure into an html image api = Api(api_blueprint, title='pyDataVizday api',
description='This api is used for the pyDataVizDay visualization',
doc='/doc/')
app.register_blueprint(api_blueprint)
# Optional query-string arguments accepted by the API, registered in the
# same order as before: (argument name, help text).
_FILTER_ARGUMENTS = (
    ('start_year', 'start date for data'),
    ('end_year', 'end date for data'),
    ('genre', 'movie genre'),
    ('country', 'geographical country location'),
    ('language', 'language of the movie (ex. english)'),
    ('top', 'top n titles by imdb rating'),
    ('title', 'title of the movie'),
    ('color', '"Color" or "Black and White"'),
)

parser = reqparse.RequestParser()
for _arg_name, _arg_help in _FILTER_ARGUMENTS:
    parser.add_argument(_arg_name, help=_arg_help, required=False)
:param fig: matplotlibe figure object
:returns: STR html string
"""
buf = io.BytesIO()
fig.savefig(buf, format='png')
img = ('<img src="data:image/png;base64,{}">'
.format(b64.b64encode(buf.getvalue()))
.replace("b'",'')
.replace("'",''))
return img
data = etl.Data() data = etl.Data()
data.load()
app = Flask(__name__)
@app.route('/') @app.route('/')
def index(): def index():
@ -71,6 +72,25 @@ def slides():
slide_body = render_template('slide_body.html') slide_body = render_template('slide_body.html')
return render_template('slides.html', body=slide_body) return render_template('slides.html', body=slide_body)
# Resource served at '/keywords' on the api object.
# NOTE: plain comments are used instead of docstrings on purpose;
# flask-restplus may publish docstrings in the generated API docs, and this
# rewrite must not alter that output.
@api.route('/keywords')
@api.expect(parser)
class keywords(Resource):
    def get(self):
        # Forward every parsed query argument to data.filter() by name.
        args = parser.parse_args()
        criteria = {
            name: args[name]
            for name in ('start_year', 'end_year', 'genre', 'country',
                         'language', 'top', 'title', 'color')
        }
        keyword_data = data.filter(**criteria)

        # Tally the plot keywords of the filtered data and answer with the
        # 50 most common ones as a JSON list of text/weight objects.
        tally = Counter(keyword_data.keyword.plot_keywords.values.tolist())
        words = []
        for text, weight in tally.most_common(50):
            words.append({'text': text, 'weight': weight})
        return jsonify(words)
if __name__ == '__main__': if __name__ == '__main__':
port = int(os.environ.get("PORT", 5000)) port = int(os.environ.get("PORT", 5000))
app.run(host='0.0.0.0', port=port, debug=True) app.run(host='0.0.0.0', port=port, debug=True)