implemented keyword api

This commit is contained in:
walkews 2017-07-11 08:26:35 -05:00
parent 741faf194d
commit c8d14fd6cc
4 changed files with 104 additions and 18 deletions

View file

@ -74,4 +74,6 @@ This is the wireframe that the team has been given to replicate in python using
* define update method
* docstrings
* add keyword/genre to load method
* **1 hr** implemented data.filter() method
* **30 min** api parser/docs/keywords

View file

@ -1,4 +1,5 @@
flask
flask-restplus
gunicorn
pandas
-e git+https://github.com/WaylonWalker/iplotter.git@master#egg=iplotter

View file

@ -14,6 +14,10 @@ class Data(object):
"""
if data == None:
self.load()
else:
self.movie = data.movie
self.genre = data.genre
self.keyword = data.keyword
def __str__(self):
@ -58,6 +62,65 @@ class Data(object):
keyword = generate_keyword(movie)
keyword.to_pickle(os.path.join(settings.processed_data_dir, 'keyword.pkl'))
def filter(self, start_year=None, end_year=None,
           genre=None, country=None, language=None,
           top=None, title=None, color=None):
    """
    Return a filtered copy of this data set, highest imdb_score first.

    Every argument is optional.  A falsy value (None, '', 0) switches the
    corresponding criterion off; the criteria that are supplied are
    combined with a logical AND.

    :param start_year: keep rows whose ``title_year`` is later than
        1 January of the year *before* ``start_year`` (value must be
        accepted by ``int()``)
    :param end_year: keep rows whose ``title_year`` is no later than
        1 January of ``end_year``
    :param genre: keep movies referenced by a genre-table row whose
        ``genres`` value equals this
    :param country: exact match on ``movie.country``
    :param language: exact match on ``movie.language``
    :param top: keep only the first ``int(top)`` rows after sorting
    :param title: exact match on ``movie.movie_title``
    :param color: exact match on ``movie.color``
    :returns: a new Data object whose ``movie``, ``genre`` and ``keyword``
        tables are restricted to the selected movies; the attributes of
        ``self`` are not rebound
    :raises ValueError: if ``start_year`` or ``top`` cannot be converted
        with ``int()``
    """
    # Work on a copy so the shared, fully loaded data set is not rebound.
    data = Data(self)
    movie = data.movie

    # One boolean mask per requested criterion, in a fixed order.
    criteria = []
    if genre:
        genre_indexes = data.genre[data.genre.genres == genre]['index'].values
        criteria.append(movie.index.isin(genre_indexes))
    if start_year:
        # NOTE(review): the year bounds presumably rely on title_year being
        # stored as 1 January of the release year -- confirm against the ETL.
        criteria.append(movie.title_year > f'{int(start_year) - 1}-01-01')
    if end_year:
        criteria.append(movie.title_year <= f'{end_year}-01-01')
    if country:
        criteria.append(movie.country == country)
    if language:
        criteria.append(movie.language == language)
    if title:
        criteria.append(movie.movie_title == title)
    if color:
        criteria.append(movie.color == color)

    # AND the masks together.  When nothing was requested the table is left
    # untouched instead of being indexed with a hand-built list of True,
    # which pandas reads as a column selection when the table is empty.
    combined = None
    for criterion in criteria:
        combined = criterion if combined is None else combined & criterion
    if combined is not None:
        movie = movie[combined]

    data.movie = movie.sort_values('imdb_score', ascending=False)
    if top:
        data.movie = data.movie.head(int(top))

    # Restrict the child tables to the movies that survived the filter.
    kept_indexes = data.movie.index.values.tolist()
    data.genre = data.genre[data.genre['index'].isin(kept_indexes)]
    data.keyword = data.keyword[data.keyword['index'].isin(kept_indexes)]
    return data
def generate_genre(movie):
"""

View file

@ -9,9 +9,11 @@ IMDB 5000 Movie Dataset.
import os
import io
import base64 as b64
from collections import Counter
from flask import Flask
from flask import request, render_template, make_response, jsonify
from flask import request, render_template, make_response, jsonify, Blueprint, url_for
from flask_restplus import Resource, Api, fields, reqparse
import settings
import etl
import palettes as pal
@ -19,26 +21,25 @@ import palettes as pal
from iplotter import C3Plotter
c3 = C3Plotter()
def fig_to_html(fig):
"""
converts a matplotlib figure into an html image
# Flask application and a flask-restplus Api mounted on a blueprint, so every
# API route lives under the /api URL prefix.
app = Flask(__name__)
api_blueprint = Blueprint('api', __name__, url_prefix='/api')
# doc='/doc/' sets the path (relative to the blueprint) of the generated API docs.
api = Api(api_blueprint, title='pyDataVizday api',
description='This api is used for the pyDataVizDay visualization',
doc='/doc/')
# NOTE(review): the blueprint is registered here, before the @api.route
# resources further down this module are declared -- confirm those routes are
# still attached to the app, or move this call below the resource definitions.
app.register_blueprint(api_blueprint)
# Query-string arguments accepted by the API resources.  Every argument is
# optional and no type= is given, so values are passed on as received.
parser = reqparse.RequestParser()
parser.add_argument('start_year', help='start date for data', required=False)
parser.add_argument('end_year', help='end date for data', required=False)
parser.add_argument('genre', help='movie genre', required=False)
parser.add_argument('country', help='geographical country location', required=False)
parser.add_argument('language', help='language of the movie (ex. english)', required=False)
parser.add_argument('top', help='top n titles by imdb rating', required=False)
parser.add_argument('title', help='title of the movie', required=False)
parser.add_argument('color', help='"Color" or "Black and White"', required=False)
:param fig: matplotlib figure object
:returns: STR html string
"""
buf = io.BytesIO()
fig.savefig(buf, format='png')
img = ('<img src="data:image/png;base64,{}">'
.format(b64.b64encode(buf.getvalue()))
.replace("b'",'')
.replace("'",''))
return img
# Module-level data set used by the request handlers below (e.g. /keywords).
# NOTE(review): etl.Data() called without an argument appears to invoke
# load() from its constructor already, so the explicit load() here may read
# the data a second time -- confirm and drop one of the two if so.
data = etl.Data()
data.load()
app = Flask(__name__)
@app.route('/')
def index():
@ -71,6 +72,25 @@ def slides():
slide_body = render_template('slide_body.html')
return render_template('slides.html', body=slide_body)
@api.route('/keywords')
@api.expect(parser)
class keywords(Resource):
    """Plot keywords of the movies matching the query, with their counts."""

    def get(self):
        """
        Filter the data set with the parsed query arguments and count keywords.

        :returns: JSON list of ``{'text': keyword, 'weight': count}`` objects
            for the 50 most common plot keywords of the selected movies
        """
        params = parser.parse_args()
        filter_fields = ('start_year', 'end_year', 'genre', 'country',
                         'language', 'top', 'title', 'color')
        selection = data.filter(**{field: params[field] for field in filter_fields})

        counts = Counter(selection.keyword.plot_keywords.values.tolist())
        payload = []
        for text, weight in counts.most_common(50):
            payload.append({'text': text, 'weight': weight})
        return jsonify(payload)
if __name__ == '__main__':
    # Development entry point: serve on all interfaces, port taken from the
    # PORT environment variable (default 5000).
    port = int(os.environ.get("PORT", 5000))
    # NOTE: debug mode exposes the Werkzeug interactive debugger, which allows
    # arbitrary code execution and should never be reachable from an untrusted
    # network while binding 0.0.0.0.  It stays on by default, as before, but
    # can now be switched off with FLASK_DEBUG=0 (or false/no/off).
    debug = os.environ.get("FLASK_DEBUG", "1").lower() not in ("0", "false", "no", "off")
    app.run(host='0.0.0.0', port=port, debug=debug)