implemented keyword api
This commit is contained in:
parent
741faf194d
commit
c8d14fd6cc
4 changed files with 104 additions and 18 deletions
|
|
@ -74,4 +74,6 @@ This is the wireframe that the team has been given to replicate in python using
|
|||
* define update method
|
||||
* docstrings
|
||||
* add keyword/genre to load method
|
||||
* **1 hr** implemented data.filter() method
|
||||
* **30 min** api parser/docs/keywords
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
flask
|
||||
flask-restplus
|
||||
gunicorn
|
||||
pandas
|
||||
-e git+https://github.com/WaylonWalker/iplotter.git@master#egg=iplotter
|
||||
63
src/etl.py
63
src/etl.py
|
|
@ -14,6 +14,10 @@ class Data(object):
|
|||
"""
|
||||
if data == None:
|
||||
self.load()
|
||||
else:
|
||||
self.movie = data.movie
|
||||
self.genre = data.genre
|
||||
self.keyword = data.keyword
|
||||
|
||||
def __str__(self):
|
||||
|
||||
|
|
@ -58,6 +62,65 @@ class Data(object):
|
|||
keyword = generate_keyword(movie)
|
||||
keyword.to_pickle(os.path.join(settings.processed_data_dir, 'keyword.pkl'))
|
||||
|
||||
def filter(self, start_year=None, end_year=None,
           genre=None, country=None, language=None,
           top=None, title=None, color=None):
    """
    Return a new Data object restricted to the movies matching the filters.

    All filters are optional; an omitted filter does not constrain the
    result.  ``self`` is never modified -- a copy is filtered and returned.

    :param start_year: earliest title year to keep (compared as a string
        against ``movie.title_year``; assumes ISO-date-like values --
        TODO confirm the column format)
    :param end_year: latest title year to keep
    :param genre: keep movies tagged with this genre in the genre table
    :param country: keep movies whose ``country`` equals this value
    :param language: keep movies whose ``language`` equals this value
    :param top: keep only the top-n rows by ``imdb_score``
    :param title: keep only the movie with this exact ``movie_title``
    :param color: e.g. "Color" or "Black and White"
    :returns: Data -- filtered copy with movie/genre/keyword kept in sync
    """
    data = Data(self)

    # Accumulate one boolean mask per *supplied* filter instead of
    # AND-ing Python bools with pandas Series and then probing the
    # result with len()/TypeError to detect the no-filter case.
    masks = []
    if start_year:
        # NOTE(review): strictly greater than Jan 1 of the prior year,
        # so late prior-year dates slip through -- kept as-is, confirm intent.
        masks.append(data.movie.title_year > f'{str(int(start_year)-1)}-01-01')
    if end_year:
        masks.append(data.movie.title_year <= f'{str(end_year)}-01-01')
    if genre:
        # genre table maps movie row index -> genre tag
        genre_indexes = data.genre[data.genre.genres == genre]['index'].values
        masks.append(data.movie.index.isin(genre_indexes))
    if country:
        masks.append(data.movie.country == country)
    if language:
        masks.append(data.movie.language == language)
    if title:
        masks.append(data.movie.movie_title == title)
    if color:
        masks.append(data.movie.color == color)

    if masks:
        combined = masks[0]
        for mask in masks[1:]:
            combined = combined & mask
    else:
        # no filters supplied -> keep every row
        combined = [True] * len(data.movie)

    data.movie = data.movie[combined].sort_values('imdb_score', ascending=False)
    if top:
        data.movie = data.movie.head(int(top))

    # keep only genre/keyword rows whose 'index' still points at a
    # surviving movie (computed once, used twice)
    kept_indexes = data.movie.index.values.tolist()
    data.genre = data.genre[data.genre['index'].isin(kept_indexes)]
    data.keyword = data.keyword[data.keyword['index'].isin(kept_indexes)]

    return data
|
||||
|
||||
|
||||
def generate_genre(movie):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -9,9 +9,11 @@ IMDB 5000 Movie Dataset.
|
|||
import os
|
||||
import io
|
||||
import base64 as b64
|
||||
from collections import Counter
|
||||
|
||||
from flask import Flask
|
||||
from flask import request, render_template, make_response, jsonify
|
||||
from flask import request, render_template, make_response, jsonify, Blueprint, url_for
|
||||
from flask_restplus import Resource, Api, fields, reqparse
|
||||
import settings
|
||||
import etl
|
||||
import palettes as pal
|
||||
|
|
@ -19,26 +21,25 @@ import palettes as pal
|
|||
from iplotter import C3Plotter
|
||||
c3 = C3Plotter()
|
||||
|
||||
def fig_to_html(fig):
|
||||
"""
|
||||
converts a matplotlib figure into an html image
|
||||
app = Flask(__name__)
|
||||
api_blueprint = Blueprint('api', __name__, url_prefix='/api')
|
||||
api = Api(api_blueprint, title='pyDataVizday api',
|
||||
description='This api is used for the pyDataVizDay visualization',
|
||||
doc='/doc/')
|
||||
app.register_blueprint(api_blueprint)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('start_year', help='start date for data', required=False)
|
||||
parser.add_argument('end_year', help='end date for data', required=False)
|
||||
parser.add_argument('genre', help='movie genre', required=False)
|
||||
parser.add_argument('country', help='geographical country location', required=False)
|
||||
parser.add_argument('language', help='language of the movie (ex. english)', required=False)
|
||||
parser.add_argument('top', help='top n titles by imdb rating', required=False)
|
||||
parser.add_argument('title', help='title of the movie', required=False)
|
||||
parser.add_argument('color', help='"Color" or "Black and White"', required=False)
|
||||
|
||||
:param fig: matplotlib figure object
|
||||
:returns: STR html string
|
||||
"""
|
||||
buf = io.BytesIO()
|
||||
fig.savefig(buf, format='png')
|
||||
img = ('<img src="data:image/png;base64,{}">'
|
||||
.format(b64.b64encode(buf.getvalue()))
|
||||
.replace("b'",'')
|
||||
.replace("'",''))
|
||||
return img
|
||||
|
||||
data = etl.Data()
|
||||
data.load()
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
|
|
@ -71,6 +72,25 @@ def slides():
|
|||
slide_body = render_template('slide_body.html')
|
||||
return render_template('slides.html', body=slide_body)
|
||||
|
||||
@api.route('/keywords')
@api.expect(parser)
class keywords(Resource):
    def get(self):
        """
        Return the 50 most common plot keywords for the filtered movie set.

        Filter values come from the request query string (see ``parser``);
        the response is a JSON list of ``{'text': keyword, 'weight': count}``
        objects.
        """
        args = parser.parse_args()

        # Forward every recognised filter argument to Data.filter().
        filter_fields = ('start_year', 'end_year', 'genre', 'country',
                         'language', 'top', 'title', 'color')
        keyword_data = data.filter(**{field: args[field]
                                      for field in filter_fields})

        # Count keyword occurrences across the surviving movies.
        counts = Counter(keyword_data.keyword.plot_keywords.values.tolist())
        return jsonify([{'text': keyword, 'weight': weight}
                        for keyword, weight in counts.most_common(50)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
port = int(os.environ.get("PORT", 5000))
|
||||
app.run(host='0.0.0.0', port=port, debug=True)
|
||||
Loading…
Add table
Add a link
Reference in a new issue