feat(etl): Initial ETL commit

Added an ETL module to load in data. Extra data features may be added later as they are identified.
This commit is contained in:
Walker Waylon Scott 2017-06-26 19:31:49 -05:00
parent 8774e7a9ec
commit 2b58d707f5

35
src/etl.py Normal file
View file

@ -0,0 +1,35 @@
import os
import pandas as pd
import settings
class Data(object):
    """
    A data object for loading, updating, cleaning, and holding data.

    Loaded datasets are stored as instance attributes (``load`` sets
    ``movie``); ``str(obj)`` renders a summary of every instance attribute.
    """

    def __init__(self, data=None):
        """
        Load data on creation if no data is provided.

        Args:
            data: Optional pre-existing data. When it is ``None`` (the
                default) the data is read from disk via ``load()``.
        """
        # Identity check on purpose: ``data == None`` is evaluated
        # element-wise when ``data`` is a pandas DataFrame, and truth-testing
        # that result raises ValueError.
        if data is None:
            self.load()
        # NOTE(review): a supplied ``data`` is not stored on the instance;
        # confirm which attribute (if any) it is meant to populate.

    def __str__(self):
        """
        Return a readable summary with one entry per instance attribute.

        DataFrame attributes are shown as their first row, transposed;
        every other attribute is shown by value.
        """
        parts = []
        for key, item in vars(self).items():
            if isinstance(item, pd.DataFrame):
                parts.append(
                    f'item: {key},\ntype:{type(item)},\nhead: {item.head(1).T}\n\n'
                )
            else:
                parts.append(
                    f'item: {key},\ntype:{type(item)},\nvalue: {item}\n\n'
                )
        return ''.join(parts)

    def load(self):
        """
        Load/reload data. Can be called to update data without redefining a
        new data object.

        Reads ``movie_metadata.csv`` from ``settings.raw_data_dir`` into
        ``self.movie``.
        """
        self.movie = pd.read_csv(os.path.join(settings.raw_data_dir, 'movie_metadata.csv'))