"""ETL helpers for loading the raw movie dataset.

Provenance: this module was introduced as ``src/etl.py`` by commit
2b58d707f5b01824714abcdc9ce051ed859d76af
("feat(etl) Initial etl commit", Walker Waylon Scott,
Mon, 26 Jun 2017 19:31:49 -0500):

    Added etl to load in data. Extra data features may be added later
    as they are identified.
"""
import os

import pandas as pd

import settings


class Data:
    """
    A data object for loading, updating, cleaning, and holding data.

    After :meth:`load` has run, the movie metadata table is available as
    the ``movie`` attribute (a ``pandas.DataFrame``).
    """

    def __init__(self, data=None):
        """
        Load data on creation if no data is provided.

        :param data: optional pre-existing data. When it is ``None`` (the
            default) :meth:`load` is called immediately; otherwise nothing
            is loaded.
        """
        # Identity check, not ``== None``: comparing a DataFrame with ``==``
        # is elementwise, and using that result in ``if`` raises ValueError
        # because the truth value of a DataFrame is ambiguous.
        if data is None:
            self.load()
        # NOTE(review): a non-None ``data`` argument is not stored anywhere
        # (same as the original behaviour) -- confirm whether it was meant
        # to be kept on the instance.

    def __str__(self):
        """
        Return a human-readable summary of every attribute on the instance.

        DataFrame attributes are summarised by their transposed first row;
        every other attribute is rendered with its plain string value.
        """
        sections = []
        for key, item in vars(self).items():
            if isinstance(item, pd.DataFrame):
                # Only the first row (transposed) is shown to keep the
                # summary short.
                sections.append(
                    f'item: {key},\ntype:{type(item)},\nhead: {item.head(1).T}\n\n'
                )
            else:
                sections.append(
                    f'item: {key},\ntype:{type(item)},\nvalue: {item}\n\n'
                )

        return ''.join(sections)

    def load(self):
        """
        Load or reload the data.

        Can be called to update the data without creating a new ``Data``
        object. Reads ``movie_metadata.csv`` from ``settings.raw_data_dir``
        into ``self.movie``.
        """
        self.movie = pd.read_csv(
            os.path.join(settings.raw_data_dir, 'movie_metadata.csv')
        )