feat(etl) Initial etl commit
Added etl to load in data. Extra data features may be added later as they are identified.
This commit is contained in:
parent
8774e7a9ec
commit
2b58d707f5
1 changed files with 35 additions and 0 deletions
35
src/etl.py
Normal file
35
src/etl.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import os
|
||||
import pandas as pd
|
||||
import settings
|
||||
|
||||
|
||||
class Data(object):
|
||||
"""
|
||||
A data object for loading, updating, cleaning, and holding data.
|
||||
"""
|
||||
|
||||
def __init__(self, data=None):
|
||||
"""
|
||||
loads data on creation if no data is provided
|
||||
"""
|
||||
if data == None:
|
||||
self.load()
|
||||
|
||||
def __str__(self):
|
||||
|
||||
value = ''
|
||||
for key in self.__dict__.keys():
|
||||
if isinstance(self.__dict__[key], pd.DataFrame):
|
||||
value = value + f'item: {key},\ntype:{type(self.__dict__[key])},\nhead: {self.__dict__[key].head(1).T}\n\n'
|
||||
else:
|
||||
value = value + f'item: {key},\ntype:{type(self.__dict__[key])},\nvalue: {self.__dict__[key]}\n\n'
|
||||
|
||||
return value
|
||||
|
||||
def load(self):
|
||||
"""
|
||||
loads/reloads data. Can be called to update data without redefining a
|
||||
new data object.
|
||||
"""
|
||||
|
||||
self.movie = pd.read_csv(os.path.join(settings.raw_data_dir, 'movie_metadata.csv'))
|
||||
Loading…
Add table
Add a link
Reference in a new issue