# Here you can define all your data sets by using simple YAML syntax.
#
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# We support interacting with a variety of data stores including local file systems, cloud, network and HDFS.
#
# An example data set definition can look as follows:
#
#bikes:
#  type: pandas.CSVDataSet
#  filepath: "data/01_raw/bikes.csv"
#
#weather:
#  type: spark.SparkDataSet
#  filepath: s3a://your_bucket/data/01_raw/weather*
#  file_format: csv
#  credentials: dev_s3
#  load_args:
#    header: True
#    inferSchema: True
#  save_args:
#    sep: '|'
#    header: True
#
#scooters:
#  type: pandas.SQLTableDataSet
#  credentials: scooters_credentials
#  table_name: scooters
#  load_args:
#    index_col: ['name']
#    columns: ['name', 'gear']
#  save_args:
#    if_exists: 'replace'
#    # if_exists: 'fail'
#    # if_exists: 'append'
#
# The Data Catalog also supports transcoding (referencing the same file through two different
# DataSet implementations), templating, and reusing frequently repeated arguments. See more here:
# https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html

#
# This is a data set used by the "Hello World" example pipeline provided with the project
# template. Please feel free to remove it once you remove the example pipeline.

# Catalog entry for the iris example data set.
example_iris_data:
  # Data set implementation used to load/save this entry.
  # NOTE(review): the commented examples in this file use the `pandas.CSVDataSet`
  # naming style; confirm `CSVLocalDataSet` is available in the installed Kedro version.
  type: CSVLocalDataSet
  # Relative path to the CSV file backing this data set.
  filepath: data/01_raw/iris.csv