# Here you can define all your data sets by using simple YAML syntax.
#
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# We support interacting with a variety of data stores including local file
# systems, cloud, network and HDFS.
#
# An example data set definition can look as follows:
#
#bikes:
#  type: pandas.CSVDataSet
#  filepath: "data/01_raw/bikes.csv"
#
#weather:
#  type: spark.SparkDataSet
#  filepath: s3a://your_bucket/data/01_raw/weather*
#  file_format: csv
#  credentials: dev_s3
#  load_args:
#    header: True
#    inferSchema: True
#  save_args:
#    sep: '|'
#    header: True
#
#scooters:
#  type: pandas.SQLTableDataSet
#  credentials: scooters_credentials
#  table_name: scooters
#  load_args:
#    index_col: ['name']
#    columns: ['name', 'gear']
#  save_args:
#    if_exists: 'replace'
#    # if_exists: 'fail'
#    # if_exists: 'append'
#
# The Data Catalog supports being able to reference the same file using two
# different DataSet implementations (transcoding), templating and a way to
# reuse arguments that are frequently repeated. See more here:
# https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# This is a data set used by the "Hello World" example pipeline provided with
# the project template. Please feel free to remove it once you remove the
# example pipeline.

# Catalog entry `example_iris_data`: a CSVLocalDataSet backed by the file at
# data/01_raw/iris.csv.
# NOTE(review): the relative filepath is presumably resolved against the
# project root -- confirm against the Kedro version in use.
example_iris_data:
  type: CSVLocalDataSet
  filepath: data/01_raw/iris.csv