This commit is contained in:
WaylonWalker 2020-02-28 08:16:44 -06:00
commit 24e604ab32
44 changed files with 2338 additions and 0 deletions

50
conf/base/catalog.yml Normal file
View file

@ -0,0 +1,50 @@
# Here you can define all your data sets by using simple YAML syntax.
#
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# We support interacting with a variety of data stores including local file systems, cloud, network and HDFS
#
# An example data set definition can look as follows:
#
#bikes:
# type: pandas.CSVDataSet
# filepath: "data/01_raw/bikes.csv"
#
#weather:
# type: spark.SparkDataSet
# filepath: s3a://your_bucket/data/01_raw/weather*
# file_format: csv
# credentials: dev_s3
# load_args:
# header: True
# inferSchema: True
# save_args:
# sep: '|'
# header: True
#
#scooters:
# type: pandas.SQLTableDataSet
# credentials: scooters_credentials
# table_name: scooters
# load_args:
# index_col: ['name']
# columns: ['name', 'gear']
# save_args:
# if_exists: 'replace'
# # if_exists: 'fail'
# # if_exists: 'append'
#
# The Data Catalog supports being able to reference the same file using two different DataSet implementations
# (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here:
# https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# This is a data set used by the "Hello World" example pipeline provided with the project
# template. Please feel free to remove it once you remove the example pipeline.
example_iris_data:
type: CSVLocalDataSet
filepath: data/01_raw/iris.csv

66
conf/base/logging.yml Normal file
View file

@ -0,0 +1,66 @@
version: 1
disable_existing_loggers: false
formatters:
simple:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
json_formatter:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
class: pythonjsonlogger.jsonlogger.JsonFormatter
handlers:
console:
class: logging.StreamHandler
level: INFO
formatter: simple
stream: ext://sys.stdout
info_file_handler:
class: logging.handlers.RotatingFileHandler
level: INFO
formatter: simple
filename: logs/info.log
maxBytes: 10485760 # 10MB
backupCount: 20
encoding: utf8
delay: true
error_file_handler:
class: logging.handlers.RotatingFileHandler
level: ERROR
formatter: simple
filename: logs/errors.log
maxBytes: 10485760 # 10MB
backupCount: 20
encoding: utf8
delay: true
journal_file_handler:
class: kedro.versioning.journal.JournalFileHandler
level: INFO
base_dir: logs/journals
formatter: json_formatter
loggers:
anyconfig:
level: WARNING
handlers: [console, info_file_handler, error_file_handler]
propagate: false
kedro.io:
level: INFO
handlers: [console, info_file_handler, error_file_handler]
propagate: false
kedro.pipeline:
level: INFO
handlers: [console, info_file_handler, error_file_handler]
propagate: false
kedro.journal:
level: INFO
handlers: [journal_file_handler]
propagate: false
root:
level: INFO
handlers: [console, info_file_handler, error_file_handler]

8
conf/base/parameters.yml Normal file
View file

@ -0,0 +1,8 @@
# Parameters for the example pipeline. Feel free to delete these once you
# remove the example pipeline from pipeline.py and the example nodes in
# `src/pipelines/`
example_test_data_ratio: 0.2
example_num_train_iter: 10000
example_learning_rate: 0.01