This commit is contained in:
WaylonWalker 2020-02-28 08:16:44 -06:00
commit 24e604ab32
44 changed files with 2338 additions and 0 deletions

6
.coveragerc Normal file
View file

@ -0,0 +1,6 @@
[report]
fail_under=0
show_missing=True
exclude_lines =
pragma: no cover
raise NotImplementedError

157
.gitignore vendored Normal file
View file

@ -0,0 +1,157 @@
##########################
# KEDRO PROJECT
# ignore all local configuration
conf/local/**
!conf/local/.gitkeep
# ignore potentially sensitive credentials files
conf/**/*credentials*
# ignore everything in the following folders
data/**
logs/**
# except their sub-folders
!data/**/
!logs/**/
# also keep all .gitkeep files
!.gitkeep
# keep also the example dataset
!data/01_raw/iris.csv
##########################
# Common files
# IntelliJ
.idea/
*.iml
out/
.idea_modules/
### macOS
*.DS_Store
.AppleDouble
.LSOverride
.Trashes
# Vim
*~
.*.swo
.*.swp
# emacs
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
# JIRA plugin
atlassian-ide-plugin.xml
# C extensions
*.so
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
.static_storage/
.media/
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
.ipython/profile_default/history.sqlite
.ipython/profile_default/startup/README
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# mkdocs documentation
/site
# mypy
.mypy_cache/

View file

@ -0,0 +1,58 @@
import logging.config
import os
import sys
from pathlib import Path
from IPython.core.magic import register_line_magic
# Find the project root (./../../../)
startup_error = None
project_path = Path(__file__).parents[3].resolve()
# Line magic registered at IPython startup: (re)loads the Kedro context so the
# interactive session gets `context` and `catalog` as global variables.
@register_line_magic
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables.

    Args:
        path: Path to the Kedro project root. Falls back to the
            module-level ``project_path`` when falsy.
        line: Unused; present so the function is callable as an IPython
            line magic (IPython passes the magic's argument line here).
    """
    global startup_error
    global context
    global catalog
    try:
        # Imported lazily so a missing/broken Kedro installation yields a
        # clear message instead of breaking the whole startup script import.
        import kedro.config.default_logger  # noqa: F401  # side effect: sets up logging
        from kedro.context import KEDRO_ENV_VAR, load_context
        from kedro.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise
    try:
        path = path or project_path
        logging.debug("Loading the context from %s", str(path))
        context = load_context(path, env=os.getenv(KEDRO_ENV_VAR))
        catalog = context.catalog
        # remove cached user modules so edits to project code are picked up
        # on the next reload
        package_name = context.__module__.split(".")[0]
        to_remove = [mod for mod in sys.modules if mod.startswith(package_name)]
        for module in to_remove:
            del sys.modules[module]
        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variable `context` and `catalog`")
        # register any additional line magics the project contributes
        for line_magic in collect_line_magic():
            register_line_magic(line_magic)
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        # kept in a global so the failure can be inspected from the session
        startup_error = err
        logging.exception(
            "Kedro's ipython session startup script failed:\n%s", str(err)
        )
        raise err
reload_kedro(project_path)

7
.isort.cfg Normal file
View file

@ -0,0 +1,7 @@
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
known_third_party=kedro

1
.kedro.yml Normal file
View file

@ -0,0 +1 @@
context_path: default_kedro_157.run.ProjectContext

137
README.md Normal file
View file

@ -0,0 +1,137 @@
# Default Kedro 157
## Overview
This is your new Kedro project, which was generated using `Kedro 0.15.7` by running:
```
kedro new
```
Take a look at the [documentation](https://kedro.readthedocs.io) to get started.
## Rules and guidelines
In order to get the best out of the template:
* Please don't remove any lines from the `.gitignore` file provided
* Make sure your results can be reproduced by following a data engineering convention, e.g. the one we suggest [here](https://kedro.readthedocs.io/en/stable/06_resources/01_faq.html#what-is-data-engineering-convention)
* Don't commit any data to your repository
* Don't commit any credentials or local configuration to your repository
* Keep all credentials or local configuration in `conf/local/`
## Installing dependencies
Dependencies should be declared in `src/requirements.txt` for pip installation and `src/environment.yml` for conda installation.
To install them, run:
```
kedro install
```
## Running Kedro
You can run your Kedro project with:
```
kedro run
```
## Testing Kedro
Have a look at the file `src/tests/test_run.py` for instructions on how to write your tests. You can run your tests with the following command:
```
kedro test
```
To configure the coverage threshold, please have a look at the file `.coveragerc`.
### Working with Kedro from notebooks
In order to use notebooks in your Kedro project, you need to install Jupyter:
```
pip install jupyter
```
For using Jupyter Lab, you need to install it:
```
pip install jupyterlab
```
After installing Jupyter, you can start a local notebook server:
```
kedro jupyter notebook
```
You can also start Jupyter Lab:
```
kedro jupyter lab
```
And if you want to run an IPython session:
```
kedro ipython
```
Running Jupyter or IPython this way provides the following variables in
scope: `context`, `catalog` and `startup_error` (defined by the project's IPython startup script).
#### Converting notebook cells to nodes in a Kedro project
Once you are happy with a notebook, you may want to move your code over into the Kedro project structure for the next stage in your development. This is done through a mixture of [cell tagging](https://jupyter-notebook.readthedocs.io/en/stable/changelog.html#cell-tags) and Kedro CLI commands.
By adding the `node` tag to a cell and running the command below, the cell's source code will be copied over to a Python file within `src/<package_name>/nodes/`.
```
kedro jupyter convert <filepath_to_my_notebook>
```
> *Note:* The name of the Python file matches the name of the original notebook.
Alternatively, you may want to transform all your notebooks in one go. To this end, you can run the following command to convert all notebook files found in the project root directory and under any of its sub-folders.
```
kedro jupyter convert --all
```
#### Ignoring notebook output cells in `git`
In order to automatically strip out all output cell contents before committing to `git`, you can run `kedro activate-nbstripout`. This will add a hook in `.git/config` which will run `nbstripout` before anything is committed to `git`.
> *Note:* Your output cells will be left intact locally.
## Package the project
In order to package the project's Python code in `.egg` and / or a `.wheel` file, you can run:
```
kedro package
```
After running that, you can find the two packages in `src/dist/`.
## Building API documentation
To build API docs for your code using Sphinx, run:
```
kedro build-docs
```
See your documentation by opening `docs/build/html/index.html`.
## Building the project requirements
To generate or update the dependency requirements for your project, run:
```
kedro build-reqs
```
This will copy the contents of `src/requirements.txt` into a new file `src/requirements.in` which will be used as the source for `pip-compile`. You can see the output of the resolution by opening `src/requirements.txt`.
After this, if you'd like to update your project requirements, please update `src/requirements.in` and re-run `kedro build-reqs`.

26
conf/README.md Normal file
View file

@ -0,0 +1,26 @@
# What is this for?
This folder should be used to store configuration files used by Kedro or by separate tools.
This file can be used to provide users with instructions for how to reproduce local configuration with their own credentials. You can edit the file however you like, but you may wish to retain the information below and add your own section in the section titled **Instructions**.
## Local configuration
The `local` folder should be used for configuration that is either user-specific (e.g. IDE configuration) or protected (e.g. security keys).
> *Note:* Please do not check in any local configuration to version control.
## Base configuration
The `base` folder is for shared configuration, such as non-sensitive and project-related configuration that may be shared across team members.
WARNING: Please do not put access credentials in the base configuration folder.
# Instructions
# Find out more
You can find out more about configuration from the [user guide documentation](https://kedro.readthedocs.io/en/stable/04_user_guide/03_configuration.html).

50
conf/base/catalog.yml Normal file
View file

@ -0,0 +1,50 @@
# Here you can define all your data sets by using simple YAML syntax.
#
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# We support interacting with a variety of data stores including local file systems, cloud, network and HDFS
#
# An example data set definition can look as follows:
#
#bikes:
# type: pandas.CSVDataSet
# filepath: "data/01_raw/bikes.csv"
#
#weather:
# type: spark.SparkDataSet
# filepath: s3a://your_bucket/data/01_raw/weather*
# file_format: csv
# credentials: dev_s3
# load_args:
# header: True
# inferSchema: True
# save_args:
# sep: '|'
# header: True
#
#scooters:
# type: pandas.SQLTableDataSet
# credentials: scooters_credentials
# table_name: scooters
# load_args:
# index_col: ['name']
# columns: ['name', 'gear']
# save_args:
# if_exists: 'replace'
# # if_exists: 'fail'
# # if_exists: 'append'
#
# The Data Catalog supports being able to reference the same file using two different DataSet implementations
# (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here:
# https://kedro.readthedocs.io/en/stable/04_user_guide/04_data_catalog.html
#
# This is a data set used by the "Hello World" example pipeline provided with the project
# template. Please feel free to remove it once you remove the example pipeline.
example_iris_data:
type: CSVLocalDataSet
filepath: data/01_raw/iris.csv

66
conf/base/logging.yml Normal file
View file

@ -0,0 +1,66 @@
version: 1
disable_existing_loggers: False
formatters:
simple:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
json_formatter:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
class: pythonjsonlogger.jsonlogger.JsonFormatter
handlers:
console:
class: logging.StreamHandler
level: INFO
formatter: simple
stream: ext://sys.stdout
info_file_handler:
class: logging.handlers.RotatingFileHandler
level: INFO
formatter: simple
filename: logs/info.log
maxBytes: 10485760 # 10MB
backupCount: 20
encoding: utf8
delay: True
error_file_handler:
class: logging.handlers.RotatingFileHandler
level: ERROR
formatter: simple
filename: logs/errors.log
maxBytes: 10485760 # 10MB
backupCount: 20
encoding: utf8
delay: True
journal_file_handler:
class: kedro.versioning.journal.JournalFileHandler
level: INFO
base_dir: logs/journals
formatter: json_formatter
loggers:
anyconfig:
level: WARNING
handlers: [console, info_file_handler, error_file_handler]
propagate: no
kedro.io:
level: INFO
handlers: [console, info_file_handler, error_file_handler]
propagate: no
kedro.pipeline:
level: INFO
handlers: [console, info_file_handler, error_file_handler]
propagate: no
kedro.journal:
level: INFO
handlers: [journal_file_handler]
propagate: no
root:
level: INFO
handlers: [console, info_file_handler, error_file_handler]

8
conf/base/parameters.yml Normal file
View file

@ -0,0 +1,8 @@
# Parameters for the example pipeline. Feel free to delete these once you
# remove the example pipeline from pipeline.py and the example nodes in
# `src/pipelines/`
example_test_data_ratio: 0.2
example_num_train_iter: 10000
example_learning_rate: 0.01

0
conf/local/.gitkeep Normal file
View file

0
data/01_raw/.gitkeep Normal file
View file

151
data/01_raw/iris.csv Normal file
View file

@ -0,0 +1,151 @@
sepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,setosa
4.9,3.0,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5.0,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5.0,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.4,3.7,1.5,0.2,setosa
4.8,3.4,1.6,0.2,setosa
4.8,3.0,1.4,0.1,setosa
4.3,3.0,1.1,0.1,setosa
5.8,4.0,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.1,3.5,1.4,0.3,setosa
5.7,3.8,1.7,0.3,setosa
5.1,3.8,1.5,0.3,setosa
5.4,3.4,1.7,0.2,setosa
5.1,3.7,1.5,0.4,setosa
4.6,3.6,1.0,0.2,setosa
5.1,3.3,1.7,0.5,setosa
4.8,3.4,1.9,0.2,setosa
5.0,3.0,1.6,0.2,setosa
5.0,3.4,1.6,0.4,setosa
5.2,3.5,1.5,0.2,setosa
5.2,3.4,1.4,0.2,setosa
4.7,3.2,1.6,0.2,setosa
4.8,3.1,1.6,0.2,setosa
5.4,3.4,1.5,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.0,3.2,1.2,0.2,setosa
5.5,3.5,1.3,0.2,setosa
4.9,3.1,1.5,0.1,setosa
4.4,3.0,1.3,0.2,setosa
5.1,3.4,1.5,0.2,setosa
5.0,3.5,1.3,0.3,setosa
4.5,2.3,1.3,0.3,setosa
4.4,3.2,1.3,0.2,setosa
5.0,3.5,1.6,0.6,setosa
5.1,3.8,1.9,0.4,setosa
4.8,3.0,1.4,0.3,setosa
5.1,3.8,1.6,0.2,setosa
4.6,3.2,1.4,0.2,setosa
5.3,3.7,1.5,0.2,setosa
5.0,3.3,1.4,0.2,setosa
7.0,3.2,4.7,1.4,versicolor
6.4,3.2,4.5,1.5,versicolor
6.9,3.1,4.9,1.5,versicolor
5.5,2.3,4.0,1.3,versicolor
6.5,2.8,4.6,1.5,versicolor
5.7,2.8,4.5,1.3,versicolor
6.3,3.3,4.7,1.6,versicolor
4.9,2.4,3.3,1.0,versicolor
6.6,2.9,4.6,1.3,versicolor
5.2,2.7,3.9,1.4,versicolor
5.0,2.0,3.5,1.0,versicolor
5.9,3.0,4.2,1.5,versicolor
6.0,2.2,4.0,1.0,versicolor
6.1,2.9,4.7,1.4,versicolor
5.6,2.9,3.6,1.3,versicolor
6.7,3.1,4.4,1.4,versicolor
5.6,3.0,4.5,1.5,versicolor
5.8,2.7,4.1,1.0,versicolor
6.2,2.2,4.5,1.5,versicolor
5.6,2.5,3.9,1.1,versicolor
5.9,3.2,4.8,1.8,versicolor
6.1,2.8,4.0,1.3,versicolor
6.3,2.5,4.9,1.5,versicolor
6.1,2.8,4.7,1.2,versicolor
6.4,2.9,4.3,1.3,versicolor
6.6,3.0,4.4,1.4,versicolor
6.8,2.8,4.8,1.4,versicolor
6.7,3.0,5.0,1.7,versicolor
6.0,2.9,4.5,1.5,versicolor
5.7,2.6,3.5,1.0,versicolor
5.5,2.4,3.8,1.1,versicolor
5.5,2.4,3.7,1.0,versicolor
5.8,2.7,3.9,1.2,versicolor
6.0,2.7,5.1,1.6,versicolor
5.4,3.0,4.5,1.5,versicolor
6.0,3.4,4.5,1.6,versicolor
6.7,3.1,4.7,1.5,versicolor
6.3,2.3,4.4,1.3,versicolor
5.6,3.0,4.1,1.3,versicolor
5.5,2.5,4.0,1.3,versicolor
5.5,2.6,4.4,1.2,versicolor
6.1,3.0,4.6,1.4,versicolor
5.8,2.6,4.0,1.2,versicolor
5.0,2.3,3.3,1.0,versicolor
5.6,2.7,4.2,1.3,versicolor
5.7,3.0,4.2,1.2,versicolor
5.7,2.9,4.2,1.3,versicolor
6.2,2.9,4.3,1.3,versicolor
5.1,2.5,3.0,1.1,versicolor
5.7,2.8,4.1,1.3,versicolor
6.3,3.3,6.0,2.5,virginica
5.8,2.7,5.1,1.9,virginica
7.1,3.0,5.9,2.1,virginica
6.3,2.9,5.6,1.8,virginica
6.5,3.0,5.8,2.2,virginica
7.6,3.0,6.6,2.1,virginica
4.9,2.5,4.5,1.7,virginica
7.3,2.9,6.3,1.8,virginica
6.7,2.5,5.8,1.8,virginica
7.2,3.6,6.1,2.5,virginica
6.5,3.2,5.1,2.0,virginica
6.4,2.7,5.3,1.9,virginica
6.8,3.0,5.5,2.1,virginica
5.7,2.5,5.0,2.0,virginica
5.8,2.8,5.1,2.4,virginica
6.4,3.2,5.3,2.3,virginica
6.5,3.0,5.5,1.8,virginica
7.7,3.8,6.7,2.2,virginica
7.7,2.6,6.9,2.3,virginica
6.0,2.2,5.0,1.5,virginica
6.9,3.2,5.7,2.3,virginica
5.6,2.8,4.9,2.0,virginica
7.7,2.8,6.7,2.0,virginica
6.3,2.7,4.9,1.8,virginica
6.7,3.3,5.7,2.1,virginica
7.2,3.2,6.0,1.8,virginica
6.2,2.8,4.8,1.8,virginica
6.1,3.0,4.9,1.8,virginica
6.4,2.8,5.6,2.1,virginica
7.2,3.0,5.8,1.6,virginica
7.4,2.8,6.1,1.9,virginica
7.9,3.8,6.4,2.0,virginica
6.4,2.8,5.6,2.2,virginica
6.3,2.8,5.1,1.5,virginica
6.1,2.6,5.6,1.4,virginica
7.7,3.0,6.1,2.3,virginica
6.3,3.4,5.6,2.4,virginica
6.4,3.1,5.5,1.8,virginica
6.0,3.0,4.8,1.8,virginica
6.9,3.1,5.4,2.1,virginica
6.7,3.1,5.6,2.4,virginica
6.9,3.1,5.1,2.3,virginica
5.8,2.7,5.1,1.9,virginica
6.8,3.2,5.9,2.3,virginica
6.7,3.3,5.7,2.5,virginica
6.7,3.0,5.2,2.3,virginica
6.3,2.5,5.0,1.9,virginica
6.5,3.0,5.2,2.0,virginica
6.2,3.4,5.4,2.3,virginica
5.9,3.0,5.1,1.8,virginica
1 sepal_length sepal_width petal_length petal_width species
2 5.1 3.5 1.4 0.2 setosa
3 4.9 3.0 1.4 0.2 setosa
4 4.7 3.2 1.3 0.2 setosa
5 4.6 3.1 1.5 0.2 setosa
6 5.0 3.6 1.4 0.2 setosa
7 5.4 3.9 1.7 0.4 setosa
8 4.6 3.4 1.4 0.3 setosa
9 5.0 3.4 1.5 0.2 setosa
10 4.4 2.9 1.4 0.2 setosa
11 4.9 3.1 1.5 0.1 setosa
12 5.4 3.7 1.5 0.2 setosa
13 4.8 3.4 1.6 0.2 setosa
14 4.8 3.0 1.4 0.1 setosa
15 4.3 3.0 1.1 0.1 setosa
16 5.8 4.0 1.2 0.2 setosa
17 5.7 4.4 1.5 0.4 setosa
18 5.4 3.9 1.3 0.4 setosa
19 5.1 3.5 1.4 0.3 setosa
20 5.7 3.8 1.7 0.3 setosa
21 5.1 3.8 1.5 0.3 setosa
22 5.4 3.4 1.7 0.2 setosa
23 5.1 3.7 1.5 0.4 setosa
24 4.6 3.6 1.0 0.2 setosa
25 5.1 3.3 1.7 0.5 setosa
26 4.8 3.4 1.9 0.2 setosa
27 5.0 3.0 1.6 0.2 setosa
28 5.0 3.4 1.6 0.4 setosa
29 5.2 3.5 1.5 0.2 setosa
30 5.2 3.4 1.4 0.2 setosa
31 4.7 3.2 1.6 0.2 setosa
32 4.8 3.1 1.6 0.2 setosa
33 5.4 3.4 1.5 0.4 setosa
34 5.2 4.1 1.5 0.1 setosa
35 5.5 4.2 1.4 0.2 setosa
36 4.9 3.1 1.5 0.1 setosa
37 5.0 3.2 1.2 0.2 setosa
38 5.5 3.5 1.3 0.2 setosa
39 4.9 3.1 1.5 0.1 setosa
40 4.4 3.0 1.3 0.2 setosa
41 5.1 3.4 1.5 0.2 setosa
42 5.0 3.5 1.3 0.3 setosa
43 4.5 2.3 1.3 0.3 setosa
44 4.4 3.2 1.3 0.2 setosa
45 5.0 3.5 1.6 0.6 setosa
46 5.1 3.8 1.9 0.4 setosa
47 4.8 3.0 1.4 0.3 setosa
48 5.1 3.8 1.6 0.2 setosa
49 4.6 3.2 1.4 0.2 setosa
50 5.3 3.7 1.5 0.2 setosa
51 5.0 3.3 1.4 0.2 setosa
52 7.0 3.2 4.7 1.4 versicolor
53 6.4 3.2 4.5 1.5 versicolor
54 6.9 3.1 4.9 1.5 versicolor
55 5.5 2.3 4.0 1.3 versicolor
56 6.5 2.8 4.6 1.5 versicolor
57 5.7 2.8 4.5 1.3 versicolor
58 6.3 3.3 4.7 1.6 versicolor
59 4.9 2.4 3.3 1.0 versicolor
60 6.6 2.9 4.6 1.3 versicolor
61 5.2 2.7 3.9 1.4 versicolor
62 5.0 2.0 3.5 1.0 versicolor
63 5.9 3.0 4.2 1.5 versicolor
64 6.0 2.2 4.0 1.0 versicolor
65 6.1 2.9 4.7 1.4 versicolor
66 5.6 2.9 3.6 1.3 versicolor
67 6.7 3.1 4.4 1.4 versicolor
68 5.6 3.0 4.5 1.5 versicolor
69 5.8 2.7 4.1 1.0 versicolor
70 6.2 2.2 4.5 1.5 versicolor
71 5.6 2.5 3.9 1.1 versicolor
72 5.9 3.2 4.8 1.8 versicolor
73 6.1 2.8 4.0 1.3 versicolor
74 6.3 2.5 4.9 1.5 versicolor
75 6.1 2.8 4.7 1.2 versicolor
76 6.4 2.9 4.3 1.3 versicolor
77 6.6 3.0 4.4 1.4 versicolor
78 6.8 2.8 4.8 1.4 versicolor
79 6.7 3.0 5.0 1.7 versicolor
80 6.0 2.9 4.5 1.5 versicolor
81 5.7 2.6 3.5 1.0 versicolor
82 5.5 2.4 3.8 1.1 versicolor
83 5.5 2.4 3.7 1.0 versicolor
84 5.8 2.7 3.9 1.2 versicolor
85 6.0 2.7 5.1 1.6 versicolor
86 5.4 3.0 4.5 1.5 versicolor
87 6.0 3.4 4.5 1.6 versicolor
88 6.7 3.1 4.7 1.5 versicolor
89 6.3 2.3 4.4 1.3 versicolor
90 5.6 3.0 4.1 1.3 versicolor
91 5.5 2.5 4.0 1.3 versicolor
92 5.5 2.6 4.4 1.2 versicolor
93 6.1 3.0 4.6 1.4 versicolor
94 5.8 2.6 4.0 1.2 versicolor
95 5.0 2.3 3.3 1.0 versicolor
96 5.6 2.7 4.2 1.3 versicolor
97 5.7 3.0 4.2 1.2 versicolor
98 5.7 2.9 4.2 1.3 versicolor
99 6.2 2.9 4.3 1.3 versicolor
100 5.1 2.5 3.0 1.1 versicolor
101 5.7 2.8 4.1 1.3 versicolor
102 6.3 3.3 6.0 2.5 virginica
103 5.8 2.7 5.1 1.9 virginica
104 7.1 3.0 5.9 2.1 virginica
105 6.3 2.9 5.6 1.8 virginica
106 6.5 3.0 5.8 2.2 virginica
107 7.6 3.0 6.6 2.1 virginica
108 4.9 2.5 4.5 1.7 virginica
109 7.3 2.9 6.3 1.8 virginica
110 6.7 2.5 5.8 1.8 virginica
111 7.2 3.6 6.1 2.5 virginica
112 6.5 3.2 5.1 2.0 virginica
113 6.4 2.7 5.3 1.9 virginica
114 6.8 3.0 5.5 2.1 virginica
115 5.7 2.5 5.0 2.0 virginica
116 5.8 2.8 5.1 2.4 virginica
117 6.4 3.2 5.3 2.3 virginica
118 6.5 3.0 5.5 1.8 virginica
119 7.7 3.8 6.7 2.2 virginica
120 7.7 2.6 6.9 2.3 virginica
121 6.0 2.2 5.0 1.5 virginica
122 6.9 3.2 5.7 2.3 virginica
123 5.6 2.8 4.9 2.0 virginica
124 7.7 2.8 6.7 2.0 virginica
125 6.3 2.7 4.9 1.8 virginica
126 6.7 3.3 5.7 2.1 virginica
127 7.2 3.2 6.0 1.8 virginica
128 6.2 2.8 4.8 1.8 virginica
129 6.1 3.0 4.9 1.8 virginica
130 6.4 2.8 5.6 2.1 virginica
131 7.2 3.0 5.8 1.6 virginica
132 7.4 2.8 6.1 1.9 virginica
133 7.9 3.8 6.4 2.0 virginica
134 6.4 2.8 5.6 2.2 virginica
135 6.3 2.8 5.1 1.5 virginica
136 6.1 2.6 5.6 1.4 virginica
137 7.7 3.0 6.1 2.3 virginica
138 6.3 3.4 5.6 2.4 virginica
139 6.4 3.1 5.5 1.8 virginica
140 6.0 3.0 4.8 1.8 virginica
141 6.9 3.1 5.4 2.1 virginica
142 6.7 3.1 5.6 2.4 virginica
143 6.9 3.1 5.1 2.3 virginica
144 5.8 2.7 5.1 1.9 virginica
145 6.8 3.2 5.9 2.3 virginica
146 6.7 3.3 5.7 2.5 virginica
147 6.7 3.0 5.2 2.3 virginica
148 6.3 2.5 5.0 1.9 virginica
149 6.5 3.0 5.2 2.0 virginica
150 6.2 3.4 5.4 2.3 virginica
151 5.9 3.0 5.1 1.8 virginica

View file

0
data/03_primary/.gitkeep Normal file
View file

View file

View file

0
data/06_models/.gitkeep Normal file
View file

View file

View file

257
docs/source/conf.py Normal file
View file

@ -0,0 +1,257 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
# default_kedro_157 documentation build
# configuration file, created by sphinx-quickstart.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import re
from kedro.cli.utils import find_stylesheets
from recommonmark.transform import AutoStructify
from default_kedro_157 import __version__ as release
# -- Project information -----------------------------------------------------
project = "default_kedro_157"
copyright = "2020, QuantumBlack Visual Analytics Limited"
author = "QuantumBlack"
# The short X.Y version.
version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1)
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx_autodoc_typehints",
"sphinx.ext.doctest",
"sphinx.ext.todo",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.ifconfig",
"sphinx.ext.viewcode",
"sphinx.ext.mathjax",
"nbsphinx",
"recommonmark",
"sphinx_copybutton",
]
# enable autosummary plugin (table of contents for modules/classes/class
# methods)
autosummary_generate = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
# The master toctree document.
master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = ["_build", "**.ipynb_checkpoints"]
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {"collapse_navigation": False, "style_external_links": True}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
html_show_sourcelink = False
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = "default_kedro_157doc"
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
#
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
#
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
#
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(
master_doc,
"default_kedro_157.tex",
"default_kedro_157 Documentation",
"QuantumBlack",
"manual",
)
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(
master_doc,
"default_kedro_157",
"default_kedro_157 Documentation",
[author],
1,
)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"default_kedro_157",
"default_kedro_157 Documentation",
author,
"default_kedro_157",
"Project default_kedro_157 codebase.",
"Data-Science",
)
]
# -- Options for todo extension ----------------------------------------------
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Extension configuration -------------------------------------------------
# nbsphinx_prolog = """
# see here for prolog/epilog details:
# https://nbsphinx.readthedocs.io/en/0.3.1/prolog-and-epilog.html
# """
# -- NBconvert kernel config -------------------------------------------------
nbsphinx_kernel_name = "python3"
def remove_arrows_in_examples(lines):
    """Strip the ``>>>`` doctest prompts from *lines*, mutating it in place."""
    lines[:] = [entry.replace(">>>", "") for entry in lines]
def autodoc_process_docstring(app, what, name, obj, options, lines):
    """Sphinx ``autodoc-process-docstring`` callback.

    Cleans ``>>>`` doctest prompts out of the docstring lines before they
    are rendered; ``lines`` is modified in place per the Sphinx contract.
    """
    remove_arrows_in_examples(lines)
def skip(app, what, name, obj, skip, options):
    """Sphinx ``autodoc-skip-member`` callback.

    Forces ``__init__`` to be documented; for every other member, defers
    to Sphinx's default decision carried in the incoming ``skip`` flag.
    """
    return False if name == "__init__" else skip
def setup(app):
    """Sphinx extension entry point: wire project-specific hooks into *app*.

    Connects the autodoc callbacks defined above, registers Kedro's
    stylesheets, and configures recommonmark so RST tables render inside
    Markdown sources.
    """
    app.connect("autodoc-process-docstring", autodoc_process_docstring)
    app.connect("autodoc-skip-member", skip)
    # add Kedro stylesheets
    for stylesheet in find_stylesheets():
        app.add_stylesheet(stylesheet)
    # enable rendering RST tables in Markdown
    app.add_config_value("recommonmark_config", {"enable_eval_rst": True}, True)
    app.add_transform(AutoStructify)

19
docs/source/index.rst Normal file
View file

@ -0,0 +1,19 @@
.. default_kedro_157 documentation master file, created by sphinx-quickstart.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to project's default_kedro_157 API docs!
================================================
.. toctree::
:maxdepth: 4
modules
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

624
kedro_cli.py Normal file
View file

@ -0,0 +1,624 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command line tools for manipulating a Kedro project.
Intended to be invoked via `kedro`."""
import os
import re
import shutil
import subprocess
import sys
import webbrowser
from collections import Counter
from glob import iglob
from itertools import chain
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple
import anyconfig
import click
from click import secho, style
from kedro.cli import main as kedro_main
from kedro.cli.utils import (
KedroCliError,
call,
export_nodes,
forward_command,
python_call,
)
from kedro.context import KEDRO_ENV_VAR, load_context
from kedro.runner import SequentialRunner
from kedro.utils import load_obj
# Click defaults: accept both -h and --help on every command.
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
# get our package onto the python path
PROJ_PATH = Path(__file__).resolve().parent
# Use the project-local IPython profile for `kedro ipython` / `kedro jupyter`.
os.environ["IPYTHONDIR"] = str(PROJ_PATH / ".ipython")
# Error template shown when an optional tool (pytest, black, ...) is missing.
NO_DEPENDENCY_MESSAGE = """{0} is not installed. Please make sure {0} is in
src/requirements.txt and run `kedro install`."""
# --- Help texts for the click options declared on the commands below. ---
TAG_ARG_HELP = """Construct the pipeline using only nodes which have this tag
attached. Option can be used multiple times, what results in a
pipeline constructed from nodes having any of those tags."""
PIPELINE_ARG_HELP = """Name of the modular pipeline to run.
If not set, the project pipeline is run by default."""
ENV_ARG_HELP = """Run the pipeline in a configured environment. If not specified,
pipeline will run using environment `local`."""
NODE_ARG_HELP = """Run only nodes with specified names."""
FROM_NODES_HELP = """A list of node names which should be used as a starting point."""
TO_NODES_HELP = """A list of node names which should be used as an end point."""
FROM_INPUTS_HELP = (
    """A list of dataset names which should be used as a starting point."""
)
PARALLEL_ARG_HELP = """Run the pipeline using the `ParallelRunner`.
If not specified, use the `SequentialRunner`. This flag cannot be used together
with --runner."""
OPEN_ARG_HELP = """Open the documentation in your default browser after building."""
RUNNER_ARG_HELP = """Specify a runner that you want to run the pipeline with.
This option cannot be used together with --parallel."""
CONVERT_ALL_HELP = """Extract the nodes from all notebooks in the Kedro project directory,
including sub-folders."""
OVERWRITE_HELP = """If Python file already exists for the equivalent notebook,
overwrite its contents."""
LOAD_VERSION_HELP = """Specify a particular dataset version (timestamp) for loading."""
CONFIG_FILE_HELP = """Specify a YAML configuration file to load the run
command arguments from. If command line arguments are provided, they will
override the loaded ones."""
PARAMS_ARG_HELP = """Specify extra parameters that you want to pass
to the context initializer. Items must be separated by comma, keys - by colon,
example: param1:value1,param2:value2. Each parameter is split by the first comma,
so parameter values are allowed to contain colons, parameter keys are not."""
JUPYTER_IP_HELP = "IP address of the Jupyter server."
JUPYTER_ALL_KERNELS_HELP = "Display all available Python kernels."
JUPYTER_IDLE_TIMEOUT_HELP = """When a notebook is closed, Jupyter server will
terminate its kernel after so many seconds of inactivity. This does not affect
any open notebooks."""
def _split_string(ctx, param, value):
return [item for item in value.split(",") if item]
def _try_convert_to_numeric(value):
try:
value = float(value)
except ValueError:
return value
return int(value) if value.is_integer() else value
def _split_params(ctx, param, value):
    """Click callback: parse ``key1:value1,key2:value2`` into a dict.

    Values that look numeric are converted to int/float. A value that is
    already a dict (e.g. injected from a config file) is passed through.
    Malformed items abort the command via ``ctx.fail``.
    """
    if isinstance(value, dict):
        return value
    parsed = {}
    for entry in _split_string(ctx, param, value):
        # Split on the FIRST colon only, so values may contain colons.
        pieces = entry.split(":", 1)
        if len(pieces) != 2:
            ctx.fail(
                "Invalid format of `{}` option: Item `{}` must contain a key and "
                "a value separated by `:`.".format(param.name, pieces[0])
            )
        key = pieces[0].strip()
        if not key:
            ctx.fail(
                "Invalid format of `{}` option: Parameter key cannot be "
                "an empty string.".format(param.name)
            )
        parsed[key] = _try_convert_to_numeric(pieces[1].strip())
    return parsed
def _reformat_load_versions(ctx, param, value) -> Dict[str, str]:
"""Reformat data structure from tuple to dictionary for `load-version`.
E.g ('dataset1:time1', 'dataset2:time2') -> {"dataset1": "time1", "dataset2": "time2"}.
"""
load_version_separator = ":"
load_versions_dict = {}
for load_version in value:
load_version_list = load_version.split(load_version_separator, 1)
if len(load_version_list) != 2:
raise ValueError(
"Expected the form of `load_version` to be "
"`dataset_name:YYYY-MM-DDThh.mm.ss.sssZ`,"
"found {} instead".format(load_version)
)
load_versions_dict[load_version_list[0]] = load_version_list[1]
return load_versions_dict
def _config_file_callback(ctx, param, value):
"""Config file callback, that replaces command line options with config file
values. If command line options are passed, they override config file values.
"""
ctx.default_map = ctx.default_map or {}
section = ctx.info_name
if value:
config = anyconfig.load(value)[section]
ctx.default_map.update(config)
return value
def _get_values_as_tuple(values: Iterable[str]) -> Tuple[str]:
return tuple(chain.from_iterable(value.split(",") for value in values))
@click.group(context_settings=CONTEXT_SETTINGS, name=__file__)
def cli():
    """Command line tools for manipulating a Kedro project."""
    # Root click group; all project commands below attach to it.
@cli.command()
@click.option(
    "--from-inputs", type=str, default="", help=FROM_INPUTS_HELP, callback=_split_string
)
@click.option(
    "--from-nodes", type=str, default="", help=FROM_NODES_HELP, callback=_split_string
)
@click.option(
    "--to-nodes", type=str, default="", help=TO_NODES_HELP, callback=_split_string
)
@click.option("--node", "-n", "node_names", type=str, multiple=True, help=NODE_ARG_HELP)
@click.option(
    "--runner", "-r", type=str, default=None, multiple=False, help=RUNNER_ARG_HELP
)
@click.option("--parallel", "-p", is_flag=True, multiple=False, help=PARALLEL_ARG_HELP)
@click.option(
    "--env",
    "-e",
    type=str,
    default=None,
    multiple=False,
    envvar=KEDRO_ENV_VAR,
    help=ENV_ARG_HELP,
)
@click.option("--tag", "-t", type=str, multiple=True, help=TAG_ARG_HELP)
@click.option(
    "--load-version",
    "-lv",
    type=str,
    multiple=True,
    help=LOAD_VERSION_HELP,
    callback=_reformat_load_versions,
)
@click.option("--pipeline", type=str, default=None, help=PIPELINE_ARG_HELP)
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
    help=CONFIG_FILE_HELP,
    callback=_config_file_callback,
)
@click.option(
    "--params", type=str, default="", help=PARAMS_ARG_HELP, callback=_split_params
)
def run(
    tag,
    env,
    parallel,
    runner,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    # --parallel is shorthand for --runner=ParallelRunner; combining it with
    # an explicit --runner would be ambiguous, so reject that outright.
    if parallel and runner:
        raise KedroCliError(
            "Both --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    if parallel:
        runner = "ParallelRunner"
    # Resolve the runner class by name from kedro.runner; fall back to the
    # SequentialRunner when none was requested.
    runner_class = load_obj(runner, "kedro.runner") if runner else SequentialRunner
    # --tag/--node may be repeated and each value may itself be
    # comma-separated; flatten both forms into a single tuple.
    tag = _get_values_as_tuple(tag) if tag else tag
    node_names = _get_values_as_tuple(node_names) if node_names else node_names
    context = load_context(Path.cwd(), env=env, extra_params=params)
    context.run(
        tags=tag,
        runner=runner_class(),
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
        load_versions=load_version,
        pipeline_name=pipeline,
    )
@forward_command(cli, forward_help=True)
def test(args):
    """Run the test suite."""
    # pytest is an optional dev dependency: fail with the friendly
    # NO_DEPENDENCY_MESSAGE instead of an ImportError traceback.
    try:
        import pytest  # pylint: disable=unused-import
    except ImportError:
        raise KedroCliError(NO_DEPENDENCY_MESSAGE.format("pytest"))
    else:
        # Forward any extra CLI arguments straight to pytest.
        python_call("pytest", args)
@cli.command()
@click.argument("files", type=click.Path(exists=True), nargs=-1)
def lint(files):
    """Run flake8, isort and (on Python >=3.6) black."""
    # pylint: disable=unused-import
    # Default lint targets when no paths are given on the command line.
    if not files:
        files = ("src/tests", "src/default_kedro_157")
    # Verify the optional linters are installed before shelling out to them.
    try:
        import flake8
        import isort
    except ImportError as exc:
        raise KedroCliError(NO_DEPENDENCY_MESSAGE.format(exc.name))
    python_call("flake8", ("--max-line-length=88",) + files)
    # isort flags: recursive, trailing commas, parentheses, no grid-wrap,
    # multi-line mode 3, width 88 — presumably chosen to match black's
    # formatting style (TODO confirm against the isort docs).
    python_call("isort", ("-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + files)
    # black requires Python 3.6+, so it is skipped on older interpreters.
    if sys.version_info[:2] >= (3, 6):
        try:
            import black
        except ImportError:
            raise KedroCliError(NO_DEPENDENCY_MESSAGE.format("black"))
        python_call("black", files)
@cli.command()
def install():
    """Install project dependencies from both requirements.txt
    and environment.yml (optional)."""
    # Install conda-managed dependencies first, when an environment.yml exists.
    if (Path.cwd() / "src" / "environment.yml").is_file():
        call(["conda", "install", "--file", "src/environment.yml", "--yes"])
    pip_command = ["install", "-U", "-r", "src/requirements.txt"]
    if os.name == "posix":
        python_call("pip", pip_command)
    else:
        # On Windows, pip runs in a brand-new console — presumably so it can
        # replace files held open by the current interpreter (TODO confirm).
        command = [sys.executable, "-m", "pip"] + pip_command
        subprocess.Popen(command, creationflags=subprocess.CREATE_NEW_CONSOLE)
@forward_command(cli, forward_help=True)
def ipython(args):
    """Open IPython with project specific variables loaded."""
    # Skip the startup banner when the user only asked for help.
    if "-h" not in args and "--help" not in args:
        ipython_message()
    # Forward all remaining arguments to the ipython executable.
    call(["ipython"] + list(args))
@cli.command()
def package():
    """Package the project as a Python egg and wheel."""
    # Build both distribution formats from src/setup.py, cleaning first.
    call([sys.executable, "setup.py", "clean", "--all", "bdist_egg"], cwd="src")
    call([sys.executable, "setup.py", "clean", "--all", "bdist_wheel"], cwd="src")
@cli.command("build-docs")
@click.option(
    "--open",
    "-o",
    "open_docs",
    is_flag=True,
    multiple=False,
    default=False,
    help=OPEN_ARG_HELP,
)
def build_docs(open_docs):
    """Build the project documentation."""
    # Install the docs extras plus project requirements so Sphinx autodoc can
    # import the package, and register the project's Jupyter kernel.
    python_call("pip", ["install", "src/[docs]"])
    python_call("pip", ["install", "-r", "src/requirements.txt"])
    python_call(
        "ipykernel", ["install", "--user", "--name=default_kedro_157"]
    )
    # Start from a clean build directory every time.
    shutil.rmtree("docs/build", ignore_errors=True)
    call(
        [
            "sphinx-apidoc",
            "--module-first",
            "-o",
            "docs/source",
            "src/default_kedro_157",
        ]
    )
    call(["sphinx-build", "-M", "html", "docs/source", "docs/build", "-a"])
    # Optionally open the freshly built HTML index in the default browser.
    if open_docs:
        docs_page = (Path.cwd() / "docs" / "build" / "html" / "index.html").as_uri()
        secho("Opening {}".format(docs_page))
        webbrowser.open(docs_page)
@cli.command("build-reqs")
def build_reqs():
    """Build the project dependency requirements."""
    requirements_path = Path.cwd() / "src" / "requirements.in"
    # First run: seed requirements.in from the existing requirements.txt.
    if not requirements_path.is_file():
        secho("No requirements.in found. Copying contents from requirements.txt...")
        contents = (Path.cwd() / "src" / "requirements.txt").read_text()
        requirements_path.write_text(contents)
    # pip-compile pins the full transitive dependency set into requirements.txt.
    python_call("piptools", ["compile", str(requirements_path)])
    secho(
        (
            "Requirements built! Please update requirements.in "
            "if you'd like to make a change in your project's dependencies, "
            "and re-run build-reqs to generate the new requirements.txt."
        )
    )
@cli.command("activate-nbstripout")
def activate_nbstripout():
    """Install the nbstripout git hook to automatically clean notebooks."""
    secho(
        (
            "Notebook output cells will be automatically cleared before committing"
            " to git."
        ),
        fg="yellow",
    )
    # nbstripout is an optional dependency; fail with a helpful message.
    try:
        import nbstripout  # pylint: disable=unused-import
    except ImportError:
        raise KedroCliError(NO_DEPENDENCY_MESSAGE.format("nbstripout"))
    # The hook only makes sense inside a git repository: probe with
    # `git rev-parse --git-dir` (non-zero exit => not a repo) and handle
    # git itself being absent from PATH.
    try:
        res = subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if res.returncode:
            raise KedroCliError("Not a git repository. Run `git init` first.")
    except FileNotFoundError:
        raise KedroCliError("Git executable not found. Install Git first.")
    call(["nbstripout", "--install"])
def _build_jupyter_command(
base: str, ip: str, all_kernels: bool, args: Iterable[str], idle_timeout: int
) -> List[str]:
cmd = [
base,
"--ip",
ip,
"--MappingKernelManager.cull_idle_timeout={}".format(idle_timeout),
"--MappingKernelManager.cull_interval={}".format(idle_timeout),
]
if not all_kernels:
project_name = "Default Kedro 157"
kernel_name = re.sub(r"[^\w]+", "", project_name).strip() or "Kedro"
cmd += [
"--NotebookApp.kernel_spec_manager_class="
"kedro.cli.jupyter.SingleKernelSpecManager",
"--KernelSpecManager.default_kernel_name='{}'".format(kernel_name),
]
return cmd + list(args)
def _build_jupyter_env(kedro_env: str) -> Dict[str, Any]:
"""Build the environment dictionary that gets injected into the subprocess running
Jupyter. Since the subprocess has access only to the environment variables passed
in, we need to copy the current environment and add ``KEDRO_ENV_VAR``.
"""
if not kedro_env:
return {}
jupyter_env = os.environ.copy()
jupyter_env[KEDRO_ENV_VAR] = kedro_env
return {"env": jupyter_env}
@cli.group()
def jupyter():
    """Open Jupyter Notebook / Lab with project specific variables loaded, or
    convert notebooks into Kedro code.
    """
    # Container group for the notebook / lab / convert sub-commands below.
@forward_command(jupyter, "notebook", forward_help=True)
@click.option("--ip", type=str, default="127.0.0.1", help=JUPYTER_IP_HELP)
@click.option(
    "--all-kernels", is_flag=True, default=False, help=JUPYTER_ALL_KERNELS_HELP
)
@click.option("--idle-timeout", type=int, default=30, help=JUPYTER_IDLE_TIMEOUT_HELP)
@click.option(
    "--env",
    "-e",
    type=str,
    default=None,
    multiple=False,
    envvar=KEDRO_ENV_VAR,
    help=ENV_ARG_HELP,
)
def jupyter_notebook(ip, all_kernels, env, idle_timeout, args):
    """Open Jupyter Notebook with project specific variables loaded."""
    # Skip the startup banner when the user only asked for help.
    if "-h" not in args and "--help" not in args:
        ipython_message(all_kernels)
    arguments = _build_jupyter_command(
        "notebook", ip=ip, all_kernels=all_kernels, args=args, idle_timeout=idle_timeout
    )
    # Inject the Kedro environment into the Jupyter subprocess's env vars.
    python_call_kwargs = _build_jupyter_env(env)
    python_call("jupyter", arguments, **python_call_kwargs)
@forward_command(jupyter, "lab", forward_help=True)
@click.option("--ip", type=str, default="127.0.0.1", help=JUPYTER_IP_HELP)
@click.option(
    "--all-kernels", is_flag=True, default=False, help=JUPYTER_ALL_KERNELS_HELP
)
@click.option("--idle-timeout", type=int, default=30, help=JUPYTER_IDLE_TIMEOUT_HELP)
@click.option(
    "--env",
    "-e",
    type=str,
    default=None,
    multiple=False,
    envvar=KEDRO_ENV_VAR,
    help=ENV_ARG_HELP,
)
def jupyter_lab(ip, all_kernels, env, idle_timeout, args):
    """Open Jupyter Lab with project specific variables loaded."""
    # Skip the startup banner when the user only asked for help.
    if "-h" not in args and "--help" not in args:
        ipython_message(all_kernels)
    arguments = _build_jupyter_command(
        "lab", ip=ip, all_kernels=all_kernels, args=args, idle_timeout=idle_timeout
    )
    # Inject the Kedro environment into the Jupyter subprocess's env vars.
    python_call_kwargs = _build_jupyter_env(env)
    python_call("jupyter", arguments, **python_call_kwargs)
@jupyter.command("convert")
@click.option("--all", "all_flag", is_flag=True, help=CONVERT_ALL_HELP)
@click.option("-y", "overwrite_flag", is_flag=True, help=OVERWRITE_HELP)
@click.argument(
    "filepath",
    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
    required=False,
    nargs=-1,
)
def convert_notebook(all_flag, overwrite_flag, filepath):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `src/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    context = load_context(Path.cwd())
    # The user must supply either explicit notebook paths or --all.
    if not filepath and not all_flag:
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)
    kedro_project_path = context.project_path
    kedro_package_name = "default_kedro_157"
    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = kedro_project_path / "**" / "*.ipynb"
        notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]
    # Output filenames come from notebook stems, so stems must be unique.
    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        raise KedroCliError(
            "Found non-unique notebook names! "
            "Please rename the following: {}".format(", ".join(non_unique_names))
        )
    for notebook in notebooks:
        secho("Converting notebook '{}'...".format(str(notebook)))
        output_path = (
            kedro_project_path
            / "src"
            / kedro_package_name
            / "nodes"
            / "{}.py".format(notebook.stem)
        )
        # Prompt before clobbering an existing file, unless -y was passed.
        if output_path.is_file():
            overwrite = overwrite_flag or click.confirm(
                "Output file {} already exists. Overwrite?".format(str(output_path)),
                default=False,
            )
            if overwrite:
                export_nodes(notebook, output_path)
        else:
            export_nodes(notebook, output_path)
    secho("Done!")
def ipython_message(all_kernels=True):
    """Show a message saying how we have configured the IPython env."""
    # Names made available in the session — presumably injected by the
    # project's IPython startup script (TODO confirm against .ipython/).
    ipy_vars = ["startup_error", "context"]
    secho("-" * 79, fg="cyan")
    secho("Starting a Kedro session with the following variables in scope")
    secho(", ".join(ipy_vars), fg="green")
    secho(
        "Use the line magic {} to refresh them".format(
            style("%reload_kedro", fg="green")
        )
    )
    secho("or to see the error message if they are undefined")
    # Note the kernel restriction applied when --all-kernels was not passed.
    if not all_kernels:
        secho("The choice of kernels is limited to the default one.", fg="yellow")
        secho("(restart with --all-kernels to get access to others)", fg="yellow")
    secho("-" * 79, fg="cyan")
if __name__ == "__main__":
    # Run from the project root so relative paths (conf/, data/) resolve.
    os.chdir(str(PROJ_PATH))
    kedro_main()

0
logs/.gitkeep Normal file
View file

0
logs/journals/.gitkeep Normal file
View file

0
notebooks/.gitkeep Normal file
View file

3
setup.cfg Normal file
View file

@ -0,0 +1,3 @@
[tool:pytest]
addopts=--cov-report term-missing
--cov src/default_kedro_157 -ra

View file

@ -0,0 +1,31 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Default Kedro 157
"""
__version__ = "0.1"

View file

View file

@ -0,0 +1,68 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Construction of the master pipeline.
"""
from typing import Dict
from kedro.pipeline import Pipeline
###########################################################################
# Here you can find an example pipeline, made of two modular pipelines.
#
# Delete this when you start working on your own Kedro project as
# well as pipelines/data_science AND pipelines/data_engineering
# -------------------------------------------------------------------------
from default_kedro_157.pipelines import data_engineering as de
from default_kedro_157.pipelines import data_science as ds
def create_pipelines(**kwargs) -> Dict[str, Pipeline]:
    """Create the project's pipeline.

    Args:
        kwargs: Ignore any additional arguments added in the future.

    Returns:
        A mapping from a pipeline name to a ``Pipeline`` object.
    """
    de_pipeline = de.create_pipeline()
    ds_pipeline = ds.create_pipeline()
    pipelines = {"de": de_pipeline, "ds": ds_pipeline}
    # The default pipeline chains data engineering then data science.
    pipelines["__default__"] = de_pipeline + ds_pipeline
    return pipelines

View file

@ -0,0 +1,53 @@
# Data Engineering pipeline
> *Note:* This `README.md` was generated using `Kedro 0.15.7` for illustration purposes. Please modify it according to your pipeline structure and contents.
## Overview
This modular pipeline splits the incoming data into the train and test subsets (`split_data` node)
## Pipeline inputs
### `example_iris_data`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | Input data to split into train and test sets |
### `params:example_test_data_ratio`
| | |
| ---- | ------------------ |
| Type | `float` |
| Description | The split ratio parameter that identifies what percentage of rows goes to the train set |
## Pipeline outputs
### `example_train_x`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing train set features |
### `example_train_y`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing train set one-hot encoded target variable |
### `example_test_x`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing test set features |
### `example_test_y`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing test set one-hot encoded target variable |

View file

@ -0,0 +1,34 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
"""
from .pipeline import create_pipeline # NOQA

View file

@ -0,0 +1,78 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
"""
from typing import Any, Dict
import pandas as pd
def split_data(data: pd.DataFrame, example_test_data_ratio: float) -> Dict[str, Any]:
    """Node for splitting the classical Iris data set into training and test
    sets, each split into features and labels.
    The split ratio parameter is taken from conf/project/parameters.yml.
    The data and the parameters will be loaded and provided to your function
    automatically when the pipeline is executed and it is time to run this node.
    """
    feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    data.columns = feature_columns + ["target"]
    classes = sorted(data["target"].unique())
    # One-hot encoding for the target variable
    data = pd.get_dummies(data, columns=["target"], prefix="", prefix_sep="")
    # Shuffle all the rows before splitting.
    data = data.sample(frac=1).reset_index(drop=True)
    # First n_test rows become the test set, the remainder the training set.
    total_rows = data.shape[0]
    test_rows = int(total_rows * example_test_data_ratio)
    test_split = data.iloc[:test_rows, :].reset_index(drop=True)
    train_split = data.iloc[test_rows:, :].reset_index(drop=True)
    # When returning many variables, it is a good practice to give them names:
    return dict(
        train_x=train_split[feature_columns],
        train_y=train_split[classes],
        test_x=test_split[feature_columns],
        test_y=test_split[classes],
    )

View file

@ -0,0 +1,54 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
Delete this when you start working on your own Kedro project.
"""
from kedro.pipeline import Pipeline, node
from .nodes import split_data
def create_pipeline(**kwargs):
    """Create the example data engineering pipeline.

    It contains a single node that splits the raw Iris data into train and
    test feature/label sets.
    """
    output_names = dict(
        train_x="example_train_x",
        train_y="example_train_y",
        test_x="example_test_x",
        test_y="example_test_y",
    )
    split_node = node(
        split_data,
        ["example_iris_data", "params:example_test_data_ratio"],
        output_names,
    )
    return Pipeline([split_node])

View file

@ -0,0 +1,58 @@
# Data Science pipeline
> *Note:* This `README.md` was generated using `Kedro 0.15.7` for illustration purposes. Please modify it according to your pipeline structure and contents.
## Overview
This modular pipeline:
1. trains a simple multi-class logistic regression model (`train_model` node)
2. makes predictions given a trained model from (1) and a test set (`predict` node)
3. reports the model accuracy on a test set (`report_accuracy` node)
## Pipeline inputs
### `example_train_x`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing train set features |
### `example_train_y`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing train set one-hot encoded target variable |
### `example_test_x`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing test set features |
### `example_test_y`
| | |
| ---- | ------------------ |
| Type | `pandas.DataFrame` |
| Description | DataFrame containing test set one-hot encoded target variable |
### `parameters`
| | |
| ---- | ------------------ |
| Type | `dict` |
| Description | Project parameter dictionary that must contain the following keys: `example_num_train_iter` (number of model training iterations), `example_learning_rate` (learning rate for gradient descent) |
## Pipeline outputs
### `example_model`
| | |
| ---- | ------------------ |
| Type | `numpy.ndarray` |
| Description | Example logistic regression model |

View file

@ -0,0 +1,34 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
"""
from .pipeline import create_pipeline # NOQA

View file

@ -0,0 +1,109 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
Delete this when you start working on your own Kedro project.
"""
# pylint: disable=invalid-name
import logging
from typing import Any, Dict
import numpy as np
import pandas as pd
def train_model(
    train_x: pd.DataFrame, train_y: pd.DataFrame, parameters: Dict[str, Any]
) -> np.ndarray:
    """Train a simple one-vs-rest multi-class logistic regression model.

    The number of gradient-descent iterations and the learning rate are
    read from ``parameters`` (populated from conf/project/parameters.yml)
    and, like the data, are injected by Kedro at execution time.

    Args:
        train_x: Training features, one row per sample.
        train_y: One-hot encoded training targets, one column per class.
        parameters: Must contain ``example_num_train_iter`` and
            ``example_learning_rate``.

    Returns:
        Weight matrix of shape ``(n_features + 1, n_classes)``; the first
        row holds the bias weights.
    """
    n_iterations = parameters["example_num_train_iter"]
    learning_rate = parameters["example_learning_rate"]

    features = train_x.to_numpy()
    targets = train_y.to_numpy()

    # Prepend a column of ones so the bias is learned as a regular weight.
    features = np.concatenate(
        (np.ones((features.shape[0], 1)), features), axis=1
    )

    # One-vs-rest: fit one independent binary model per target column.
    per_class_weights = []
    for class_idx in range(targets.shape[1]):
        theta = np.zeros(features.shape[1])
        labels = targets[:, class_idx]
        for _ in range(n_iterations):
            predicted = _sigmoid(features @ theta)
            gradient = features.T @ (predicted - labels) / labels.size
            theta -= learning_rate * gradient
        per_class_weights.append(theta)

    # Stack the per-class weight vectors into one joint model matrix.
    return np.vstack(per_class_weights).transpose()
def predict(model: np.ndarray, test_x: pd.DataFrame) -> np.ndarray:
    """Node for making predictions given a pre-trained model and a test set.

    Args:
        model: Weight matrix produced by ``train_model``; one column of
            weights per class, bias weights in the first row.
        test_x: Test features, one row per sample.

    Returns:
        Array of predicted class indices, one entry per test sample.
    """
    features = test_x.to_numpy()

    # Prepend the bias column, mirroring the layout used during training.
    features = np.concatenate(
        (np.ones((features.shape[0], 1)), features), axis=1
    )

    # Per-class "probabilities"; argmax picks the most likely class.
    class_scores = _sigmoid(features @ model)
    return class_scores.argmax(axis=1)
def report_accuracy(predictions: np.ndarray, test_y: pd.DataFrame) -> None:
    """Node for reporting the accuracy of the predictions performed by the
    previous node.

    This node has no outputs; its only effect is an INFO-level log line.

    Args:
        predictions: Predicted class indices, one per test sample.
        test_y: One-hot encoded true targets, one column per class.
    """
    # Recover true class indices from the one-hot encoding.
    true_classes = test_y.to_numpy().argmax(axis=1)
    # Fraction of samples whose predicted index matches the true index.
    accuracy = (predictions == true_classes).sum() / true_classes.shape[0]
    logger = logging.getLogger(__name__)
    logger.info("Model accuracy on test set: %0.2f%%", accuracy * 100)
def _sigmoid(z):
"""A helper sigmoid function used by the training and the scoring nodes."""
return 1 / (1 + np.exp(-z))

View file

@ -0,0 +1,55 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
Delete this when you start working on your own Kedro project.
"""
from kedro.pipeline import Pipeline, node
from .nodes import predict, report_accuracy, train_model
def create_pipeline(**kwargs):
    """Create the example project pipeline.

    Args:
        kwargs: Ignored; accepted for compatibility with Kedro's
            pipeline-creation convention.

    Returns:
        A ``Pipeline`` wiring the train -> predict -> report nodes.
    """
    training_node = node(
        train_model,
        ["example_train_x", "example_train_y", "parameters"],
        "example_model",
    )
    prediction_node = node(
        predict,
        {"model": "example_model", "test_x": "example_test_x"},
        "example_predictions",
    )
    reporting_node = node(
        report_accuracy, ["example_predictions", "example_test_y"], None
    )
    return Pipeline([training_node, prediction_node, reporting_node])

View file

@ -0,0 +1,61 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Application entry point."""
from pathlib import Path
from typing import Dict
from kedro.context import KedroContext, load_context
from kedro.pipeline import Pipeline
from default_kedro_157.pipeline import create_pipelines
class ProjectContext(KedroContext):
    """Project-specific Kedro context.

    Users can override the remaining methods from the parent class here,
    or create new ones (e.g. as required by plugins).
    """

    # Human-readable project name and the Kedro version it targets.
    project_name = "Default Kedro 157"
    project_version = "0.15.7"

    def _get_pipelines(self) -> Dict[str, Pipeline]:
        """Assemble and return the project's named pipelines."""
        pipelines = create_pipelines()
        return pipelines
def run_package():
    """Entry point for running pip-installed projects via the
    ``<project_package>`` console-script command.

    Loads the Kedro context from the current working directory and runs
    the default pipeline.
    """
    load_context(Path.cwd()).run()


if __name__ == "__main__":
    # Entry point for ``python -m <project_package>.run``.
    run_package()

11
src/requirements.txt Normal file
View file

@ -0,0 +1,11 @@
ipython>=7.0.0, <8.0
jupyter>=1.0.0, <2.0
jupyter_client>=5.1.0, <6.0
jupyterlab==0.31.1
kedro==0.15.7
kedro-viz>=3.1.0, <4.0
nbstripout==0.3.3
pytest-cov>=2.5, <3.0
pytest-mock>=1.7.1,<2.0
pytest>=3.4, <4.0
wheel==0.32.2

67
src/setup.py Normal file
View file

@ -0,0 +1,67 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import find_packages, setup
# Console-script entry point installed with the package.
entry_point = "default-kedro-157 = default_kedro_157.run:run_package"

# get the dependencies and installs
with open("requirements.txt", "r", encoding="utf-8") as req_file:
    # Strip comments and pip options (e.g. "--extra-index-url") that a
    # modified pip.conf may inject when running `kedro build-reqs`.
    requires = [
        requirement
        for requirement in (
            line.split("#", 1)[0].strip() for line in req_file
        )
        if requirement and not requirement.startswith("--")
    ]

setup(
    name="default_kedro_157",
    version="0.1",
    packages=find_packages(exclude=["tests"]),
    entry_points={"console_scripts": [entry_point]},
    install_requires=requires,
    extras_require={
        "docs": [
            "sphinx>=1.6.3, <2.0",
            "sphinx_rtd_theme==0.4.1",
            "nbsphinx==0.3.4",
            "nbstripout==0.3.3",
            "recommonmark==0.5.0",
            "sphinx-autodoc-typehints==1.6.0",
            "sphinx_copybutton==0.2.5",
            "jupyter_client>=5.1.0, <6.0",
            "tornado>=4.2, <6.0",
            "ipykernel>=4.8.1, <5.0",
        ]
    },
)

0
src/tests/__init__.py Normal file
View file

55
src/tests/test_run.py Normal file
View file

@ -0,0 +1,55 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains an example test.
Tests should be placed in ``src/tests``, in modules that mirror your
project's structure, and in files named test_*.py. They are simply functions
named ``test_*`` which test a unit of logic.
To run the tests, run ``kedro test``.
"""
from pathlib import Path
import pytest
from default_kedro_157.run import ProjectContext
@pytest.fixture
def project_context():
    """Provide a ``ProjectContext`` rooted at the current working directory."""
    project_root = str(Path.cwd())
    return ProjectContext(project_root)
class TestProjectContext:
def test_project_name(self, project_context):
assert project_context.project_name == "Default Kedro 157"
def test_project_version(self, project_context):
assert project_context.project_version == "0.15.7"