init
This commit is contained in:
commit
24e604ab32
44 changed files with 2338 additions and 0 deletions
31
src/default_kedro_157/__init__.py
Normal file
31
src/default_kedro_157/__init__.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Default Kedro 157
|
||||
"""
|
||||
|
||||
__version__ = "0.1"
|
||||
0
src/default_kedro_157/nodes/__init__.py
Normal file
0
src/default_kedro_157/nodes/__init__.py
Normal file
68
src/default_kedro_157/pipeline.py
Normal file
68
src/default_kedro_157/pipeline.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Construction of the master pipeline.
|
||||
"""
|
||||
|
||||
from typing import Dict
|
||||
from kedro.pipeline import Pipeline
|
||||
|
||||
|
||||
|
||||
###########################################################################
|
||||
# Here you can find an example pipeline, made of two modular pipelines.
|
||||
#
|
||||
# Delete this when you start working on your own Kedro project as
|
||||
# well as pipelines/data_science AND pipelines/data_engineering
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
from default_kedro_157.pipelines import data_engineering as de
|
||||
from default_kedro_157.pipelines import data_science as ds
|
||||
|
||||
|
||||
def create_pipelines(**kwargs) -> Dict[str, Pipeline]:
    """Assemble every pipeline this project exposes.

    Args:
        kwargs: Accepted and ignored, so that arguments added in the future
            do not break this function.

    Returns:
        A dictionary keyed by pipeline name: ``"de"`` and ``"ds"`` hold the
        data engineering and data science pipelines, and ``"__default__"``
        holds the two added together.

    """
    pipelines = {
        "de": de.create_pipeline(),
        "ds": ds.create_pipeline(),
    }
    # The default pipeline is data engineering followed by data science.
    pipelines["__default__"] = pipelines["de"] + pipelines["ds"]
    return pipelines
|
||||
|
||||
0
src/default_kedro_157/pipelines/__init__.py
Normal file
0
src/default_kedro_157/pipelines/__init__.py
Normal file
53
src/default_kedro_157/pipelines/data_engineering/README.md
Normal file
53
src/default_kedro_157/pipelines/data_engineering/README.md
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Data Engineering pipeline
|
||||
|
||||
> *Note:* This `README.md` was generated using `Kedro 0.15.7` for illustration purposes. Please modify it according to your pipeline structure and contents.
|
||||
|
||||
## Overview
|
||||
|
||||
This modular pipeline splits the incoming data into the train and test subsets (`split_data` node).
|
||||
|
||||
## Pipeline inputs
|
||||
|
||||
### `example_iris_data`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | Input data to split into train and test sets |
|
||||
|
||||
### `params:example_test_data_ratio`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `float` |
|
||||
| Description | The split ratio parameter that identifies what fraction of rows goes to the test set (the remaining rows form the train set) |
|
||||
|
||||
## Pipeline outputs
|
||||
|
||||
### `example_train_x`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing train set features |
|
||||
|
||||
### `example_train_y`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing train set one-hot encoded target variable |
|
||||
|
||||
### `example_test_x`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing test set features |
|
||||
|
||||
### `example_test_y`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing test set one-hot encoded target variable |
|
||||
34
src/default_kedro_157/pipelines/data_engineering/__init__.py
Normal file
34
src/default_kedro_157/pipelines/data_engineering/__init__.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
|
||||
"""
|
||||
|
||||
from .pipeline import create_pipeline # NOQA
|
||||
78
src/default_kedro_157/pipelines/data_engineering/nodes.py
Normal file
78
src/default_kedro_157/pipelines/data_engineering/nodes.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def split_data(data: pd.DataFrame, example_test_data_ratio: float) -> Dict[str, Any]:
    """Node for splitting the classical Iris data set into training and test
    sets, each split into features and one-hot encoded labels.

    The data and the split ratio parameter are provided to this function
    by the pipeline when it is time to run this node. The caller's frame is
    left untouched: renaming and encoding are done on a copy.

    Args:
        data: Frame whose first four columns are the features and whose fifth
            column is the target. Columns are renamed by position, so
            ``data`` must have exactly five columns.
        example_test_data_ratio: Fraction of the rows that goes to the test
            set; the resulting row count is rounded down.

    Returns:
        A dictionary with the keys ``train_x``, ``train_y``, ``test_x`` and
        ``test_y``. The ``*_x`` frames hold the four feature columns, the
        ``*_y`` frames hold one indicator column per class.

    """
    feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    n_features = len(feature_columns)

    # Work on a copy so that renaming the columns does not modify the frame
    # that was passed in.
    data = data.copy()
    data.columns = feature_columns + ["target"]

    # One-hot encoding for the target variable
    data = pd.get_dummies(data, columns=["target"], prefix="", prefix_sep="")

    # Shuffle all the data
    data = data.sample(frac=1).reset_index(drop=True)

    # Split to training and testing data
    n_test = int(data.shape[0] * example_test_data_ratio)
    test_data = data.iloc[:n_test, :].reset_index(drop=True)
    training_data = data.iloc[n_test:, :].reset_index(drop=True)

    # get_dummies keeps the feature columns first and appends the indicator
    # columns after them, so features and labels are separated by position.
    # Looking the indicator columns up by the original label values would
    # fail for non-string labels, because the generated names are strings.
    # When returning many variables, it is a good practice to give them names:
    return dict(
        train_x=training_data.iloc[:, :n_features],
        train_y=training_data.iloc[:, n_features:],
        test_x=test_data.iloc[:, :n_features],
        test_y=test_data.iloc[:, n_features:],
    )
|
||||
54
src/default_kedro_157/pipelines/data_engineering/pipeline.py
Normal file
54
src/default_kedro_157/pipelines/data_engineering/pipeline.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
Delete this when you start working on your own Kedro project.
|
||||
"""
|
||||
|
||||
from kedro.pipeline import Pipeline, node
|
||||
|
||||
from .nodes import split_data
|
||||
|
||||
|
||||
def create_pipeline(**kwargs):
    """Build the data engineering pipeline.

    The pipeline holds a single node that runs ``split_data`` on the
    ``example_iris_data`` dataset and the ``params:example_test_data_ratio``
    parameter, and maps the four keys of its result onto the
    ``example_train_x``, ``example_train_y``, ``example_test_x`` and
    ``example_test_y`` datasets.

    Args:
        kwargs: Accepted but not used.

    Returns:
        The ``Pipeline`` containing the split node.

    """
    split_inputs = ["example_iris_data", "params:example_test_data_ratio"]
    # Keys are the names returned by split_data, values are dataset names.
    split_outputs = {
        "train_x": "example_train_x",
        "train_y": "example_train_y",
        "test_x": "example_test_x",
        "test_y": "example_test_y",
    }
    return Pipeline([node(split_data, split_inputs, split_outputs)])
|
||||
58
src/default_kedro_157/pipelines/data_science/README.md
Normal file
58
src/default_kedro_157/pipelines/data_science/README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# Data Science pipeline
|
||||
|
||||
> *Note:* This `README.md` was generated using `Kedro 0.15.7` for illustration purposes. Please modify it according to your pipeline structure and contents.
|
||||
|
||||
## Overview
|
||||
|
||||
This modular pipeline:
|
||||
1. trains a simple multi-class logistic regression model (`train_model` node)
|
||||
2. makes predictions given a trained model from (1) and a test set (`predict` node)
|
||||
3. reports the model accuracy on a test set (`report_accuracy` node)
|
||||
|
||||
|
||||
## Pipeline inputs
|
||||
|
||||
### `example_train_x`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing train set features |
|
||||
|
||||
### `example_train_y`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing train set one-hot encoded target variable |
|
||||
|
||||
### `example_test_x`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing test set features |
|
||||
|
||||
### `example_test_y`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `pandas.DataFrame` |
|
||||
| Description | DataFrame containing test set one-hot encoded target variable |
|
||||
|
||||
### `parameters`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `dict` |
|
||||
| Description | Project parameter dictionary that must contain the following keys: `example_num_train_iter` (number of model training iterations), `example_learning_rate` (learning rate for gradient descent) |
|
||||
|
||||
|
||||
## Pipeline outputs
|
||||
|
||||
### `example_model`
|
||||
|
||||
| | |
|
||||
| ---- | ------------------ |
|
||||
| Type | `numpy.ndarray` |
|
||||
| Description | Example logistic regression model |
|
||||
34
src/default_kedro_157/pipelines/data_science/__init__.py
Normal file
34
src/default_kedro_157/pipelines/data_science/__init__.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
PLEASE DELETE THIS FILE ONCE YOU START WORKING ON YOUR OWN PROJECT!
|
||||
"""
|
||||
|
||||
from .pipeline import create_pipeline # NOQA
|
||||
109
src/default_kedro_157/pipelines/data_science/nodes.py
Normal file
109
src/default_kedro_157/pipelines/data_science/nodes.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
Delete this when you start working on your own Kedro project.
|
||||
"""
|
||||
# pylint: disable=invalid-name
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def train_model(
    train_x: pd.DataFrame, train_y: pd.DataFrame, parameters: Dict[str, Any]
) -> np.ndarray:
    """Node that fits one logistic regression per target column by gradient
    descent and joins the results into a single multi-class model.

    Args:
        train_x: Training features, one row per sample.
        train_y: Training targets, one column per class.
        parameters: Must contain ``example_num_train_iter`` (number of
            gradient descent steps) and ``example_learning_rate`` (step
            size).

    Returns:
        An array with one column of weights per column of ``train_y``. The
        first row holds the bias weights, the remaining rows the weights of
        the columns of ``train_x``.

    """
    n_steps = parameters["example_num_train_iter"]
    step_size = parameters["example_learning_rate"]
    features = train_x.to_numpy()
    targets = train_y.to_numpy()

    # Prepend a column of ones so that the first weight acts as the bias
    ones = np.ones((features.shape[0], 1))
    design = np.concatenate((ones, features), axis=1)

    def fit_single_class(y):
        # Start from all-zero weights and take n_steps gradient steps
        theta = np.zeros(design.shape[1])
        for _ in range(n_steps):
            residual = _sigmoid(np.dot(design, theta)) - y
            theta -= step_size * (np.dot(design.T, residual) / y.size)
        return theta

    # One independent binary model per class column
    per_class_weights = [
        fit_single_class(targets[:, k]) for k in range(targets.shape[1])
    ]

    # Stack the per-class weights as columns of the joint model
    return np.vstack(per_class_weights).T
|
||||
|
||||
|
||||
def predict(model: np.ndarray, test_x: pd.DataFrame) -> np.ndarray:
    """Node that returns, for every test sample, the index of the class the
    model scores highest.

    Args:
        model: Weight array whose first row is multiplied with the bias
            column and whose columns correspond to the classes.
        test_x: Test features, one row per sample.

    Returns:
        One class index per row of ``test_x``.

    """
    features = test_x.to_numpy()

    # Prepend the bias column, matching what is done at training time
    ones = np.ones((features.shape[0], 1))
    design = np.concatenate((ones, features), axis=1)

    # Score ("probability") of every class for every sample
    scores = _sigmoid(np.dot(design, model))

    # Pick the best-scoring class per sample
    best_class = np.argmax(scores, axis=1)
    return best_class
|
||||
|
||||
|
||||
def report_accuracy(predictions: np.ndarray, test_y: pd.DataFrame) -> None:
    """Node that logs the share of test samples whose predicted class equals
    the true class. Nothing is returned; the only output is an INFO log
    record on this module's logger.

    Args:
        predictions: Predicted class index for every test sample.
        test_y: Test targets, one column per class; the true class of a row
            is the index of its largest value.

    """
    logger = logging.getLogger(__name__)

    # True class index of every sample
    true_class = np.argmax(test_y.to_numpy(), axis=1)

    # Fraction of samples predicted correctly
    n_correct = np.sum(predictions == true_class)
    fraction_correct = n_correct / true_class.shape[0]

    logger.info("Model accuracy on test set: %0.2f%%", fraction_correct * 100)
|
||||
|
||||
|
||||
def _sigmoid(z):
|
||||
"""A helper sigmoid function used by the training and the scoring nodes."""
|
||||
return 1 / (1 + np.exp(-z))
|
||||
55
src/default_kedro_157/pipelines/data_science/pipeline.py
Normal file
55
src/default_kedro_157/pipelines/data_science/pipeline.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Example code for the nodes in the example pipeline. This code is meant
|
||||
just for illustrating basic Kedro features.
|
||||
|
||||
Delete this when you start working on your own Kedro project.
|
||||
"""
|
||||
|
||||
from kedro.pipeline import Pipeline, node
|
||||
|
||||
from .nodes import predict, report_accuracy, train_model
|
||||
|
||||
|
||||
def create_pipeline(**kwargs):
    """Build the data science pipeline.

    The pipeline has three nodes: ``train_model`` produces ``example_model``
    from the training datasets and the project parameters, ``predict``
    produces ``example_predictions`` from the model and ``example_test_x``,
    and ``report_accuracy`` consumes the predictions and ``example_test_y``
    without producing an output dataset.

    Args:
        kwargs: Accepted but not used.

    Returns:
        The ``Pipeline`` containing the three nodes.

    """
    training_node = node(
        train_model,
        ["example_train_x", "example_train_y", "parameters"],
        "example_model",
    )
    # Inputs given as a mapping are passed to predict by keyword.
    prediction_node = node(
        predict,
        {"model": "example_model", "test_x": "example_test_x"},
        "example_predictions",
    )
    reporting_node = node(
        report_accuracy, ["example_predictions", "example_test_y"], None
    )
    return Pipeline([training_node, prediction_node, reporting_node])
|
||||
61
src/default_kedro_157/run.py
Normal file
61
src/default_kedro_157/run.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
# Copyright 2020 QuantumBlack Visual Analytics Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
||||
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
|
||||
# (either separately or in combination, "QuantumBlack Trademarks") are
|
||||
# trademarks of QuantumBlack. The License does not grant you any right or
|
||||
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
|
||||
# Trademarks or any confusingly similar mark as a trademark for your product,
|
||||
# or use the QuantumBlack Trademarks in any other manner that might cause
|
||||
# confusion in the marketplace, including but not limited to in advertising,
|
||||
# on websites, or on software.
|
||||
#
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Application entry point."""
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from kedro.context import KedroContext, load_context
|
||||
from kedro.pipeline import Pipeline
|
||||
|
||||
from default_kedro_157.pipeline import create_pipelines
|
||||
|
||||
|
||||
class ProjectContext(KedroContext):
    """Project-specific Kedro context.

    Sets the project name and version and supplies the project's pipelines.
    The remaining methods of the parent class can be overridden here, and
    new ones can be added (e.g. as required by plugins).
    """

    project_name = "Default Kedro 157"
    project_version = "0.15.7"

    def _get_pipelines(self) -> Dict[str, Pipeline]:
        """Return the name-to-pipeline mapping built by ``create_pipelines``."""
        pipelines = create_pipelines()
        return pipelines
|
||||
|
||||
|
||||
def run_package():
    """Load the project context from the current working directory and run it.

    Entry point for running pip-installed projects using the
    `<project_package>` command.
    """
    load_context(Path.cwd()).run()


if __name__ == "__main__":
    # Entry point for running pip-installed projects using the
    # `python -m <project_package>.run` command.
    run_package()
|
||||
Loading…
Add table
Add a link
Reference in a new issue