Skip to content

Commit 69fc7fd

Browse files
ivanmkc (Ivan Cheung)
authored
feat: Added tabular forecasting samples (#128)
* Added predict, get_model_evaluation and create_training_pipeline samples for AutoML Forecasting * Added param handlers * Added headers manually * fix: Improved forecasting sample * Added forecasting test * Added tests for predict and get_model_evaluation * fix: Fixed create_training_pipeline_sample * feat: Added list_model_evaluations_tabular_forecasting_sample and test, fixed get_model_evaluation_tabular_forecasting_sample, and fixed create_training_pipeline_tabular_forecasting_sample * fix: Reverted back to generated BUILD_SPECIFIC_GCLOUD_PROJECT * fix: Fixed name of test * fix: Fixed lint errors * fix: Fixed assertion * fix: Removed predict samples * Consolidated samples * fix: Removed list_model_evaluations_tabular_forecasting * fix: tweaks Co-authored-by: Ivan Cheung <[email protected]>
1 parent 624a08d commit 69fc7fd

File tree

5 files changed

+264
-0
lines changed

5 files changed

+264
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
17+
def make_parent(parent: str) -> str:
    """Return the parent resource path for the request.

    Sample-config helper: the sample generator maps this onto the request's
    ``parent`` field; the value passes through unchanged.
    """
    # The original assigned `parent = parent`, a no-op; return directly.
    return parent
22+
23+
def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline:
    """Assemble the TrainingPipeline request body for an AutoML forecasting job.

    The returned dict follows the automl_forecasting_1.0.0.yaml training task
    schema and references an existing tabular dataset by ``dataset_id``.
    """
    # Forecast at a one-day granularity.
    data_granularity = {"unit": "day", "quantity": 1}

    # Columns used for training; "auto" lets the service infer each data type.
    column_transformations = [
        {"auto": {"column_name": column}}
        for column in (
            "date",
            "state_name",
            "county_fips_code",
            "confirmed_cases",
            "deaths",
        )
    ]

    # The inputs must be formatted according to the training_task_definition
    # yaml file referenced below.
    task_inputs = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": column_transformations,
        "period": data_granularity,
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    return {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        # Dict converted to a protobuf Value, as the API field requires.
        "training_task_inputs": to_protobuf_value(task_inputs),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

.sample_configs/process_configs.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ create_batch_prediction_job_custom_image_explain_sample: {}
1919
create_batch_prediction_job_custom_tabular_explain_sample: {}
2020
create_batch_prediction_job_sample: {}
2121
create_batch_prediction_job_tabular_explain_sample: {}
22+
create_batch_prediction_job_tabular_forecasting_sample: {}
2223
create_batch_prediction_job_text_classification_sample: {}
2324
create_batch_prediction_job_text_entity_extraction_sample: {}
2425
create_batch_prediction_job_text_sentiment_analysis_sample: {}
@@ -77,6 +78,7 @@ create_training_pipeline_image_object_detection_sample:
7778
training_task_inputs_dict: trainingjob.definition.AutoMlImageObjectDetectionInputs
7879
create_training_pipeline_sample: {}
7980
create_training_pipeline_tabular_classification_sample: {}
81+
create_training_pipeline_tabular_forecasting_sample: {}
8082
create_training_pipeline_tabular_regression_sample: {}
8183
create_training_pipeline_text_classification_sample:
8284
schema_types:
@@ -168,6 +170,7 @@ get_model_evaluation_sample:
168170
- model_explanation
169171
get_model_evaluation_slice_sample: {}
170172
get_model_evaluation_tabular_classification_sample: {}
173+
get_model_evaluation_tabular_forecasting_sample: {}
171174
get_model_evaluation_tabular_regression_sample: {}
172175
get_model_evaluation_text_classification_sample:
173176
skip:
@@ -232,6 +235,7 @@ list_endpoints_sample: {}
232235
list_hyperparameter_tuning_jobs_sample: {}
233236
list_model_evaluation_slices_sample: {}
234237
list_model_evaluations_sample: {}
238+
list_model_evaluations_tabular_forecasting_sample: {}
235239
list_models_sample: {}
236240
list_specialist_pools_sample: {}
237241
list_training_pipelines_sample: {}
@@ -274,6 +278,7 @@ predict_tabular_classification_sample:
274278
comments:
275279
predictions: See gs://google-cloud-aiplatform/schema/predict/prediction/tables_classification.yaml
276280
for the format of the predictions.
281+
predict_tabular_forecasting_sample: {}
277282
predict_tabular_regression_sample:
278283
api_endpoint: us-central1-prediction-aiplatform.googleapis.com
279284
max_depth: 1

.sample_configs/variants.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ create_batch_prediction_job:
2222
- custom_image_explain
2323
- custom_tabular_explain
2424
- tabular_explain
25+
- tabular_forecasting
2526
- text_classification
2627
- text_entity_extraction
2728
- text_sentiment_analysis
@@ -59,6 +60,7 @@ create_training_pipeline:
5960
- image_classification
6061
- image_object_detection
6162
- tabular_classification
63+
- tabular_forecasting
6264
- tabular_regression
6365
- text_classification
6466
- text_entity_extraction
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START aiplatform_create_training_pipeline_tabular_forecasting_sample]
16+
from google.cloud import aiplatform
17+
from google.protobuf import json_format
18+
from google.protobuf.struct_pb2 import Value
19+
20+
21+
def create_training_pipeline_tabular_forecasting_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an AutoML tabular-forecasting TrainingPipeline and print the response."""
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for
    # multiple requests.
    client = aiplatform.gapic.PipelineServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # Columns used for training; "auto" lets the service infer each data type.
    transformations = [
        {"auto": {"column_name": column}}
        for column in (
            "date",
            "state_name",
            "county_fips_code",
            "confirmed_cases",
            "deaths",
        )
    ]

    # The inputs must be formatted according to the training_task_definition
    # yaml file referenced below.
    task_inputs = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        # Forecast at a one-day granularity.
        "period": {"unit": "day", "quantity": 1},
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    pipeline_spec = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        # Dict converted to a protobuf Value, as the API field requires.
        "training_task_inputs": json_format.ParseDict(task_inputs, Value()),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

    response = client.create_training_pipeline(
        parent=f"projects/{project}/locations/{location}",
        training_pipeline=pipeline_spec,
    )
    print("response:", response)
88+
89+
90+
# [END aiplatform_create_training_pipeline_tabular_forecasting_sample]
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
from uuid import uuid4
17+
18+
from google.cloud import aiplatform
19+
import pytest
20+
21+
import cancel_training_pipeline_sample
22+
import create_training_pipeline_tabular_forecasting_sample
23+
import delete_training_pipeline_sample
24+
import helpers
25+
26+
# Project comes from the build-specific env var so test runs are isolated.
PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
# Pre-existing tabular dataset used by the sample under test.
DATASET_ID = "3003302817130610688"  # COVID Dataset
# Unique per-run display name so concurrent test runs do not collide.
DISPLAY_NAME = f"temp_create_training_pipeline_test_{uuid4()}"
TARGET_COLUMN = "deaths"
# NOTE(review): PREDICTION_TYPE is not referenced elsewhere in this view —
# confirm it is unused before removing.
PREDICTION_TYPE = "forecasting"
32+
33+
@pytest.fixture
def shared_state():
    """Yield a fresh dict for passing state from a test to its teardown."""
    yield {}
37+
38+
39+
@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state):
    """Auto-use fixture: after each test, cancel, await, and delete the
    training pipeline whose resource name the test stored in
    ``shared_state["training_pipeline_name"]``.

    NOTE(review): assumes every test sets that key before finishing;
    a test that fails earlier would make this raise KeyError — confirm.
    """
    yield

    # Resource names look like projects/.../trainingPipelines/<id>;
    # the last path segment is the pipeline id.
    training_pipeline_id = shared_state["training_pipeline_name"].split("/")[-1]

    # Stop the training pipeline
    cancel_training_pipeline_sample.cancel_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )

    client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
    pipeline_client = aiplatform.gapic.PipelineServiceClient(
        client_options=client_options
    )

    # Waiting for training pipeline to be in CANCELLED state
    # (cancellation above is asynchronous; presumably deletion requires a
    # terminal state — verify against the helper's implementation).
    helpers.wait_for_job_state(
        get_job_method=pipeline_client.get_training_pipeline,
        name=shared_state["training_pipeline_name"],
    )

    # Delete the training pipeline
    delete_training_pipeline_sample.delete_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )
66+
67+
def test_ucaip_generated_create_training_pipeline_sample(capsys, shared_state):
    """Smoke-test the forecasting sample: run it against the pre-existing
    COVID dataset and check that it prints the API response."""

    create_training_pipeline_tabular_forecasting_sample.create_training_pipeline_tabular_forecasting_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset_id=DATASET_ID,
        model_display_name="permanent_tabular_forecasting_model",
        target_column=TARGET_COLUMN,
        time_series_identifier_column="county",
        time_column="date",
        static_columns=["state_name"],
        time_variant_past_only_columns=["deaths"],
        time_variant_past_and_future_columns=["date"],
        forecast_window_end=10,
    )

    # The sample prints 'response:' followed by the created pipeline.
    out, _ = capsys.readouterr()
    assert "response:" in out

    # Save resource name of the newly created training pipeline
    # so the autouse teardown fixture can cancel and delete it.
    shared_state["training_pipeline_name"] = helpers.get_name(out)

0 commit comments

Comments
 (0)