Skip to content

Commit 7daacd5

Browse files
authored
feat: add data_labeling samples (#78)
1 parent f79c0f4 commit 7daacd5

6 files changed

+465
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START aiplatform_create_data_labeling_job_active_learning_sample]
16+
from google.cloud import aiplatform
17+
from google.protobuf import json_format
18+
from google.protobuf.struct_pb2 import Value
19+
20+
21+
def create_data_labeling_job_active_learning_sample(
    project: str,
    display_name: str,
    dataset: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a data labeling job with an active learning config and print it.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new data labeling job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        instruction_uri: Value sent as the job's "instruction_uri".
        inputs_schema_uri: Value sent as the job's "inputs_schema_uri".
        annotation_spec: Single entry placed in the "annotation_specs" list
            of the job inputs.
        location: Location used to build the parent resource path.
        api_endpoint: API endpoint the client connects to.
    """
    # Initialize the client used to create and send requests. It only needs to
    # be created once and can be reused for multiple requests.
    job_service = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value built from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotation_specs": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id}
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_active_learning"
        },
        "active_learning_config": {"max_data_item_count": 1},
    }

    response = client_response = job_service.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", client_response)
58+
59+
60+
# [END aiplatform_create_data_labeling_job_active_learning_sample]
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
import os
17+
import uuid
18+
from google.cloud import aiplatform
19+
20+
import helpers
21+
22+
import create_data_labeling_job_active_learning_sample
23+
24+
# Values read from the environment of the test run.
API_ENDPOINT = os.environ.get("DATA_LABELING_API_ENDPOINT")
PROJECT_ID = os.environ.get("BUILD_SPECIFIC_GCLOUD_PROJECT")

# Fixed inputs passed to the active learning sample.
LOCATION = "us-central1"
DATASET_ID = "1905673553261363200"
INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_classification_1.0.0.yaml"
INSTRUCTIONS_GCS_URI = "gs://ucaip-sample-resources/images/datalabeling_instructions.pdf"
ANNOTATION_SPEC = "rose"

# A random suffix gives each run its own job display name.
DISPLAY_NAME = f"temp_create_data_labeling_job_active_learning_test_{uuid.uuid4()}"
35+
36+
37+
@pytest.fixture
def shared_state():
    """Yield an empty dict that the test and the teardown fixture both use."""
    yield {}
41+
42+
43+
@pytest.fixture
def job_client():
    """Yield a JobServiceClient configured with API_ENDPOINT."""
    yield aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
48+
49+
50+
@pytest.fixture(scope="function", autouse=True)
def teardown(capsys, shared_state, job_client):
    """Cancel and delete the data labeling job recorded by the test.

    Runs after every test (autouse). The job name is read from
    shared_state["data_labeling_job_name"], which the test sets once the
    sample has created a job.
    """
    yield

    # If the test failed before recording a job name there is nothing to clean
    # up. Returning here avoids a KeyError on top of the real test failure.
    job_name = shared_state.get("data_labeling_job_name")
    if job_name is None:
        return

    job_client.cancel_data_labeling_job(name=job_name)

    # Verify the data labeling job is cancelled, or time out after 400 seconds.
    helpers.wait_for_job_state(
        get_job_method=job_client.get_data_labeling_job,
        name=job_name,
        timeout=400,
        freq=10,
    )

    # Delete the data labeling job and wait for the delete operation to finish.
    response = job_client.delete_data_labeling_job(name=job_name)
    print("Delete LRO:", response.operation.name)
    delete_data_labeling_job_response = response.result(timeout=300)
    print("delete_data_labeling_job_response", delete_data_labeling_job_response)

    # The print above must appear in the captured output.
    out, _ = capsys.readouterr()
    assert "delete_data_labeling_job_response" in out
75+
76+
77+
# Creating a data labeling job for images
78+
def test_create_data_labeling_job_active_learning_sample(capsys, shared_state):
    """Run the active learning sample and record the created job's name."""
    sample_module = create_data_labeling_job_active_learning_sample
    dataset_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}"

    sample_module.create_data_labeling_job_active_learning_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset=dataset_name,
        instruction_uri=INSTRUCTIONS_GCS_URI,
        inputs_schema_uri=INPUTS_SCHEMA_URI,
        annotation_spec=ANNOTATION_SPEC,
        api_endpoint=API_ENDPOINT,
    )

    # Save the resource name of the newly created data labeling job so the
    # teardown fixture can cancel and delete it.
    captured, _ = capsys.readouterr()
    shared_state["data_labeling_job_name"] = helpers.get_name(captured)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START aiplatform_create_data_labeling_job_image_segmentation_sample]
16+
from google.cloud import aiplatform
17+
from google.protobuf import json_format
18+
from google.protobuf.struct_pb2 import Value
19+
20+
21+
def create_data_labeling_job_image_segmentation_sample(
    project: str,
    display_name: str,
    dataset: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: dict,
    annotation_set_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an image segmentation data labeling job and print the response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new data labeling job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        instruction_uri: Value sent as the job's "instruction_uri".
        inputs_schema_uri: Value sent as the job's "inputs_schema_uri".
        annotation_spec: Single entry placed in the "annotationSpecColors"
            list of the job inputs.
        annotation_set_name: Value of the
            "aiplatform.googleapis.com/annotation_set_name" annotation label.
        location: Location used to build the parent resource path.
        api_endpoint: API endpoint the client connects to.
    """
    # Initialize the client used to create and send requests. It only needs to
    # be created once and can be reused for multiple requests.
    job_service = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value built from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotationSpecColors": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id}
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": annotation_set_name
        },
    }

    created_job = job_service.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", created_job)
56+
57+
58+
# [END aiplatform_create_data_labeling_job_image_segmentation_sample]
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
import os
17+
import uuid
18+
from google.cloud import aiplatform
19+
20+
import helpers
21+
22+
import create_data_labeling_job_image_segmentation_sample
23+
24+
# Values read from the environment of the test run.
API_ENDPOINT = os.environ.get("DATA_LABELING_API_ENDPOINT")
PROJECT_ID = os.environ.get("BUILD_SPECIFIC_GCLOUD_PROJECT")

# Fixed inputs passed to the image segmentation sample.
LOCATION = "us-central1"
DATASET_ID = "5111009432972558336"
INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_segmentation_1.0.0.yaml"
INSTRUCTIONS_GCS_URI = "gs://ucaip-sample-resources/images/datalabeling_instructions.pdf"
ANNOTATION_SPEC = {"color": {"red": 1.0}, "displayName": "rose"}

# Random suffixes give each run its own display name and annotation set name.
DISPLAY_NAME = f"temp_create_data_labeling_job_image_segmentation_test_{uuid.uuid4()}"
ANNOTATION_SET_NAME = f"temp_image_segmentation_{uuid.uuid4()}"
36+
37+
@pytest.fixture
def shared_state():
    """Yield an empty dict that the test and the teardown fixture both use."""
    yield {}
41+
42+
43+
@pytest.fixture
def job_client():
    """Yield a JobServiceClient configured with API_ENDPOINT."""
    yield aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
48+
49+
50+
@pytest.fixture(scope="function", autouse=True)
def teardown(capsys, shared_state, job_client):
    """Cancel and delete the data labeling job recorded by the test.

    Runs after every test (autouse). The job name is read from
    shared_state["data_labeling_job_name"], which the test sets once the
    sample has created a job.
    """
    yield

    # If the test failed before recording a job name there is nothing to clean
    # up. Returning here avoids a KeyError on top of the real test failure.
    job_name = shared_state.get("data_labeling_job_name")
    if job_name is None:
        return

    job_client.cancel_data_labeling_job(name=job_name)

    # Verify the data labeling job is cancelled, or time out after 400 seconds.
    helpers.wait_for_job_state(
        get_job_method=job_client.get_data_labeling_job,
        name=job_name,
        timeout=400,
        freq=10,
    )

    # Delete the data labeling job and wait for the delete operation to finish.
    response = job_client.delete_data_labeling_job(name=job_name)
    print("Delete LRO:", response.operation.name)
    delete_data_labeling_job_response = response.result(timeout=300)
    print("delete_data_labeling_job_response", delete_data_labeling_job_response)

    # The print above must appear in the captured output.
    out, _ = capsys.readouterr()
    assert "delete_data_labeling_job_response" in out
75+
76+
77+
# Creating a data labeling job for images
78+
def test_create_data_labeling_job_image_segmentation_sample(capsys, shared_state):
    """Run the image segmentation sample and record the created job's name."""
    sample_module = create_data_labeling_job_image_segmentation_sample

    sample_module.create_data_labeling_job_image_segmentation_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset=f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}",
        instruction_uri=INSTRUCTIONS_GCS_URI,
        inputs_schema_uri=INPUTS_SCHEMA_URI,
        annotation_spec=ANNOTATION_SPEC,
        annotation_set_name=ANNOTATION_SET_NAME,
        api_endpoint=API_ENDPOINT,
    )

    # Save the resource name of the newly created data labeling job so the
    # teardown fixture can cancel and delete it.
    captured, _ = capsys.readouterr()
    shared_state["data_labeling_job_name"] = helpers.get_name(captured)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START aiplatform_create_data_labeling_job_specialist_pool_sample]
16+
from google.cloud import aiplatform
17+
from google.protobuf import json_format
18+
from google.protobuf.struct_pb2 import Value
19+
20+
21+
def create_data_labeling_job_specialist_pool_sample(
    project: str,
    display_name: str,
    dataset: str,
    specialist_pool: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a data labeling job tied to a specialist pool and print the response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new data labeling job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        specialist_pool: Full specialist pool resource name, in the form
            projects/{project}/locations/{location}/specialistPools/{specialist_pool_id}.
        instruction_uri: Value sent as the job's "instruction_uri".
        inputs_schema_uri: Value sent as the job's "inputs_schema_uri".
        annotation_spec: Single entry placed in the "annotation_specs" list
            of the job inputs.
        location: Location used to build the parent resource path.
        api_endpoint: API endpoint the client connects to.
    """
    # Initialize the client used to create and send requests. It only needs to
    # be created once and can be reused for multiple requests.
    job_service = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value built from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotation_specs": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id}
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_specialist_pool"
        },
        # Full resource name: projects/{project}/locations/{location}/specialistPools/{specialist_pool_id}
        "specialist_pools": [specialist_pool],
    }

    created_job = job_service.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", created_job)
58+
59+
60+
# [END aiplatform_create_data_labeling_job_specialist_pool_sample]

0 commit comments

Comments
 (0)