
Commit 833b953 (1 parent: eafa6ba)

feat: sample - add col load append (#112)

* feat: new sample - Add Column Load Append
* feat: add column load append work in progress
* update comment
* updates
* updates
* fix build issue
* update based on comments
* update based on comments
* update exception handling
* update exception handling

File tree: 4 files changed (+203, -0 lines changed)
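In brief, the new sample relaxes a table's schema while appending rows from a CSV load job: the write disposition is set to WRITE_APPEND and field addition is allowed via a schema update option, so the extra NULLABLE column can be added by the load itself. A minimal excerpt of that key configuration, using the same identifiers as the full listing below (tableId, sourceUri, and newSchema are the variables defined there):

    // Append to the existing table and allow the load job to add new (NULLABLE) fields.
    LoadJobConfiguration loadJobConfig =
        LoadJobConfiguration.builder(tableId, sourceUri)
            .setFormatOptions(FormatOptions.csv())
            .setWriteDisposition(WriteDisposition.WRITE_APPEND)
            .setSchema(newSchema) // existing 'name' field plus the new 'post_abbr' field
            .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION))
            .build();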
AddColumnLoadAppend.java (new file)

Lines changed: 104 additions & 0 deletions
/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.bigquery;

// [START bigquery_relax_column_load_append]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableId;
import com.google.common.collect.ImmutableList;
import java.util.UUID;

public class AddColumnLoadAppend {

  public static void runAddColumnLoadAppend() throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    String sourceUri = "/path/to/file.csv";
    addColumnLoadAppend(datasetName, tableName, sourceUri);
  }

  public static void addColumnLoadAppend(String datasetName, String tableName, String sourceUri)
      throws Exception {
    try {
      // Initialize client that will be used to send requests. This client only needs to be
      // created once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableId tableId = TableId.of(datasetName, tableName);
      Table table = bigquery.getTable(tableId);

      // Add a new column to a BigQuery table while appending rows via a load job.
      // 'REQUIRED' fields cannot be added to an existing schema, so the additional column must be
      // 'NULLABLE'.
      Schema newSchema =
          Schema.of(
              Field.newBuilder("name", LegacySQLTypeName.STRING)
                  .setMode(Field.Mode.REQUIRED)
                  .build(),
              // The additional column below is added during the load job.
              Field.newBuilder("post_abbr", LegacySQLTypeName.STRING)
                  .setMode(Field.Mode.NULLABLE)
                  .build());

      LoadJobConfiguration loadJobConfig =
          LoadJobConfiguration.builder(tableId, sourceUri)
              .setFormatOptions(FormatOptions.csv())
              .setWriteDisposition(WriteDisposition.WRITE_APPEND)
              .setSchema(newSchema)
              .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION))
              .build();

      // Create a job ID so that we can safely retry.
      JobId jobId = JobId.of(UUID.randomUUID().toString());
      Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).setJobId(jobId).build());
      System.out.println(loadJob.getJobId());

      // Load the CSV data into the table.
      // Blocks until this load table job completes its execution, either failing or succeeding.
      Job completedJob = loadJob.waitFor();

      // Check for errors.
      if (completedJob == null) {
        throw new Exception("Job not executed since it no longer exists.");
      } else if (completedJob.getStatus().getError() != null) {
        // You can also look at completedJob.getStatus().getExecutionErrors() for all
        // errors, not just the latest one.
        throw new Exception(
            "BigQuery was unable to load into the table due to an error: \n"
                + completedJob.getStatus().getError());
      }
      System.out.println("Column successfully added during load append job");
    } catch (BigQueryException | InterruptedException e) {
      System.out.println("Column not added during load append \n" + e.toString());
    }
  }
}
// [END bigquery_relax_column_load_append]
AddColumnLoadAppendIT.java (new file)

Lines changed: 78 additions & 0 deletions
/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.bigquery;

import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class AddColumnLoadAppendIT {
  private ByteArrayOutputStream bout;
  private PrintStream out;

  private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");

  private static void requireEnvVar(String varName) {
    assertNotNull(
        "Environment variable " + varName + " is required to perform these tests.",
        System.getenv(varName));
  }

  @BeforeClass
  public static void checkRequirements() {
    requireEnvVar("BIGQUERY_DATASET_NAME");
  }

  @Before
  public void setUp() {
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    System.setOut(out);
  }

  @After
  public void tearDown() {
    System.setOut(null);
  }

  @Test
  public void testAddColumnLoadAppend() throws Exception {
    String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";

    String tableName = "ADD_COLUMN_LOAD_APPEND_TEST";
    Schema originalSchema =
        Schema.of(
            Field.newBuilder("name", LegacySQLTypeName.STRING)
                .setMode(Field.Mode.REQUIRED)
                .build());

    CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, originalSchema);

    AddColumnLoadAppend.addColumnLoadAppend(BIGQUERY_DATASET_NAME, tableName, sourceUri);

    // Clean up
    DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
  }
}

samples/src/test/java/com/example/bigquery/CreateDatasetIT.java

Lines changed: 18 additions & 0 deletions
@@ -17,18 +17,33 @@
 package com.example.bigquery;

 import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.assertNotNull;

 import com.google.cloud.bigquery.testing.RemoteBigQueryHelper;
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;

 public class CreateDatasetIT {
   private ByteArrayOutputStream bout;
   private PrintStream out;

+  private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT");
+
+  private static void requireEnvVar(String varName) {
+    assertNotNull(
+        "Environment variable " + varName + " is required to perform these tests.",
+        System.getenv(varName));
+  }
+
+  @BeforeClass
+  public static void checkRequirements() {
+    requireEnvVar("GOOGLE_CLOUD_PROJECT");
+  }
+
   @Before
   public void setUp() {
     bout = new ByteArrayOutputStream();
@@ -46,5 +61,8 @@ public void testCreateDataset() {
     String generatedDatasetName = RemoteBigQueryHelper.generateDatasetName();
     CreateDataset.createDataset(generatedDatasetName);
     assertThat(bout.toString()).contains(generatedDatasetName + " created successfully");
+
+    // Clean up
+    DeleteDataset.deleteDataset(GOOGLE_CLOUD_PROJECT, generatedDatasetName);
   }
 }

samples/src/test/java/com/example/bigquery/CreateTableIT.java

Lines changed: 3 additions & 0 deletions
@@ -60,5 +60,8 @@ public void testCreateTable() {
     CreateTable.createTable(generatedDatasetName, tableName, schema);

     assertThat(bout.toString()).contains("Table created successfully");
+
+    // Clean up
+    DeleteTable.deleteTable(generatedDatasetName, tableName);
   }
 }
