Skip to content

Commit 26ae5e7

Browse files
authored
chore: add tests for to_gbq with flexible column names (#1645)
* chore: add tests for `to_gbq` with flexible column names * just numbers
1 parent f67aa41 commit 26ae5e7

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

tests/system/small/test_dataframe_io.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,89 @@ def test_to_gbq_w_duplicate_column_names(
552552
)
553553

554554

555+
def test_to_gbq_w_flexible_column_names(
    scalars_df_index, dataset_id: str, bigquery_client
):
    """Check that `to_gbq` writes flexible column names to the destination.

    This variant exercises BigQuery-backed storage nodes: the columns are
    selected from ``scalars_df_index`` and renamed before being written.

    See: https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
    """
    table_id = f"{dataset_id}.test_to_gbq_w_flexible_column_names"

    # Maps each source column to the flexible name it is written under.
    rename_map = {
        # First column in Japanese (tests unicode).
        "bool_col": "最初のカラム",
        "bytes_col": "col with space",
        # Dots aren't allowed in BigQuery column names, so these should be
        # translated.
        "date_col": "col.with.dots",
        "datetime_col": "col-with-hyphens",
        "geography_col": "1start_with_number",
        "int64_col": "col_with_underscore",
        # Just numbers.
        "int64_too": "123",
    }
    # Column names expected in the destination table's schema, in order.
    expected_names = [
        "最初のカラム",
        "col with space",
        # The dotted name is the only one expected to change: dots become
        # underscores.
        "col_with_dots",
        "col-with-hyphens",
        "1start_with_number",
        "col_with_underscore",
        "123",
    ]

    selected_df = scalars_df_index[rename_map.keys()]
    renamed_df = selected_df.rename(columns=rename_map)
    # The DataFrame itself keeps the names exactly as given, dots included.
    assert list(renamed_df.columns) == list(rename_map.values())

    renamed_df.to_gbq(table_id, index=False)

    written_table = bigquery_client.get_table(table_id)
    schema_names = [field.name for field in written_table.schema]
    assert schema_names == expected_names
593+
594+
595+
def test_to_gbq_w_flexible_column_names_local_node(
    session, dataset_id: str, bigquery_client
):
    """Check that `to_gbq` writes flexible column names for local data.

    This variant exercises local nodes, e.g. ``read_pandas()``, since those
    may go through a different code path than data that starts in BigQuery.

    See: https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
    """
    table_id = f"{dataset_id}.test_to_gbq_w_flexible_column_names_local_node"

    # Local data keyed by the flexible column names under test.
    column_data = {
        # First column in Japanese (tests unicode).
        "最初のカラム": [1, 2, 3],
        "col with space": [4, 5, 6],
        # Dots aren't allowed in BigQuery column names, so these should be
        # translated.
        "col.with.dots": [7, 8, 9],
        "col-with-hyphens": [10, 11, 12],
        "1start_with_number": [13, 14, 15],
        "col_with_underscore": [16, 17, 18],
        "123": [19, 20, 21],
    }
    # Column names expected in the destination table's schema, in order.
    expected_names = [
        "最初のカラム",
        "col with space",
        # The dotted name is the only one expected to change: dots become
        # underscores.
        "col_with_dots",
        "col-with-hyphens",
        "1start_with_number",
        "col_with_underscore",
        "123",
    ]
    input_names = list(column_data)

    local_df = pd.DataFrame(column_data)
    assert list(local_df.columns) == input_names

    # Names must be unchanged after loading the local data into the session.
    uploaded_df = session.read_pandas(local_df)
    assert list(uploaded_df.columns) == input_names

    uploaded_df.to_gbq(table_id, index=False)

    written_table = bigquery_client.get_table(table_id)
    schema_names = [field.name for field in written_table.schema]
    assert schema_names == expected_names
636+
637+
555638
def test_to_gbq_w_None_column_names(
556639
scalars_df_index, scalars_pandas_df_index, dataset_id
557640
):

0 commit comments

Comments
 (0)