@@ -73,20 +73,28 @@ def column_names(self) -> List[str]:
                 gcs_source_uris.sort()
 
                 # Get the first file in sorted list
-                return TabularDataset._retrieve_gcs_source_columns(
-                    self.project, gcs_source_uris[0]
+                return self._retrieve_gcs_source_columns(
+                    project=self.project,
+                    gcs_csv_file_path=gcs_source_uris[0],
+                    credentials=self.credentials,
                 )
         elif bq_source:
             bq_table_uri = bq_source.get("uri")
             if bq_table_uri:
-                return TabularDataset._retrieve_bq_source_columns(
-                    self.project, bq_table_uri
+                return self._retrieve_bq_source_columns(
+                    project=self.project,
+                    bq_table_uri=bq_table_uri,
+                    credentials=self.credentials,
                 )
 
         raise RuntimeError("No valid CSV or BigQuery datasource found.")
 
     @staticmethod
-    def _retrieve_gcs_source_columns(project: str, gcs_csv_file_path: str) -> List[str]:
+    def _retrieve_gcs_source_columns(
+        project: str,
+        gcs_csv_file_path: str,
+        credentials: Optional[auth_credentials.Credentials] = None,
+    ) -> List[str]:
         """Retrieve the columns from a comma-delimited CSV file stored on Google Cloud Storage
 
         Example Usage:
@@ -104,7 +112,8 @@ def _retrieve_gcs_source_columns(project: str, gcs_csv_file_path: str) -> List[str]:
             gcs_csv_file_path (str):
                 Required. A full path to a CSV files stored on Google Cloud Storage.
                 Must include "gs://" prefix.
-
+            credentials (auth_credentials.Credentials):
+                Credentials to use with GCS Client.
         Returns:
             List[str]
                 A list of columns names in the CSV file.
@@ -116,7 +125,7 @@ def _retrieve_gcs_source_columns(project: str, gcs_csv_file_path: str) -> List[str]:
         gcs_bucket, gcs_blob = utils.extract_bucket_and_prefix_from_gcs_path(
             gcs_csv_file_path
         )
-        client = storage.Client(project=project)
+        client = storage.Client(project=project, credentials=credentials)
         bucket = client.bucket(gcs_bucket)
         blob = bucket.blob(gcs_blob)
 
@@ -135,6 +144,7 @@ def _retrieve_gcs_source_columns(project: str, gcs_csv_file_path: str) -> List[str]:
             line += blob.download_as_bytes(
                 start=start_index, end=start_index + increment
             ).decode("utf-8")
+
             first_new_line_index = line.find("\n")
             start_index += increment
 
@@ -156,7 +166,11 @@ def _retrieve_gcs_source_columns(project: str, gcs_csv_file_path: str) -> List[str]:
         return next(csv_reader)
 
     @staticmethod
-    def _retrieve_bq_source_columns(project: str, bq_table_uri: str) -> List[str]:
+    def _retrieve_bq_source_columns(
+        project: str,
+        bq_table_uri: str,
+        credentials: Optional[auth_credentials.Credentials] = None,
+    ) -> List[str]:
         """Retrieve the columns from a table on Google BigQuery
 
         Example Usage:
@@ -174,6 +188,8 @@ def _retrieve_bq_source_columns(project: str, bq_table_uri: str) -> List[str]:
             bq_table_uri (str):
                 Required. A URI to a BigQuery table.
                 Can include "bq://" prefix but not required.
+            credentials (auth_credentials.Credentials):
+                Credentials to use with BQ Client.
 
         Returns:
             List[str]
@@ -185,7 +201,7 @@ def _retrieve_bq_source_columns(project: str, bq_table_uri: str) -> List[str]:
         if bq_table_uri.startswith(prefix):
             bq_table_uri = bq_table_uri[len(prefix) :]
 
-        client = bigquery.Client(project=project)
+        client = bigquery.Client(project=project, credentials=credentials)
         table = client.get_table(bq_table_uri)
         schema = table.schema
         return [schema.name for schema in schema]
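
For reference, a minimal sketch of the pattern this diff introduces: an explicit credentials object is threaded through to the GCS and BigQuery clients instead of relying on each client's ambient default. The helper name `make_clients` is hypothetical and not part of the SDK; `google.auth.default()` is shown only as one way to obtain credentials when none are supplied, mirroring the `Optional[...] = None` default on the static methods above.

# Sketch only: illustrates the client-construction pattern used in the change,
# not the aiplatform API surface. `make_clients` is a hypothetical helper.
from typing import Optional

from google.auth import default as google_auth_default
from google.auth import credentials as auth_credentials
from google.cloud import bigquery, storage


def make_clients(
    project: str,
    credentials: Optional[auth_credentials.Credentials] = None,
):
    # Fall back to Application Default Credentials when none are supplied.
    if credentials is None:
        credentials, _ = google_auth_default()

    # Both clients accept the same credentials object, so one identity serves
    # both the CSV (GCS) and BigQuery code paths.
    gcs_client = storage.Client(project=project, credentials=credentials)
    bq_client = bigquery.Client(project=project, credentials=credentials)
    return gcs_client, bq_client

In the change itself, the caller simply forwards the dataset's own credentials: the keyword arguments added in column_names pass self.credentials through to the two static helpers.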