Skip to content

Commit 06cec00

Browse files
authored
fix: model.fit metric not collected issue. (#1085)
* fix: model.fit metric not collected issue.
* update
* fix unit test
* update code and test
* update code
* update comment
* update test
1 parent fd06d31 commit 06cec00

File tree

4 files changed

+23
-1
lines changed

4 files changed

+23
-1
lines changed

bigframes/session/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1373,7 +1373,9 @@ def _start_query_ml_ddl(
13731373
# https://cloud.google.com/bigquery/docs/customer-managed-encryption#encrypt-model
13741374
job_config.destination_encryption_configuration = None
13751375

1376-
return bf_io_bigquery.start_query_with_client(self.bqclient, sql, job_config)
1376+
return bf_io_bigquery.start_query_with_client(
1377+
self.bqclient, sql, job_config, metrics=self._metrics
1378+
)
13771379

13781380
def _export(
13791381
self,

tests/system/large/ml/test_linear_model.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,20 @@ def test_unordered_mode_linear_regression_configure_fit_score_predict(
128128
]
129129
]
130130
y_train = df[["body_mass_g"]]
131+
132+
start_execution_count = df._block._expr.session._metrics.execution_count
131133
model.fit(X_train, y_train)
134+
end_execution_count = df._block._expr.session._metrics.execution_count
135+
# The fit function initiates two queries: the first generates and caches
136+
# the training data, while the second creates and fits the model.
137+
assert end_execution_count - start_execution_count == 2
132138

133139
# Check score to ensure the model was fitted
140+
start_execution_count = end_execution_count
134141
result = model.score(X_train, y_train).to_pandas()
142+
end_execution_count = df._block._expr.session._metrics.execution_count
143+
assert end_execution_count - start_execution_count == 1
144+
135145
utils.check_pandas_df_schema_and_index(
136146
result, columns=utils.ML_REGRESSION_METRICS, index=1
137147
)
@@ -154,7 +164,10 @@ def test_unordered_mode_linear_regression_configure_fit_score_predict(
154164
assert reloaded_model.max_iterations == 20
155165
assert reloaded_model.tol == 0.01
156166

167+
start_execution_count = df._block._expr.session._metrics.execution_count
157168
pred = reloaded_model.predict(df)
169+
end_execution_count = df._block._expr.session._metrics.execution_count
170+
assert end_execution_count - start_execution_count == 1
158171
utils.check_pandas_df_schema_and_index(
159172
pred,
160173
columns=("predicted_body_mass_g",),

tests/system/small/ml/test_core.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,14 @@ def test_model_forecast(time_series_bqml_arima_plus_model: core.BqmlModel):
383383

384384
def test_model_register(ephemera_penguins_bqml_linear_model: core.BqmlModel):
385385
model = ephemera_penguins_bqml_linear_model
386+
387+
start_execution_count = model.session._metrics.execution_count
388+
386389
model.register()
387390

391+
end_execution_count = model.session._metrics.execution_count
392+
assert end_execution_count - start_execution_count == 1
393+
388394
assert model.model.model_id is not None
389395
model_name = "bigframes_" + model.model.model_id
390396
# Only a registered model contains this field, and the field includes the project/dataset. Here we only check the model_id.

tests/unit/ml/test_golden_sql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def mock_session():
3636
TEMP_MODEL_ID.project, TEMP_MODEL_ID.dataset_id
3737
)
3838
mock_session._bq_kms_key_name = None
39+
mock_session._metrics = None
3940

4041
query_job = mock.create_autospec(bigquery.QueryJob)
4142
type(query_job).destination = mock.PropertyMock(

0 commit comments

Comments
 (0)