Skip to content

Commit 417de3a

Browse files
authored
fix: make DataFrame __getattr__ and __setattr__ more robust to subclassing (#1352)
* fix: make `DataFrame` `__getattr__` and `__setattr__` more robust to subclassing * use _block as an initialized indicator
1 parent f2d5264 commit 417de3a

File tree

2 files changed

+47
-19
lines changed

2 files changed

+47
-19
lines changed

bigframes/dataframe.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ def __init__(
118118
):
119119
global bigframes
120120

121+
self._query_job: Optional[bigquery.QueryJob] = None
122+
121123
if copy is not None and not copy:
122124
raise ValueError(
123125
f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
@@ -182,7 +184,6 @@ def __init__(
182184
if dtype:
183185
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
184186
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
185-
self._block = block
186187

187188
else:
188189
import bigframes.pandas
@@ -194,10 +195,14 @@ def __init__(
194195
dtype=dtype, # type:ignore
195196
)
196197
if session:
197-
self._block = session.read_pandas(pd_dataframe)._get_block()
198+
block = session.read_pandas(pd_dataframe)._get_block()
198199
else:
199-
self._block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
200-
self._query_job: Optional[bigquery.QueryJob] = None
200+
block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
201+
202+
# We use _block as an indicator in __getattr__ and __setattr__ to see
203+
# if the object is fully initialized, so make sure we set the _block
204+
# attribute last.
205+
self._block = block
201206
self._block.session._register_object(self)
202207

203208
def __dir__(self):
@@ -625,13 +630,17 @@ def _getitem_bool_series(self, key: bigframes.series.Series) -> DataFrame:
625630
return DataFrame(block)
626631

627632
def __getattr__(self, key: str):
628-
# Protect against recursion errors with uninitialized DataFrame
629-
# objects. See:
633+
# To allow subclasses to set private attributes before the class is
634+
# fully initialized, protect against recursion errors with
635+
# uninitialized DataFrame objects. Note: this comes at the downside
636+
# that columns with a leading `_` won't be treated as columns.
637+
#
638+
# See:
630639
# https://github.com/googleapis/python-bigquery-dataframes/issues/728
631640
# and
632641
# https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html
633642
if key == "_block":
634-
raise AttributeError("_block")
643+
raise AttributeError(key)
635644

636645
if key in self._block.column_labels:
637646
return self.__getitem__(key)
@@ -651,26 +660,36 @@ def __getattr__(self, key: str):
651660
raise AttributeError(key)
652661

653662
def __setattr__(self, key: str, value):
654-
if key in ["_block", "_query_job"]:
663+
if key == "_block":
664+
object.__setattr__(self, key, value)
665+
return
666+
667+
# To allow subclasses to set private attributes before the class is
668+
# fully initialized, assume anything set before `_block` is initialized
669+
# is a regular attribute.
670+
if not hasattr(self, "_block"):
655671
object.__setattr__(self, key, value)
656672
return
657-
# Can this be removed???
673+
674+
# If someone has a column named the same as a normal attribute
675+
# (e.g. index), we want to set the normal attribute, not the column.
676+
# To do that, check if there is a normal attribute by using
677+
# __getattribute__ (not __getattr__, because that includes columns).
678+
# If that returns a value without raising, then we know this is a
679+
# normal attribute and we should prefer that.
658680
try:
659-
# boring attributes go through boring old path
660681
object.__getattribute__(self, key)
661682
return object.__setattr__(self, key, value)
662683
except AttributeError:
663684
pass
664685

665-
# if this fails, go on to more involved attribute setting
666-
# (note that this matches __getattr__, above).
667-
try:
668-
if key in self.columns:
669-
self[key] = value
670-
else:
671-
object.__setattr__(self, key, value)
672-
# Can this be removed?
673-
except (AttributeError, TypeError):
686+
# If we made it here, then we know that it's not a regular attribute
687+
# already, so it might be a column to update. Note: we don't allow
688+
# adding new columns using __setattr__, only __setitem__, that way we
689+
# can still add regular new attributes.
690+
if key in self._block.column_labels:
691+
self[key] = value
692+
else:
674693
object.__setattr__(self, key, value)
675694

676695
def __repr__(self) -> str:

tests/unit/test_dataframe.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@ def test_dataframe_repr_with_uninitialized_object():
4141
assert "DataFrame" in got
4242

4343

44+
def test_dataframe_setattr_with_uninitialized_object():
45+
"""Ensures DataFrame can be subclassed without trying to set attributes as columns."""
46+
# Avoid calling __init__ since it might be called later in a subclass.
47+
# https://stackoverflow.com/a/6384982/101923
48+
dataframe = bigframes.dataframe.DataFrame.__new__(bigframes.dataframe.DataFrame)
49+
dataframe.lineage = "my-test-value"
50+
assert dataframe.lineage == "my-test-value" # Should just be a regular attribute.
51+
52+
4453
def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch):
4554
dataframe = resources.create_dataframe(monkeypatch)
4655

0 commit comments

Comments
 (0)