From 9798a2b14dffb20432f732343cac92341e42fe09 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 15 Dec 2023 14:42:07 -0600 Subject: [PATCH 01/27] deps: migrate to `ibis-framework >= "7.1.0"` (#53) * deps: migrate to `ibis-framework >= "7.0.0"` This should unlock some bug fixes as well as potential `UNNEST` support in a future change. * use dtype instead of output_dtype in custom ops * adjust type annotations * Update noxfile.py * update type annotations * fix for deferred values * fix prerelease * minimum 7.1.0 * mypy * revert presubmit changes * fix minimum sqlglot * fix custom op * hack InMemoryTable formatter back in * use ops module to avoid breaking changes if ops move around * workaround nullscalar issue * update usage of percent_rank to explicitly order by the value * disable ibis prerelease tests for now * fix unit_prerelease --- bigframes/core/compile/compiled.py | 25 ++++++- bigframes/core/reshape/__init__.py | 6 +- bigframes/operations/__init__.py | 70 ++++++++++++++++--- bigframes/operations/aggregations.py | 8 +-- bigframes/remote_function.py | 9 +-- bigframes/session/__init__.py | 7 +- mypy.ini | 3 + noxfile.py | 26 +++---- setup.py | 2 +- testing/constraints-3.9.txt | 4 +- tests/system/small/test_ibis.py | 13 ++-- tests/unit/resources.py | 2 +- .../ibis/backends/bigquery/__init__.py | 3 + .../ibis/backends/bigquery/compiler.py | 59 ++++++++++++++++ .../ibis/expr/operations/analytic.py | 14 ++-- .../ibis/expr/operations/json.py | 2 +- .../ibis/expr/operations/reductions.py | 8 +-- 17 files changed, 198 insertions(+), 63 deletions(-) create mode 100644 third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 461c2c005a..537d9c8b52 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -21,6 +21,7 @@ import ibis import ibis.backends.bigquery as ibis_bigquery +import ibis.common.deferred # type: ignore import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types import pandas @@ -62,7 +63,16 @@ def __init__( self._columns = tuple(columns) # To allow for more efficient lookup by column name, create a # dictionary mapping names to column values. - self._column_names = {column.get_name(): column for column in self._columns} + self._column_names = { + ( + column.resolve(table) + # TODO(https://github.com/ibis-project/ibis/issues/7613): use + # public API to refer to Deferred type. + if isinstance(column, ibis.common.deferred.Deferred) + else column + ).get_name(): column + for column in self._columns + } @property def columns(self) -> typing.Tuple[ibis_types.Value, ...]: @@ -643,7 +653,16 @@ def __init__( # To allow for more efficient lookup by column name, create a # dictionary mapping names to column values. - self._column_names = {column.get_name(): column for column in self._columns} + self._column_names = { + ( + column.resolve(table) + # TODO(https://github.com/ibis-project/ibis/issues/7613): use + # public API to refer to Deferred type. 
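+                # A Deferred has no name of its own; resolving it against the
+                # table yields a concrete column whose name we can read.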
+ if isinstance(column, ibis.common.deferred.Deferred) + else column + ).get_name(): column + for column in self._columns + } self._hidden_ordering_column_names = { column.get_name(): column for column in self._hidden_ordering_columns } @@ -860,7 +879,7 @@ def project_window_op( case_statement = ibis.case() for clause in clauses: case_statement = case_statement.when(clause[0], clause[1]) - case_statement = case_statement.else_(window_op).end() + case_statement = case_statement.else_(window_op).end() # type: ignore window_op = case_statement result = self._set_or_replace_by_id(output_name or column_name, window_op) diff --git a/bigframes/core/reshape/__init__.py b/bigframes/core/reshape/__init__.py index dc61c3baad..24c1bff309 100644 --- a/bigframes/core/reshape/__init__.py +++ b/bigframes/core/reshape/__init__.py @@ -18,6 +18,7 @@ import bigframes.constants as constants import bigframes.core as core +import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.dataframe import bigframes.operations as ops @@ -145,7 +146,10 @@ def qcut( block, result = block.apply_window_op( x._value_column, agg_ops.QcutOp(q), - window_spec=core.WindowSpec(grouping_keys=(nullity_id,)), + window_spec=core.WindowSpec( + grouping_keys=(nullity_id,), + ordering=(order.OrderingColumnReference(x._value_column),), + ), ) block, result = block.apply_binary_op( result, nullity_id, ops.partial_arg3(ops.where_op, None), result_label=label diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index a29dd36c72..0655aafdb3 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -18,6 +18,7 @@ import typing import ibis +import ibis.common.annotations import ibis.common.exceptions import ibis.expr.datatypes as ibis_dtypes import ibis.expr.operations.generic @@ -352,14 +353,23 @@ def _as_ibis(self, x: ibis_types.Value): str_val = typing.cast(ibis_types.StringValue, x) # SQL pad operations will truncate, we do not want to truncate though. - pad_length = ibis.greatest(str_val.length(), self._length) + pad_length = typing.cast( + ibis_types.IntegerValue, ibis.greatest(str_val.length(), self._length) + ) if self._side == "left": return str_val.lpad(pad_length, self._fillchar) elif self._side == "right": return str_val.rpad(pad_length, self._fillchar) else: # side == both # Pad more on right side if can't pad both sides equally - lpad_amount = ((pad_length - str_val.length()) // 2) + str_val.length() + lpad_amount = typing.cast( + ibis_types.IntegerValue, + ( + (pad_length - str_val.length()) + // typing.cast(ibis_types.NumericValue, ibis.literal(2)) + ) + + str_val.length(), + ) return str_val.lpad(lpad_amount, self._fillchar).rpad( pad_length, self._fillchar ) @@ -722,10 +732,29 @@ def ne_op( return x != y +def _null_or_value(value: ibis_types.Value, where_value: ibis_types.BooleanValue): + return ibis.where( + where_value, + value, + ibis.null(), + ) + + def and_op( x: ibis_types.Value, y: ibis_types.Value, ): + # Workaround issue https://github.com/ibis-project/ibis/issues/7775 by + # implementing three-valued logic ourselves. For AND, when we encounter a + # NULL value, we only know when the result is FALSE, otherwise the result + # is unknown (NULL). 
See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if isinstance(x, ibis_types.NullScalar): + return _null_or_value(y, y == ibis.literal(False)) + + if isinstance(y, ibis_types.NullScalar): + return _null_or_value(x, x == ibis.literal(False)) + return typing.cast(ibis_types.BooleanValue, x) & typing.cast( ibis_types.BooleanValue, y ) @@ -735,6 +764,17 @@ def or_op( x: ibis_types.Value, y: ibis_types.Value, ): + # Workaround issue https://github.com/ibis-project/ibis/issues/7775 by + # implementing three-valued logic ourselves. For OR, when we encounter a + # NULL value, we only know when the result is TRUE, otherwise the result + # is unknown (NULL). See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if isinstance(x, ibis_types.NullScalar): + return _null_or_value(y, y == ibis.literal(True)) + + if isinstance(y, ibis_types.NullScalar): + return _null_or_value(x, x == ibis.literal(True)) + return typing.cast(ibis_types.BooleanValue, x) | typing.cast( ibis_types.BooleanValue, y ) @@ -746,10 +786,16 @@ def add_op( y: ibis_types.Value, ): if isinstance(x, ibis_types.NullScalar) or isinstance(x, ibis_types.NullScalar): - return - return typing.cast(ibis_types.NumericValue, x) + typing.cast( - ibis_types.NumericValue, y - ) + return ibis.null() + try: + # Could be string concatenation or numeric addition. + return x + y # type: ignore + except ibis.common.annotations.SignatureValidationError as exc: + left_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(x.type()) + right_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(y.type()) + raise TypeError( + f"Cannot add {repr(left_type)} and {repr(right_type)}. {constants.FEEDBACK_LINK}" + ) from exc @short_circuit_nulls() @@ -1047,7 +1093,7 @@ def where_op( replacement: ibis_types.Value, ) -> ibis_types.Value: """Returns x if y is true, otherwise returns z.""" - return ibis.case().when(condition, original).else_(replacement).end() + return ibis.case().when(condition, original).else_(replacement).end() # type: ignore def clip_op( @@ -1060,7 +1106,7 @@ def clip_op( not isinstance(upper, ibis_types.NullScalar) ): return ( - ibis.case() + ibis.case() # type: ignore .when(upper.isnull() | (original > upper), upper) .else_(original) .end() @@ -1069,7 +1115,7 @@ def clip_op( upper, ibis_types.NullScalar ): return ( - ibis.case() + ibis.case() # type: ignore .when(lower.isnull() | (original < lower), lower) .else_(original) .end() @@ -1079,9 +1125,11 @@ def clip_op( ): return original else: - # Note: Pandas has unchanged behavior when upper bound and lower bound are flipped. This implementation requires that lower_bound < upper_bound + # Note: Pandas has unchanged behavior when upper bound and lower bound + # are flipped. + # This implementation requires that lower_bound < upper_bound. return ( - ibis.case() + ibis.case() # type: ignore .when(lower.isnull() | (original < lower), lower) .when(upper.isnull() | (original > upper), upper) .else_(original) diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 465d188724..363dfe819d 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -74,7 +74,7 @@ def _as_ibis( # Will be null if all inputs are null. Pandas defaults to zero sum though. 
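        # _apply_window_if_present wraps the aggregate in an OVER clause when a
        # window spec is supplied; otherwise it stays a plain aggregate.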
bq_sum = _apply_window_if_present(column.sum(), window) return ( - ibis.case().when(bq_sum.isnull(), ibis_types.literal(0)).else_(bq_sum).end() + ibis.case().when(bq_sum.isnull(), ibis_types.literal(0)).else_(bq_sum).end() # type: ignore ) @@ -167,7 +167,7 @@ def _as_ibis( .else_(magnitude * pow(-1, negative_count_parity)) .end() ) - return float_result.cast(column.type()) + return float_result.cast(column.type()) # type: ignore class MaxOp(AggregateOp): @@ -290,7 +290,7 @@ def _as_ibis( dtypes.literal_to_ibis_scalar(bucket_n, force_dtype=Int64Dtype()), ) out = out.else_(None) - return out.end() + return out.end() # type: ignore @property def skips_nulls(self): @@ -482,7 +482,7 @@ def _map_to_literal( original: ibis_types.Value, literal: ibis_types.Scalar ) -> ibis_types.Column: # Hack required to perform aggregations on literals in ibis, even though bigquery will let you directly aggregate literals (eg. 'SELECT COUNT(1) from table1') - return ibis.ifelse(original.isnull(), literal, literal) + return ibis.ifelse(original.isnull(), literal, literal) # type: ignore sum_op = SumOp() diff --git a/bigframes/remote_function.py b/bigframes/remote_function.py index a899ebd371..f54c26fa56 100644 --- a/bigframes/remote_function.py +++ b/bigframes/remote_function.py @@ -535,17 +535,14 @@ def remote_function_node( """Creates an Ibis node representing a remote function call.""" fields = { - name: rlz.value(type_) if type_ else rlz.any + name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_) for name, type_ in zip( ibis_signature.parameter_names, ibis_signature.input_types ) } - try: - fields["output_type"] = rlz.shape_like("args", dtype=ibis_signature.output_type) # type: ignore - except TypeError: - fields["output_dtype"] = property(lambda _: ibis_signature.output_type) - fields["output_shape"] = rlz.shape_like("args") + fields["dtype"] = ibis_signature.output_type # type: ignore + fields["shape"] = rlz.shape_like("args") node = type(routine_ref_to_string_for_query(routine_ref), (ops.ValueOp,), fields) # type: ignore diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 5364060d1c..fb5fab86ce 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -79,9 +79,9 @@ import bigframes.session.clients import bigframes.version -# Even though the ibis.backends.bigquery.registry import is unused, it's needed +# Even though the ibis.backends.bigquery import is unused, it's needed # to register new and replacement ops with the Ibis BigQuery backend. 
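# Importing the package rather than just the registry module also pulls in the
# vendored compiler patches introduced by this change.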
-import third_party.bigframes_vendored.ibis.backends.bigquery.registry # noqa +import third_party.bigframes_vendored.ibis.backends.bigquery # noqa import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops import third_party.bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import third_party.bigframes_vendored.pandas.io.parquet as third_party_pandas_parquet @@ -873,8 +873,9 @@ def _read_pandas( total_ordering_columns=frozenset([ordering_col]), integer_encoding=IntegerEncoding(True, is_sequential=True), ) - table_expression = self.ibis_client.table( + table_expression = self.ibis_client.table( # type: ignore load_table_destination.table_id, + # TODO: use "dataset_id" as the "schema" database=f"{load_table_destination.project}.{load_table_destination.dataset_id}", ) diff --git a/mypy.ini b/mypy.ini index 901394813a..3809f8e241 100644 --- a/mypy.ini +++ b/mypy.ini @@ -24,5 +24,8 @@ ignore_missing_imports = True [mypy-pyarrow] ignore_missing_imports = True +[mypy-ibis.*] +ignore_missing_imports = True + [mypy-ipywidgets] ignore_missing_imports = True diff --git a/noxfile.py b/noxfile.py index 2174e27529..c0ec3b0c54 100644 --- a/noxfile.py +++ b/noxfile.py @@ -524,23 +524,19 @@ def prerelease(session: nox.sessions.Session, tests_path): ) already_installed.add("pandas") - # TODO(shobs): - # Commit https://github.com/ibis-project/ibis/commit/c20ba7feab6bdea6c299721310e04dbc10551cc2 - # introduced breaking change that removed the following: - # ibis.expr.rules.column - # ibis.expr.rules.value - # ibis.expr.rules.any - # Let's exclude ibis head from prerelease install list for now. Instead, use - # a working ibis-framework version resolved via setup.by (currently resolves - # to version 6.2.0 due to version requirement "6.2.0,<7.0.0dev"). - # We should enable the head back once bigframes support a version that - # includes the above commit. + # Ibis has introduced breaking changes. Let's exclude ibis head + # from prerelease install list for now. We should enable the head back + # once bigframes supports the version at HEAD. # session.install( - # "--upgrade", - # "-e", # Use -e so that py.typed file is included. - # "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework", + # "--upgrade", + # "-e", # Use -e so that py.typed file is included. + # "git+https://github.com/ibis-project/ibis.git@7.x.x#egg=ibis-framework", # ) - session.install("--no-deps", "ibis-framework==6.2.0") + session.install( + "--upgrade", + # "--pre", + "ibis-framework>=7.1.0,<8.0.0dev", + ) already_installed.add("ibis-framework") # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178 diff --git a/setup.py b/setup.py index 3351542985..1ad4bbd3eb 100644 --- a/setup.py +++ b/setup.py @@ -43,8 +43,8 @@ "google-cloud-iam >=2.12.1", "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", + "ibis-framework[bigquery] >=7.1.0,<8.0.0dev", # TODO: Relax upper bound once we have fixed `system_prerelease` tests. 
- "ibis-framework[bigquery] >=6.2.0,<7.0.0dev", "pandas >=1.5.0,<2.1.4", "pydata-google-auth >=1.8.2", "requests >=2.27.1", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index f43d3b4ca0..218255c77e 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -45,7 +45,7 @@ greenlet==2.0.2 grpc-google-iam-v1==0.12.6 grpcio==1.53.0 grpcio-status==1.48.2 -ibis-framework==6.2.0 +ibis-framework==7.1.0 humanize==4.6.0 identify==2.5.22 idna==3.4 @@ -107,7 +107,7 @@ scikit-learn==1.2.2 SecretStorage==3.3.3 six==1.16.0 SQLAlchemy==1.4.0 -sqlglot==10.6.4 +sqlglot==18.12.0 tomli==2.0.1 toolz==0.12.0 tqdm==4.65.0 diff --git a/tests/system/small/test_ibis.py b/tests/system/small/test_ibis.py index 58b78e0048..9fe1176068 100644 --- a/tests/system/small/test_ibis.py +++ b/tests/system/small/test_ibis.py @@ -23,11 +23,16 @@ def test_approximate_quantiles(session: bigframes.Session, scalars_table_id: str): num_bins = 3 ibis_client = session.ibis_client - _, dataset, table_id = scalars_table_id.split(".") - ibis_table: ibis_types.Table = ibis_client.table(table_id, database=dataset) + project, dataset, table_id = scalars_table_id.split(".") + ibis_table: ibis_types.Table = ibis_client.table( # type: ignore + table_id, + schema=dataset, + database=project, + ) ibis_column: ibis_types.NumericColumn = ibis_table["int64_col"] - quantiles: ibis_types.ArrayScalar = vendored_ibis_ops.ApproximateMultiQuantile( # type: ignore - ibis_column, num_bins=num_bins + quantiles: ibis_types.ArrayScalar = vendored_ibis_ops.ApproximateMultiQuantile( + ibis_column, # type: ignore + num_bins=num_bins, # type: ignore ).to_expr() value = quantiles[1] num_edges = quantiles.length() diff --git a/tests/unit/resources.py b/tests/unit/resources.py index 8ba321d122..b239b04671 100644 --- a/tests/unit/resources.py +++ b/tests/unit/resources.py @@ -79,7 +79,7 @@ def create_dataframe( # might not actually be used. Mock out the global session, too. monkeypatch.setattr(bigframes.core.global_session, "_global_session", session) bigframes.options.bigquery._session_started = True - return bigframes.dataframe.DataFrame({}, session=session) + return bigframes.dataframe.DataFrame({"col": []}, session=session) def create_pandas_session(tables: Dict[str, pandas.DataFrame]) -> bigframes.Session: diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py index e69de29bb2..43508fab11 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py @@ -0,0 +1,3 @@ +# Import all sub-modules to monkeypatch everything. 
+import third_party.bigframes_vendored.ibis.backends.bigquery.compiler # noqa +import third_party.bigframes_vendored.ibis.backends.bigquery.registry # noqa diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py b/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py new file mode 100644 index 0000000000..414f0a7c81 --- /dev/null +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/compiler.py @@ -0,0 +1,59 @@ +# Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/compiler.py +"""Module to convert from Ibis expression to SQL string.""" + +from __future__ import annotations + +import re + +from ibis.backends.base.sql import compiler as sql_compiler +import ibis.backends.bigquery.compiler +from ibis.backends.bigquery.datatypes import BigQueryType +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops + +_NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+') +_EXACT_NAME_REGEX = re.compile(f"^{_NAME_REGEX.pattern}$") + + +class BigQueryTableSetFormatter(sql_compiler.TableSetFormatter): + def _quote_identifier(self, name): + """Restore 6.x version of identifier quoting. + + 7.x uses sqlglot which as of December 2023 doesn't know about the + extended unicode names for BigQuery yet. + """ + if _EXACT_NAME_REGEX.match(name) is not None: + return name + return f"`{name}`" + + def _format_in_memory_table(self, op): + """Restore 6.x version of InMemoryTable. + + BigQuery DataFrames explicitly uses InMemoryTable only when we know + the data is small enough to embed in SQL. + """ + schema = op.schema + names = schema.names + types = schema.types + + raw_rows = [] + for row in op.data.to_frame().itertuples(index=False): + raw_row = ", ".join( + f"{self._translate(lit)} AS {name}" + for lit, name in zip( + map(ops.Literal, row, types), map(self._quote_identifier, names) + ) + ) + raw_rows.append(f"STRUCT({raw_row})") + array_type = BigQueryType.from_ibis(dt.Array(op.schema.as_struct())) + + return f"UNNEST({array_type}[{', '.join(raw_rows)}])" + + +# Override implementation. 
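+# Assigning onto the upstream class, rather than subclassing it, means every
+# existing call site picks up the restored 6.x behavior at method lookup time.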
+ibis.backends.bigquery.compiler.BigQueryTableSetFormatter._quote_identifier = ( + BigQueryTableSetFormatter._quote_identifier +) +ibis.backends.bigquery.compiler.BigQueryTableSetFormatter._format_in_memory_table = ( + BigQueryTableSetFormatter._format_in_memory_table +) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/analytic.py b/third_party/bigframes_vendored/ibis/expr/operations/analytic.py index 038987cac9..3d6a3b37b1 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/analytic.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/analytic.py @@ -2,22 +2,22 @@ from __future__ import annotations -from ibis.expr.operations.analytic import Analytic +import ibis.expr.operations as ops import ibis.expr.rules as rlz -class FirstNonNullValue(Analytic): +class FirstNonNullValue(ops.Analytic): """Retrieve the first element.""" - arg = rlz.column(rlz.any) - output_dtype = rlz.dtype_like("arg") + arg: ops.Column + dtype = rlz.dtype_like("arg") -class LastNonNullValue(Analytic): +class LastNonNullValue(ops.Analytic): """Retrieve the last element.""" - arg = rlz.column(rlz.any) - output_dtype = rlz.dtype_like("arg") + arg: ops.Column + dtype = rlz.dtype_like("arg") __all__ = [ diff --git a/third_party/bigframes_vendored/ibis/expr/operations/json.py b/third_party/bigframes_vendored/ibis/expr/operations/json.py index dbb3fa3066..772c2e8ff4 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/json.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/json.py @@ -6,4 +6,4 @@ class ToJsonString(Unary): - output_dtype = dt.string + dtype = dt.string diff --git a/third_party/bigframes_vendored/ibis/expr/operations/reductions.py b/third_party/bigframes_vendored/ibis/expr/operations/reductions.py index 5e6ad9ecf2..e6644f477a 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/reductions.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/reductions.py @@ -3,8 +3,8 @@ from __future__ import annotations import ibis.expr.datatypes as dt +import ibis.expr.operations.core as ibis_ops_core from ibis.expr.operations.reductions import Filterable, Reduction -import ibis.expr.rules as rlz class ApproximateMultiQuantile(Filterable, Reduction): @@ -13,9 +13,9 @@ class ApproximateMultiQuantile(Filterable, Reduction): See: https://cloud.google.com/bigquery/docs/reference/standard-sql/approximate_aggregate_functions#approx_quantiles """ - arg = rlz.any - num_bins = rlz.value(dt.int64) - output_dtype = dt.Array(dt.float64) + arg: ibis_ops_core.Value + num_bins: ibis_ops_core.Value[dt.Int64] + dtype = dt.Array(dt.float64) __all__ = [ From 7cbbb7d4608d8b7d1a360b2fe2d39d89a52f9546 Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:24:15 -0800 Subject: [PATCH 02/27] docs: add code snippets for explore query result page (#278) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 316614454 🦕 --- samples/snippets/explore_query_result_test.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 samples/snippets/explore_query_result_test.py diff --git a/samples/snippets/explore_query_result_test.py b/samples/snippets/explore_query_result_test.py new file mode 100644 index 0000000000..5f0ec7d9b6 --- /dev/null +++ b/samples/snippets/explore_query_result_test.py @@ -0,0 +1,70 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_bigquery_dataframes_explore_query_result(): + import bigframes.pandas as bpd + + # [START bigquery_dataframes_explore_query_result] + # Load data from BigQuery + query_or_table = "bigquery-public-data.ml_datasets.penguins" + bq_df = bpd.read_gbq(query_or_table) + + # Inspect one of the columns (or series) of the DataFrame: + bq_df["body_mass_g"] + + # Compute the mean of this series: + average_body_mass = bq_df["body_mass_g"].mean() + print(f"average_body_mass: {average_body_mass}") + + # Find the heaviest species using the groupby operation to calculate the + # mean body_mass_g: + ( + bq_df["body_mass_g"] + .groupby(by=bq_df["species"]) + .mean() + .sort_values(ascending=False) + .head(10) + ) + + # Create the Linear Regression model + from bigframes.ml.linear_model import LinearRegression + + # Filter down to the data we want to analyze + adelie_data = bq_df[bq_df.species == "Adelie Penguin (Pygoscelis adeliae)"] + + # Drop the columns we don't care about + adelie_data = adelie_data.drop(columns=["species"]) + + # Drop rows with nulls to get our training data + training_data = adelie_data.dropna() + + # Pick feature columns and label column + X = training_data[ + [ + "island", + "culmen_length_mm", + "culmen_depth_mm", + "flipper_length_mm", + "sex", + ] + ] + y = training_data[["body_mass_g"]] + + model = LinearRegression(fit_intercept=False) + model.fit(X, y) + model.score(X, y) + # [END bigquery_dataframes_explore_query_result] + assert average_body_mass is not None + assert model is not None From 02f7ab64bf520f41a0d9a536d4d4880d3a3d401a Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 15 Dec 2023 16:18:18 -0800 Subject: [PATCH 03/27] refactor: move query execution from ArrayValue to Session (#255) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/core/__init__.py | 110 ++++------------------------- bigframes/core/blocks.py | 37 +++++++--- bigframes/core/compile/compiled.py | 16 +++++ bigframes/core/compile/compiler.py | 6 ++ bigframes/core/indexes/index.py | 6 +- bigframes/core/nodes.py | 8 ++- bigframes/dataframe.py | 3 +- bigframes/session/__init__.py | 76 ++++++++++++++++++++ tests/system/small/test_session.py | 4 +- 9 files changed, 155 insertions(+), 111 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index e19fec8f3f..e8ac8c1d0f 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -16,10 +16,8 @@ from dataclasses import dataclass import io import typing -from typing import Iterable, Literal, Optional, Sequence, Tuple +from typing import Iterable, Literal, Sequence -from google.cloud import bigquery -import ibis import ibis.expr.types as ibis_types import pandas @@ -86,7 +84,17 @@ def session(self) -> Session: required_session = self.node.session from bigframes import get_global_session - return self.node.session[0] if required_session else get_global_session() + return ( + required_session if (required_session is not None) else get_global_session() + ) + + def _try_evaluate_local(self): + """Use only for unit testing paths - not fully featured. Will throw exception if fails.""" + import ibis + + return ibis.pandas.connect({}).execute( + self._compile_ordered()._to_ibis_expr(ordering_mode="unordered") + ) def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: return self._compile_ordered().get_column_type(key) @@ -97,97 +105,9 @@ def _compile_ordered(self) -> compiled.OrderedIR: def _compile_unordered(self) -> compiled.UnorderedIR: return compiler.compile_unordered(self.node) - def shape(self) -> typing.Tuple[int, int]: - """Returns dimensions as (length, width) tuple.""" - width = len(self._compile_unordered().columns) - count_expr = self._compile_unordered()._to_ibis_expr().count() - - # Support in-memory engines for hermetic unit tests. - if not self.node.session: - try: - length = ibis.pandas.connect({}).execute(count_expr) - return (length, width) - except Exception: - # Not all cases can be handled by pandas engine - pass - - sql = self.session.ibis_client.compile(count_expr) - row_iterator, _ = self.session._start_query( - sql=sql, - max_results=1, - ) - length = next(row_iterator)[0] - return (length, width) - - def to_sql( - self, - offset_column: typing.Optional[str] = None, - col_id_overrides: typing.Mapping[str, str] = {}, - sorted: bool = False, - ) -> str: - array_value = self - if offset_column: - array_value = self.promote_offsets(offset_column) - if sorted: - return array_value._compile_ordered().to_sql( - col_id_overrides=col_id_overrides, - sorted=sorted, - ) - else: - return array_value._compile_unordered().to_sql( - col_id_overrides=col_id_overrides - ) - - def start_query( - self, - job_config: Optional[bigquery.job.QueryJobConfig] = None, - max_results: Optional[int] = None, - *, - sorted: bool = True, - ) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: - """Execute a query and return metadata about the results.""" - # TODO(swast): Cache the job ID so we can look it up again if they ask - # for the results? 
We'd need a way to invalidate the cache if DataFrame - # becomes mutable, though. Or move this method to the immutable - # expression class. - # TODO(swast): We might want to move this method to Session and/or - # provide our own minimal metadata class. Tight coupling to the - # BigQuery client library isn't ideal, especially if we want to support - # a LocalSession for unit testing. - # TODO(swast): Add a timeout here? If the query is taking a long time, - # maybe we just print the job metadata that we have so far? - sql = self.to_sql(sorted=sorted) # type:ignore - return self.session._start_query( - sql=sql, - job_config=job_config, - max_results=max_results, - ) - - def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: - """Write the ArrayValue to a session table and create a new block object that references it.""" - compiled_value = self._compile_ordered() - ibis_expr = compiled_value._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True - ) - tmp_table = self.session._ibis_to_temp_table( - ibis_expr, cluster_cols=cluster_cols, api_name="cached" - ) - - table_expression = self.session.ibis_client.table( - f"{tmp_table.project}.{tmp_table.dataset_id}.{tmp_table.table_id}" - ) - new_columns = [table_expression[column] for column in compiled_value.column_ids] - new_hidden_columns = [ - table_expression[column] - for column in compiled_value._hidden_ordering_column_names - ] - return ArrayValue.from_ibis( - self.session, - table_expression, - columns=new_columns, - hidden_ordering_columns=new_hidden_columns, - ordering=compiled_value._ordering, - ) + def row_count(self) -> ArrayValue: + """Get number of rows in ArrayValue as a single-entry ArrayValue.""" + return ArrayValue(nodes.RowCountNode(child=self.node)) # Operations diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 34913872e7..6542b694d2 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -137,8 +137,19 @@ def index(self) -> indexes.IndexValue: @functools.cached_property def shape(self) -> typing.Tuple[int, int]: """Returns dimensions as (length, width) tuple.""" - impl_length, _ = self._expr.shape() - return (impl_length, len(self.value_columns)) + row_count_expr = self.expr.row_count() + + # Support in-memory engines for hermetic unit tests. 
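+        # A node without a session was built purely from local data, so the
+        # row count can be computed locally instead of issuing a BigQuery job.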
+ if self.expr.node.session is None: + try: + row_count = row_count_expr._try_evaluate_local().squeeze() + return (row_count, len(self.value_columns)) + except Exception: + pass + + iter, _ = self.session._execute(row_count_expr, sorted=False) + row_count = next(iter)[0] + return (row_count, len(self.value_columns)) @property def index_columns(self) -> Sequence[str]: @@ -182,6 +193,10 @@ def index_dtypes( """Returns the dtypes of the index columns.""" return [self.expr.get_column_type(col) for col in self.index_columns] + @property + def session(self) -> core.Session: + return self._expr.session + @functools.cached_property def col_id_to_label(self) -> typing.Mapping[str, Label]: """Get column label for value columns, or index name for index columns""" @@ -376,7 +391,7 @@ def _to_dataframe(self, result) -> pd.DataFrame: """Convert BigQuery data to pandas DataFrame with specific dtypes.""" dtypes = dict(zip(self.index_columns, self.index_dtypes)) dtypes.update(zip(self.value_columns, self.dtypes)) - return self._expr.session._rows_to_dataframe(result, dtypes) + return self.session._rows_to_dataframe(result, dtypes) def to_pandas( self, @@ -404,9 +419,9 @@ def to_pandas_batches(self): """Download results one message at a time.""" dtypes = dict(zip(self.index_columns, self.index_dtypes)) dtypes.update(zip(self.value_columns, self.dtypes)) - results_iterator, _ = self._expr.start_query() + results_iterator, _ = self.session._execute(self.expr, sorted=True) for arrow_table in results_iterator.to_arrow_iterable( - bqstorage_client=self._expr.session.bqstoragereadclient + bqstorage_client=self.session.bqstoragereadclient ): df = bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes) self._copy_index_to_pandas(df) @@ -460,12 +475,12 @@ def _compute_and_count( expr = self._apply_value_keys_to_expr(value_keys=value_keys) - results_iterator, query_job = expr.start_query( - max_results=max_results, sorted=ordered + results_iterator, query_job = self.session._execute( + expr, max_results=max_results, sorted=ordered ) table_size = ( - expr.session._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES + self.session._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES ) fraction = ( max_download_size / table_size @@ -607,7 +622,7 @@ def _compute_dry_run( ) -> bigquery.QueryJob: expr = self._apply_value_keys_to_expr(value_keys=value_keys) job_config = bigquery.QueryJobConfig(dry_run=True) - _, query_job = expr.start_query(job_config=job_config) + _, query_job = self.session._execute(expr, job_config=job_config, dry_run=True) return query_job def _apply_value_keys_to_expr(self, value_keys: Optional[Iterable[str]] = None): @@ -1668,7 +1683,7 @@ def to_sql_query( # the BigQuery unicode column name feature? 
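            # Map each internal column id to the user-visible name it should
            # carry in the generated SQL.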
substitutions[old_id] = new_id - sql = array_value.to_sql(col_id_overrides=substitutions) + sql = self.session._to_sql(array_value, col_id_overrides=substitutions) return ( sql, new_ids[: len(idx_labels)], @@ -1678,7 +1693,7 @@ def to_sql_query( def cached(self) -> Block: """Write the block to a session table and create a new block object that references it.""" return Block( - self.expr.cached(cluster_cols=self.index_columns), + self.session._execute_and_cache(self.expr, cluster_cols=self.index_columns), index_columns=self.index_columns, column_labels=self.column_labels, index_labels=self.index_labels, diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 537d9c8b52..d6183228d1 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -268,6 +268,22 @@ def to_sql( ) return typing.cast(str, sql) + def row_count(self) -> OrderedIR: + original_table = self._to_ibis_expr() + ibis_table = original_table.agg( + [ + original_table.count().name("count"), + ] + ) + return OrderedIR( + ibis_table, + (ibis_table["count"],), + ordering=ExpressionOrdering( + ordering_value_columns=(OrderingColumnReference("count"),), + total_ordering_columns=frozenset(["count"]), + ), + ) + def _to_ibis_expr( self, *, diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 39892635f1..17dcde638f 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -173,6 +173,12 @@ def compile_concat(node: nodes.ConcatNode, ordered: bool = True): return concat_impl.concat_unordered(compiled_unordered) +@_compile_node.register +def compile_rowcount(node: nodes.RowCountNode, ordered: bool = True): + result = compile_unordered(node.child).row_count() + return result if ordered else result.to_unordered() + + @_compile_node.register def compile_aggregate(node: nodes.AggregateNode, ordered: bool = True): result = compile_unordered(node.child).aggregate( diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index fc7cf167d4..6fc284403d 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -396,6 +396,10 @@ def dtypes( ) -> typing.Sequence[typing.Union[bf_dtypes.Dtype, np.dtype[typing.Any]]]: return self._block.index_dtypes + @property + def session(self) -> core.Session: + return self._expr.session + def __repr__(self) -> str: """Converts an Index to a string.""" # TODO(swast): Add a timeout here? 
If the query is taking a long time, @@ -411,7 +415,7 @@ def to_pandas(self) -> pandas.Index: index_columns = list(self._block.index_columns) dtypes = dict(zip(index_columns, self.dtypes)) expr = self._expr.select_columns(index_columns) - results, _ = expr.start_query() + results, _ = self.session._execute(expr) df = expr.session._rows_to_dataframe(results, dtypes) df = df.set_index(index_columns) index = df.index diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 82a869dac2..30444f5565 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -139,7 +139,7 @@ class ReadGbqNode(BigFrameNode): @property def session(self): - return (self.table_session,) + return self.table_session def __hash__(self): return self._node_hash @@ -229,6 +229,12 @@ def __hash__(self): return self._node_hash +# TODO: Merge RowCount and Corr into Aggregate Node +@dataclass(frozen=True) +class RowCountNode(UnaryNode): + pass + + @dataclass(frozen=True) class AggregateNode(UnaryNode): aggregations: typing.Tuple[typing.Tuple[str, agg_ops.AggregateOp, str], ...] diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 3b0fd7008a..d777784f64 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2701,7 +2701,8 @@ def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str: if ordering_id is not None: array_value = array_value.promote_offsets(ordering_id) - return array_value.to_sql( + return self._block.session._to_sql( + array_value=array_value, col_id_overrides=id_overrides, ) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index fb5fab86ce..a57f7b94c5 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -67,6 +67,7 @@ from bigframes.core import log_adapter import bigframes.core as core import bigframes.core.blocks as blocks +import bigframes.core.compile import bigframes.core.guid as guid from bigframes.core.ordering import IntegerEncoding, OrderingColumnReference import bigframes.core.ordering as orderings @@ -1437,6 +1438,81 @@ def _start_query( results_iterator = query_job.result(max_results=max_results) return results_iterator, query_job + def _execute_and_cache( + self, array_value: core.ArrayValue, cluster_cols: typing.Sequence[str] + ) -> core.ArrayValue: + """Executes the query and uses the resulting table to rewrite future executions.""" + # TODO: Use this for all executions? 
Problem is that caching materializes extra + # ordering columns + compiled_value = self._compile_ordered(array_value) + + ibis_expr = compiled_value._to_ibis_expr( + ordering_mode="unordered", expose_hidden_cols=True + ) + tmp_table = self._ibis_to_temp_table( + ibis_expr, cluster_cols=cluster_cols, api_name="cached" + ) + table_expression = self.ibis_client.table( + f"{tmp_table.project}.{tmp_table.dataset_id}.{tmp_table.table_id}" + ) + new_columns = [table_expression[column] for column in compiled_value.column_ids] + new_hidden_columns = [ + table_expression[column] + for column in compiled_value._hidden_ordering_column_names + ] + # TODO: Instead, keep session-wide map of cached results and automatically reuse + return core.ArrayValue.from_ibis( + self, + table_expression, + columns=new_columns, + hidden_ordering_columns=new_hidden_columns, + ordering=compiled_value._ordering, + ) + + def _execute( + self, + array_value: core.ArrayValue, + job_config: Optional[bigquery.job.QueryJobConfig] = None, + max_results: Optional[int] = None, + *, + sorted: bool = True, + dry_run=False, + ) -> tuple[bigquery.table.RowIterator, bigquery.QueryJob]: + sql = self._to_sql(array_value, sorted=sorted) # type:ignore + job_config = bigquery.QueryJobConfig(dry_run=dry_run) + return self._start_query( + sql=sql, + job_config=job_config, + max_results=max_results, + ) + + def _to_sql( + self, + array_value: core.ArrayValue, + offset_column: typing.Optional[str] = None, + col_id_overrides: typing.Mapping[str, str] = {}, + sorted: bool = False, + ) -> str: + if offset_column: + array_value = array_value.promote_offsets(offset_column) + if sorted: + return self._compile_ordered(array_value).to_sql( + col_id_overrides=col_id_overrides, sorted=True + ) + return self._compile_unordered(array_value).to_sql( + col_id_overrides=col_id_overrides + ) + + def _compile_ordered( + self, array_value: core.ArrayValue + ) -> bigframes.core.compile.OrderedIR: + return bigframes.core.compile.compile_ordered(array_value.node) + + def _compile_unordered( + self, array_value: core.ArrayValue + ) -> bigframes.core.compile.UnorderedIR: + return bigframes.core.compile.compile_unordered(array_value.node) + def _get_table_size(self, destination_table): table = self.bqclient.get_table(destination_table) return table.num_bytes diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 26c5093b35..e6eb40a5fa 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -44,7 +44,7 @@ def test_read_gbq_tokyo( result = df.sort_index().to_pandas() expected = scalars_pandas_df_index - _, query_job = df._block.expr.start_query() + _, query_job = session_tokyo._execute(df._block.expr) assert query_job.location == tokyo_location pd.testing.assert_frame_equal(result, expected) @@ -379,7 +379,7 @@ def test_read_pandas_tokyo( result = df.to_pandas() expected = scalars_pandas_df_index - _, query_job = df._block.expr.start_query() + _, query_job = session_tokyo._execute(df._block.expr) assert query_job.location == tokyo_location pd.testing.assert_frame_equal(result, expected) From 6c1969a35fe720cf3a804006bcc9046ba554fcc3 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:34:37 -0800 Subject: [PATCH 04/27] feat: Add IntervalIndex support to bigframes.pandas.cut (#254) * feature: Add IntervalIndex support to bigframes.pandas.cut * add bins <= 0 error in CutOp * add type ignore * add type ignore to session --------- 
Co-authored-by: Shobhit Singh --- bigframes/core/reshape/__init__.py | 20 +++++-- bigframes/operations/aggregations.py | 41 ++++++++++----- bigframes/series.py | 2 +- bigframes/session/__init__.py | 4 +- tests/system/small/test_pandas.py | 34 ++++++++++++ .../pandas/core/reshape/tile.py | 52 +++++++++++++++---- 6 files changed, 124 insertions(+), 29 deletions(-) diff --git a/bigframes/core/reshape/__init__.py b/bigframes/core/reshape/__init__.py index 24c1bff309..d9cc99a036 100644 --- a/bigframes/core/reshape/__init__.py +++ b/bigframes/core/reshape/__init__.py @@ -14,7 +14,9 @@ from __future__ import annotations import typing -from typing import Iterable, Literal, Optional, Union +from typing import Iterable, Literal, Optional, Tuple, Union + +import pandas as pd import bigframes.constants as constants import bigframes.core as core @@ -108,17 +110,29 @@ def concat( def cut( x: bigframes.series.Series, - bins: int, + bins: Union[ + int, + pd.IntervalIndex, + Iterable[Tuple[Union[int, float], Union[int, float]]], + ], *, labels: Optional[bool] = None, ) -> bigframes.series.Series: - if bins <= 0: + if isinstance(bins, int) and bins <= 0: raise ValueError("`bins` should be a positive integer.") + if isinstance(bins, Iterable): + if not isinstance(bins, pd.IntervalIndex): + bins = pd.IntervalIndex.from_tuples(list(bins)) + + if bins.is_overlapping: + raise ValueError("Overlapping IntervalIndex is not accepted.") + if labels is not False: raise NotImplementedError( f"Only labels=False is supported in BigQuery DataFrames so far. {constants.FEEDBACK_LINK}" ) + return x._apply_window_op(agg_ops.CutOp(bins), window_spec=core.WindowSpec()) diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 363dfe819d..8178ebfaea 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -20,6 +20,7 @@ import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types from pandas import Int64Dtype +import pandas as pd import bigframes.constants as constants import bigframes.dtypes as dtypes @@ -228,21 +229,37 @@ def skips_nulls(self): class CutOp(WindowOp): - def __init__(self, bins: int): - self._bins_ibis = dtypes.literal_to_ibis_scalar(bins, force_dtype=Int64Dtype()) - self._bins_int = bins + def __init__(self, bins: typing.Union[int, pd.IntervalIndex]): + if isinstance(bins, int): + if not bins > 0: + raise ValueError("`bins` should be a positive integer.") + self._bins_int = bins + self._bins = dtypes.literal_to_ibis_scalar(bins, force_dtype=Int64Dtype()) + else: + self._bins_int = 0 + self._bins = bins def _as_ibis(self, x: ibis_types.Column, window=None): - col_min = _apply_window_if_present(x.min(), window) - col_max = _apply_window_if_present(x.max(), window) - bin_width = (col_max - col_min) / self._bins_ibis out = ibis.case() - for this_bin in range(self._bins_int - 1): - out = out.when( - x <= (col_min + (this_bin + 1) * bin_width), - dtypes.literal_to_ibis_scalar(this_bin, force_dtype=Int64Dtype()), - ) - out = out.when(x.notnull(), self._bins_ibis - 1) + + if self._bins_int > 0: + col_min = _apply_window_if_present(x.min(), window) + col_max = _apply_window_if_present(x.max(), window) + bin_width = (col_max - col_min) / self._bins + + for this_bin in range(self._bins_int - 1): + out = out.when( + x <= (col_min + (this_bin + 1) * bin_width), + dtypes.literal_to_ibis_scalar(this_bin, force_dtype=Int64Dtype()), + ) + out = out.when(x.notnull(), self._bins - 1) + else: + for interval in self._bins: + condition = (x 
> interval.left) & (x <= interval.right) + interval_struct = ibis.struct( + {"left_exclusive": interval.left, "right_inclusive": interval.right} + ) + out = out.when(condition, interval_struct) return out.end() @property diff --git a/bigframes/series.py b/bigframes/series.py index c929775a00..8d8c711c92 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1324,7 +1324,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> typing.Optional[str]: return self.to_pandas().to_csv(path_or_buf, **kwargs) def to_dict(self, into: type[dict] = dict) -> typing.Mapping: - return typing.cast(dict, self.to_pandas().to_dict(into)) + return typing.cast(dict, self.to_pandas().to_dict(into)) # type: ignore def to_excel(self, excel_writer, sheet_name="Sheet1", **kwargs) -> None: return self.to_pandas().to_excel(excel_writer, sheet_name, **kwargs) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index a57f7b94c5..fbe900106a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1017,13 +1017,13 @@ def read_csv( header=header, names=names, index_col=index_col, - usecols=usecols, + usecols=usecols, # type: ignore dtype=dtype, engine=engine, encoding=encoding, **kwargs, ) - return self.read_pandas(pandas_df) + return self.read_pandas(pandas_df) # type: ignore def read_pickle( self, diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index a1079288cf..282c0d68eb 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -365,6 +365,40 @@ def test_cut(scalars_dfs): pd.testing.assert_series_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("bins",), + [ + ([(-5, 2), (2, 3), (-3000, -10)],), + (pd.IntervalIndex.from_tuples([(1, 2), (2, 3), (4, 5)]),), + ], +) +def test_cut_with_interval(scalars_dfs, bins): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = bpd.cut(scalars_df["int64_too"], bins, labels=False).to_pandas() + + if isinstance(bins, list): + bins = pd.IntervalIndex.from_tuples(bins) + pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=False) + + # Convert to match data format + pd_result_converted = pd.Series( + [ + {"left_exclusive": interval.left, "right_inclusive": interval.right} + if pd.notna(val) + else pd.NA + for val, interval in zip( + pd_result, pd_result.cat.categories[pd_result.cat.codes] + ) + ], + name=pd_result.name, + ) + pd_result.index = pd_result.index.astype("Int64") + + pd.testing.assert_series_equal( + bf_result, pd_result_converted, check_index=False, check_dtype=False + ) + + @pytest.mark.parametrize( ("q",), [ diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py index d4471ed68e..55975c3fc1 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py @@ -24,31 +24,61 @@ def cut( ``labels=False`` implies you just want the bins back. - Examples: - - .. 
code-block:: - - import bigframes.pandas as pd + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([0, 1, 5, 10]) + >>> s + 0 0 + 1 1 + 2 5 + 3 10 + dtype: Int64 - pd.options.display.progress_bar = None - s = pd.Series([0, 1, 1, 2]) - pd.cut(s, bins=4, labels=False) + Cut with an integer (equal-width bins): + >>> bpd.cut(s, bins=4, labels=False) 0 0 - 1 1 + 1 0 2 1 3 3 dtype: Int64 + Cut with pd.IntervalIndex, requires importing pandas for IntervalIndex: + + >>> import pandas as pd + + >>> interval_index = pd.IntervalIndex.from_tuples([(0, 1), (1, 5), (5, 20)]) + >>> bpd.cut(s, bins=interval_index, labels=False) + 0 + 1 {'left_exclusive': 0, 'right_inclusive': 1} + 2 {'left_exclusive': 1, 'right_inclusive': 5} + 3 {'left_exclusive': 5, 'right_inclusive': 20} + dtype: struct[pyarrow] + + Cut with an iterable of tuples: + + >>> bins_tuples = [(0, 1), (1, 4), (5, 20)] + >>> bpd.cut(s, bins=bins_tuples, labels=False) + 0 + 1 {'left_exclusive': 0, 'right_inclusive': 1} + 2 + 3 {'left_exclusive': 5, 'right_inclusive': 20} + dtype: struct[pyarrow] + Args: x (Series): The input Series to be binned. Must be 1-dimensional. - bins (int): + bins (int, pd.IntervalIndex, Iterable[Tuple[Union[int, float], Union[int, float]]]): The criteria to bin by. - int : Defines the number of equal-width bins in the range of `x`. The + int: Defines the number of equal-width bins in the range of `x`. The range of `x` is extended by .1% on each side to include the minimum and maximum values of `x`. + + pd.IntervalIndex or Iterable of tuples: Defines the exact bins to be used. + It's important to ensure that these bins are non-overlapping. labels (None): Specifies the labels for the returned bins. Must be the same length as the resulting bins. If False, returns only integer indicators of the From bac62f76af1af6ca8834c3690c7c79aeb12dd331 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Mon, 18 Dec 2023 14:47:39 -0800 Subject: [PATCH 05/27] fix: dataframes to_gbq now creates dataset if it doesn't exist (#222) * "fix: dataframes to_gbq now creates dataset if it doesn't exist * fix: dataframes to_gbq now creates dataset if it doesn't exist * fix: dataframes to_gbq now creates dataset if it doesn't exist * update test * update create dataset method. * fix --------- Co-authored-by: Shobhit Singh --- bigframes/dataframe.py | 17 +++++++++++++++-- tests/system/small/test_dataframe.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index d777784f64..1251e64fb0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -34,6 +34,7 @@ Union, ) +import google.api_core.exceptions import google.cloud.bigquery as bigquery import numpy import pandas @@ -2508,7 +2509,14 @@ def to_gbq( ) if_exists = "replace" - if "." not in destination_table: + table_parts = destination_table.split(".") + default_project = self._block.expr.session.bqclient.project + + if len(table_parts) == 2: + destination_dataset = f"{default_project}.{table_parts[0]}" + elif len(table_parts) == 3: + destination_dataset = f"{table_parts[0]}.{table_parts[1]}" + else: raise ValueError( f"Got invalid value for destination_table {repr(destination_table)}. " "Should be of the form 'datasetId.tableId' or 'projectId.datasetId.tableId'." @@ -2523,11 +2531,16 @@ def to_gbq( f"Valid options include None or one of {dispositions.keys()}." 
) + try: + self._session.bqclient.get_dataset(destination_dataset) + except google.api_core.exceptions.NotFound: + self._session.bqclient.create_dataset(destination_dataset, exists_ok=True) + job_config = bigquery.QueryJobConfig( write_disposition=dispositions[if_exists], destination=bigquery.table.TableReference.from_string( destination_table, - default_project=self._block.expr.session.bqclient.project, + default_project=default_project, ), ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 663a7ceb49..ab68543d91 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3683,3 +3683,18 @@ def test_to_pandas_downsampling_option_override(session): total_memory_bytes = df.memory_usage(deep=True).sum() total_memory_mb = total_memory_bytes / (1024 * 1024) assert total_memory_mb == pytest.approx(download_size, rel=0.3) + + +def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created): + dataset_id = dataset_id_not_created + destination_table = f"{dataset_id}.scalars_df" + + result_table = scalars_df_index.to_gbq(destination_table) + assert ( + result_table == destination_table + if destination_table + else result_table is not None + ) + + loaded_scalars_df_index = session.read_gbq(result_table) + assert not loaded_scalars_df_index.empty From dab2f2cb8720ec68413a71bddaeadc1ec40b6541 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Dec 2023 08:27:16 -0600 Subject: [PATCH 06/27] chore: use latest pip in nightly builds (#281) * chore: use latest pip in nightly builds * restrict ibis version * update prerelease tests too --- .kokoro/release-nightly.sh | 3 +++ noxfile.py | 2 +- setup.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.kokoro/release-nightly.sh b/.kokoro/release-nightly.sh index 0751cf2502..5624df3b8d 100755 --- a/.kokoro/release-nightly.sh +++ b/.kokoro/release-nightly.sh @@ -55,6 +55,9 @@ rm -rf build dist # internal issue b/261050975. git config --global --add safe.directory "${PROJECT_ROOT}" +# Workaround for older pip not able to resolve dependencies. See internal +# issue 316909553. +python3.10 -m pip install pip==23.3.2 python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt # Disable buffering, so that the logs stream through. diff --git a/noxfile.py b/noxfile.py index c0ec3b0c54..c4bbd7a65a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -535,7 +535,7 @@ def prerelease(session: nox.sessions.Session, tests_path): session.install( "--upgrade", # "--pre", - "ibis-framework>=7.1.0,<8.0.0dev", + "ibis-framework>=7.1.0,<7.2.0dev", ) already_installed.add("ibis-framework") diff --git a/setup.py b/setup.py index 1ad4bbd3eb..9aaaaae04f 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,8 @@ "google-cloud-iam >=2.12.1", "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", - "ibis-framework[bigquery] >=7.1.0,<8.0.0dev", + # TODO: Relax upper bound once we have fixed unit tests with 7.2.0. + "ibis-framework[bigquery] >=7.1.0,<7.2.0dev", # TODO: Relax upper bound once we have fixed `system_prerelease` tests. 
"pandas >=1.5.0,<2.1.4", "pydata-google-auth >=1.8.2", From 5092215767d77c90b132e9cd6b3e3749827ebe09 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 19 Dec 2023 11:47:25 -0800 Subject: [PATCH 07/27] feat: add replace method to DataFrame (#261) * feat: add replace method to DataFrame * remove unwanted change to describe method * better docs * is_patype docstring * docstring fix * mypy fix --- bigframes/dataframe.py | 15 ++++ bigframes/dtypes.py | 48 ++++++++++ bigframes/operations/__init__.py | 14 +++ bigframes/series.py | 87 +++++++++++------- tests/system/small/test_dataframe.py | 44 ++++++++++ .../bigframes_vendored/pandas/core/frame.py | 88 +++++++++++++++++++ 6 files changed, 265 insertions(+), 31 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1251e64fb0..1d8169960b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1562,6 +1562,21 @@ def interpolate(self, method: str = "linear") -> DataFrame: def fillna(self, value=None) -> DataFrame: return self._apply_binop(value, ops.fillna_op, how="left") + def replace( + self, to_replace: typing.Any, value: typing.Any = None, *, regex: bool = False + ): + if utils.is_dict_like(value): + return self.apply( + lambda x: x.replace( + to_replace=to_replace, value=value[x.name], regex=regex + ) + if (x.name in value) + else x + ) + return self.apply( + lambda x: x.replace(to_replace=to_replace, value=value, regex=regex) + ) + def ffill(self, *, limit: typing.Optional[int] = None) -> DataFrame: window = bigframes.core.WindowSpec(preceding=limit, following=0) return self._apply_window_op(agg_ops.LastNonNullOp(), window) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 774eb74d06..6dfcc17f37 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -14,6 +14,7 @@ """Mappings for Pandas dtypes supported by BigQuery DataFrames package""" +import datetime import textwrap import typing from typing import Any, Dict, Iterable, Literal, Tuple, Union @@ -437,3 +438,50 @@ def to_pandas_dtypes_overrides(schema: Iterable[bigquery.SchemaField]) -> Dict: gcb3p_pandas_helpers.bq_to_arrow_data_type(field) ) return dtypes + + +def is_dtype(scalar: typing.Any, dtype: Dtype) -> bool: + """Captures whether a scalar can be losslessly represented by a dtype.""" + if scalar is None: + return True + if pd.api.types.is_bool_dtype(dtype): + return pd.api.types.is_bool(scalar) + if pd.api.types.is_float_dtype(dtype): + return pd.api.types.is_float(scalar) + if pd.api.types.is_integer_dtype(dtype): + return pd.api.types.is_integer(scalar) + if isinstance(dtype, pd.StringDtype): + return isinstance(scalar, str) + if isinstance(dtype, pd.ArrowDtype): + pa_type = dtype.pyarrow_dtype + return is_patype(scalar, pa_type) + return False + + +def is_patype(scalar: typing.Any, pa_type: pa.DataType) -> bool: + """Determine whether a scalar's type matches a given pyarrow type.""" + if pa_type == pa.time64("us"): + return isinstance(scalar, datetime.time) + if pa_type == pa.timestamp("us"): + if isinstance(scalar, datetime.datetime): + return not scalar.tzinfo + if isinstance(scalar, pd.Timestamp): + return not scalar.tzinfo + if pa_type == pa.timestamp("us", tz="UTC"): + if isinstance(scalar, datetime.datetime): + return scalar.tzinfo == datetime.timezone.utc + if isinstance(scalar, pd.Timestamp): + return scalar.tzinfo == datetime.timezone.utc + if pa_type == pa.date32(): + return isinstance(scalar, datetime.date) + return False + + +def is_comparable(scalar: typing.Any, dtype: Dtype) -> bool: + """Whether scalar can 
be compare to items of dtype (though maybe requiring coercion)""" + if is_dtype(scalar, dtype): + return True + elif pd.api.types.is_numeric_dtype(dtype): + return pd.api.types.is_number(scalar) + else: + return False diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 0655aafdb3..753870a42d 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -523,6 +523,20 @@ def _as_ibis(self, x: ibis_types.Value): return bigframes.dtypes.cast_ibis_value(x, self.to_type) +class MapOp(UnaryOp): + def __init__( + self, + mappings: typing.Tuple[typing.Tuple[typing.Hashable, typing.Hashable], ...], + ): + self._mappings = mappings + + def _as_ibis(self, x: ibis_types.Value): + case = ibis.case() + for mapping in self._mappings: + case = case.when(x == mapping[0], mapping[1]) + return case.else_(x).end() + + class FindOp(UnaryOp): def __init__(self, sub, start, end): self._sub = sub diff --git a/bigframes/series.py b/bigframes/series.py index 8d8c711c92..1b9982877a 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -442,42 +442,67 @@ def replace( self, to_replace: typing.Any, value: typing.Any = None, *, regex: bool = False ): if regex: - if not (isinstance(to_replace, str) and isinstance(value, str)): - raise NotImplementedError( - f"replace regex mode only supports strings for 'to_replace' and 'value'. {constants.FEEDBACK_LINK}" - ) - block, result_col = self._block.apply_unary_op( - self._value_column, - ops.ReplaceRegexOp(to_replace, value), - result_label=self.name, - ) - return Series(block.select_column(result_col)) + # No-op unless to_replace and series dtype are both string type + if not isinstance(to_replace, str) or not isinstance( + self.dtype, pandas.StringDtype + ): + return self + return self._regex_replace(to_replace, value) elif utils.is_dict_like(to_replace): - raise NotImplementedError( - f"Dict 'to_replace' not supported. {constants.FEEDBACK_LINK}" - ) + return self._mapping_replace(to_replace) # type: ignore elif utils.is_list_like(to_replace): - block, cond = self._block.apply_unary_op( - self._value_column, ops.IsInOp(to_replace) - ) - block, result_col = block.apply_binary_op( - cond, - self._value_column, - ops.partial_arg1(ops.where_op, value), - result_label=self.name, - ) - return Series(block.select_column(result_col)) + replace_list = to_replace else: # Scalar - block, cond = self._block.apply_unary_op( - self._value_column, ops.BinopPartialLeft(ops.eq_op, to_replace) + replace_list = [to_replace] + replace_list = [ + i for i in replace_list if bigframes.dtypes.is_comparable(i, self.dtype) + ] + return self._simple_replace(replace_list, value) if replace_list else self + + def _regex_replace(self, to_replace: str, value: str): + if not bigframes.dtypes.is_dtype(value, self.dtype): + raise NotImplementedError( + f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. 
{constants.FEEDBACK_LINK}" ) - block, result_col = block.apply_binary_op( - cond, - self._value_column, - ops.partial_arg1(ops.where_op, value), - result_label=self.name, + block, result_col = self._block.apply_unary_op( + self._value_column, + ops.ReplaceRegexOp(to_replace, value), + result_label=self.name, + ) + return Series(block.select_column(result_col)) + + def _simple_replace(self, to_replace_list: typing.Sequence, value): + if not bigframes.dtypes.is_dtype(value, self.dtype): + raise NotImplementedError( + f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}" ) - return Series(block.select_column(result_col)) + + block, cond = self._block.apply_unary_op( + self._value_column, ops.IsInOp(to_replace_list) + ) + block, result_col = block.apply_binary_op( + cond, + self._value_column, + ops.partial_arg1(ops.where_op, value), + result_label=self.name, + ) + return Series(block.select_column(result_col)) + + def _mapping_replace(self, mapping: dict[typing.Hashable, typing.Hashable]): + tuples = [] + for key, value in mapping.items(): + if not bigframes.dtypes.is_comparable(key, self.dtype): + continue + if not bigframes.dtypes.is_dtype(value, self.dtype): + raise NotImplementedError( + f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}" + ) + tuples.append((key, value)) + + block, result = self._block.apply_unary_op( + self._value_column, ops.MapOp(tuple(tuples)) + ) + return Series(block.select_column(result)) def interpolate(self, method: str = "linear") -> Series: if method == "pad": diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ab68543d91..ed78e73e5d 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -839,6 +839,50 @@ def test_df_fillna(scalars_dfs): pandas.testing.assert_frame_equal(bf_result, pd_result) +def test_df_replace_scalar_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.replace("Hello, World!", "Howdy, Planet!").to_pandas() + pd_result = scalars_pandas_df.replace("Hello, World!", "Howdy, Planet!") + + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +def test_df_replace_regex_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.replace("^H.l", "Howdy, Planet!", regex=True).to_pandas() + pd_result = scalars_pandas_df.replace("^H.l", "Howdy, Planet!", regex=True) + + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +def test_df_replace_list_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.replace(["Hello, World!", "T"], "Howdy, Planet!").to_pandas() + pd_result = scalars_pandas_df.replace(["Hello, World!", "T"], "Howdy, Planet!") + + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +def test_df_replace_value_dict(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.replace(1, {"int64_col": 100, "int64_too": 200}).to_pandas() + pd_result = scalars_pandas_df.replace(1, {"int64_col": 100, "int64_too": 200}) + + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + def test_df_ffill(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[["int64_col", "float64_col"]].ffill(limit=1).to_pandas() diff --git a/third_party/bigframes_vendored/pandas/core/frame.py 
b/third_party/bigframes_vendored/pandas/core/frame.py index c082b87336..00be9e5e9e 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4356,6 +4356,94 @@ def fillna(self, value): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def replace( + self, + to_replace, + value=None, + *, + regex=False, + ): + """ + Replace values given in `to_replace` with `value`. + + Values of the Series/DataFrame are replaced with other values dynamically. + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({ + ... 'int_col': [1, 1, 2, 3], + ... 'string_col': ["a", "b", "c", "b"], + ... }) + + Using scalar `to_replace` and `value`: + + >>> df.replace("b", "e") + int_col string_col + 0 1 a + 1 1 e + 2 2 c + 3 3 e + + [4 rows x 2 columns] + + Using dictionary: + + >>> df.replace({"a": "e", 2: 5}) + int_col string_col + 0 1 e + 1 1 b + 2 5 c + 3 3 b + + [4 rows x 2 columns] + + Using regex: + + >>> df.replace("[ab]", "e", regex=True) + int_col string_col + 0 1 e + 1 1 e + 2 2 c + 3 3 e + + [4 rows x 2 columns] + + + Args: + to_replace (str, regex, list, int, float or None): + How to find the values that will be replaced. + numeric: numeric values equal to `to_replace` will be replaced with `value` + str: string exactly matching `to_replace` will be replaced with `value` + regex: regexs matching `to_replace` will be replaced with`value` + list of str, regex, or numeric: + First, if `to_replace` and `value` are both lists, they **must** be the same length. + Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + str, regex and numeric rules apply as above. + + value (scalar, default None): + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + regex (bool, default False): + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. + + Returns: + Series/DataFrame: Object after replacement. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property def iloc(self): """Purely integer-location based indexing for selection by position.""" From ab493506e71ed8970a11fe2f88b2145150e09291 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 19 Dec 2023 12:32:15 -0800 Subject: [PATCH 08/27] fix: fix DataFrameGroupby.agg() issue with as_index=False (#273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #271 🦕 --- bigframes/core/block_transforms.py | 1 - bigframes/core/blocks.py | 53 ++++++++------------------- bigframes/core/groupby/__init__.py | 30 ++++++++++----- bigframes/dataframe.py | 8 ++-- bigframes/series.py | 16 ++++---- tests/system/small/test_groupby.py | 17 +++++++-- tests/system/small/test_multiindex.py | 11 +++++- 7 files changed, 70 insertions(+), 66 deletions(-) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index df84f70859..6654892287 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -332,7 +332,6 @@ def value_counts( by_column_ids=columns, aggregations=[(dummy, agg_ops.count_op)], dropna=dropna, - as_index=True, ) count_id = agg_ids[0] if normalize: diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 6542b694d2..3163aa5b09 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -66,7 +66,7 @@ _MONOTONIC_DECREASING = "monotonic_decreasing" -LevelType = typing.Union[str, int] +LevelType = typing.Hashable LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] @@ -941,7 +941,6 @@ def aggregate( by_column_ids: typing.Sequence[str] = (), aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp]] = (), *, - as_index: bool = True, dropna: bool = True, ) -> typing.Tuple[Block, typing.Sequence[str]]: """ @@ -962,40 +961,21 @@ def aggregate( aggregate_labels = self._get_labels_for_columns( [agg[0] for agg in aggregations] ) - if as_index: - names: typing.List[Label] = [] - for by_col_id in by_column_ids: - if by_col_id in self.value_columns: - names.append(self.col_id_to_label[by_col_id]) - else: - names.append(self.col_id_to_index_name[by_col_id]) - return ( - Block( - result_expr, - index_columns=by_column_ids, - column_labels=aggregate_labels, - index_labels=names, - ), - output_col_ids, - ) - else: # as_index = False - # If as_index=False, drop grouping levels, but keep grouping value columns - by_value_columns = [ - col for col in by_column_ids if col in self.value_columns - ] - by_column_labels = self._get_labels_for_columns(by_value_columns) - labels = (*by_column_labels, *aggregate_labels) - offsets_id = guid.generate_guid() - result_expr_pruned = result_expr.select_columns( - [*by_value_columns, *output_col_ids] - ).promote_offsets(offsets_id) - - return ( - Block( - result_expr_pruned, index_columns=[offsets_id], column_labels=labels - ), - output_col_ids, - ) + names: typing.List[Label] = [] + for by_col_id in by_column_ids: + if by_col_id in self.value_columns: + names.append(self.col_id_to_label[by_col_id]) + else: + names.append(self.col_id_to_index_name[by_col_id]) + return ( + Block( + result_expr, + index_columns=by_column_ids, + column_labels=aggregate_labels, + index_labels=names, + ), + output_col_ids, + ) def get_stat(self, column_id: str, stat: agg_ops.AggregateOp): """Gets aggregates immediately, and caches it""" @@ -1324,7 +1304,6 @@ def pivot( result_block, _ = block.aggregate( by_column_ids=self.index_columns, aggregations=aggregations, - as_index=True, dropna=True, ) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index a8b8afdae7..3ee46ef675 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ 
-263,10 +263,10 @@ def _agg_string(self, func: str) -> df.DataFrame: agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, - as_index=self._as_index, dropna=self._dropna, ) - return df.DataFrame(agg_block) + dataframe = df.DataFrame(agg_block) + return dataframe if self._as_index else self._convert_index(dataframe) def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: aggregations: typing.List[typing.Tuple[str, agg_ops.AggregateOp]] = [] @@ -285,7 +285,6 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, - as_index=self._as_index, dropna=self._dropna, ) if want_aggfunc_level: @@ -297,7 +296,8 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: ) else: agg_block = agg_block.with_column_labels(pd.Index(column_labels)) - return df.DataFrame(agg_block) + dataframe = df.DataFrame(agg_block) + return dataframe if self._as_index else self._convert_index(dataframe) def _agg_list(self, func: typing.Sequence) -> df.DataFrame: aggregations = [ @@ -311,7 +311,6 @@ def _agg_list(self, func: typing.Sequence) -> df.DataFrame: agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, - as_index=self._as_index, dropna=self._dropna, ) agg_block = agg_block.with_column_labels( @@ -319,7 +318,8 @@ def _agg_list(self, func: typing.Sequence) -> df.DataFrame: column_labels, names=[*self._block.column_labels.names, None] ) ) - return df.DataFrame(agg_block) + dataframe = df.DataFrame(agg_block) + return dataframe if self._as_index else self._convert_index(dataframe) def _agg_named(self, **kwargs) -> df.DataFrame: aggregations = [] @@ -339,11 +339,21 @@ def _agg_named(self, **kwargs) -> df.DataFrame: agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, - as_index=self._as_index, dropna=self._dropna, ) agg_block = agg_block.with_column_labels(column_labels) - return df.DataFrame(agg_block) + dataframe = df.DataFrame(agg_block) + return dataframe if self._as_index else self._convert_index(dataframe) + + def _convert_index(self, dataframe: df.DataFrame): + """Convert index levels to columns except where names conflict.""" + levels_to_drop = [ + level for level in dataframe.index.names if level in dataframe.columns + ] + + if len(levels_to_drop) == dataframe.index.nlevels: + return dataframe.reset_index(drop=True) + return dataframe.droplevel(levels_to_drop).reset_index(drop=False) aggregate = agg @@ -379,10 +389,10 @@ def _aggregate_all( result_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, - as_index=self._as_index, dropna=self._dropna, ) - return df.DataFrame(result_block) + dataframe = df.DataFrame(result_block) + return dataframe if self._as_index else self._convert_index(dataframe) def _apply_window_op( self, diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1d8169960b..98aa8f1185 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -72,7 +72,7 @@ # TODO(tbergeron): Convert to bytes-based limit MAX_INLINE_DF_SIZE = 5000 -LevelType = typing.Union[str, int] +LevelType = typing.Hashable LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] SingleItemValue = Union[bigframes.series.Series, int, float, Callable] @@ -1956,7 +1956,7 @@ def _stack_mono(self): def _stack_multi(self, level: LevelsType = -1): n_levels = self.columns.nlevels - if isinstance(level, int) or isinstance(level, 
str): + if not utils.is_list_like(level): level = [level] level_indices = [] for level_ref in level: @@ -1966,7 +1966,7 @@ def _stack_multi(self, level: LevelsType = -1): else: level_indices.append(level_ref) else: # str - level_indices.append(self.columns.names.index(level_ref)) + level_indices.append(self.columns.names.index(level_ref)) # type: ignore new_order = [ *[i for i in range(n_levels) if i not in level_indices], @@ -1982,7 +1982,7 @@ def _stack_multi(self, level: LevelsType = -1): return DataFrame(block) def unstack(self, level: LevelsType = -1): - if isinstance(level, int) or isinstance(level, str): + if not utils.is_list_like(level): level = [level] block = self._block diff --git a/bigframes/series.py b/bigframes/series.py index 1b9982877a..6837c1c7f8 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -841,7 +841,6 @@ def mode(self) -> Series: block, agg_ids = block.aggregate( by_column_ids=[self._value_column], aggregations=((self._value_column, agg_ops.count_op),), - as_index=False, ) value_count_col_id = agg_ids[0] block, max_value_count_col_id = block.apply_window_op( @@ -855,14 +854,15 @@ def mode(self) -> Series: ops.eq_op, ) block = block.filter(is_mode_col_id) - mode_values_series = Series( - block.select_column(self._value_column).assign_label( - self._value_column, self.name - ) - ) - return typing.cast( - Series, mode_values_series.sort_values().reset_index(drop=True) + # use temporary name for reset_index to avoid collision, restore after dropping extra columns + block = ( + block.with_index_labels(["mode_temp_internal"]) + .order_by([OrderingColumnReference(self._value_column)]) + .reset_index(drop=False) ) + block = block.select_column(self._value_column).with_column_labels([self.name]) + mode_values_series = Series(block.select_column(self._value_column)) + return typing.cast(Series, mode_values_series) def mean(self) -> float: return typing.cast(float, self._apply_aggregation(agg_ops.mean_op)) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 5214905186..2919c167ef 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -122,23 +122,32 @@ def test_dataframe_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +@pytest.mark.parametrize( + ("as_index"), + [ + (True), + (False), + ], +) def test_dataframe_groupby_agg_dict_with_list( - scalars_df_index, scalars_pandas_df_index + scalars_df_index, scalars_pandas_df_index, as_index ): col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] bf_result = ( scalars_df_index[col_names] - .groupby("string_col") + .groupby("string_col", as_index=as_index) .agg({"int64_too": ["mean", "max"], "string_col": "count"}) ) pd_result = ( scalars_pandas_df_index[col_names] - .groupby("string_col") + .groupby("string_col", as_index=as_index) .agg({"int64_too": ["mean", "max"], "string_col": "count"}) ) bf_result_computed = bf_result.to_pandas() - pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + pd.testing.assert_frame_equal( + pd_result, bf_result_computed, check_dtype=False, check_index_type=False + ) def test_dataframe_groupby_agg_dict_no_lists(scalars_df_index, scalars_pandas_df_index): diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index e7e93849c6..1708735f4c 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ 
-356,17 +356,24 @@ def test_multi_index_dataframe_groupby(scalars_df_index, scalars_pandas_df_index def test_multi_index_dataframe_groupby_level_aggregate( scalars_df_index, scalars_pandas_df_index, level, as_index ): + index_cols = ["int64_too", "bool_col"] bf_result = ( - scalars_df_index.set_index(["int64_too", "bool_col"]) + scalars_df_index.set_index(index_cols) .groupby(level=level, as_index=as_index) .mean(numeric_only=True) .to_pandas() ) pd_result = ( - scalars_pandas_df_index.set_index(["int64_too", "bool_col"]) + scalars_pandas_df_index.set_index(index_cols) .groupby(level=level, as_index=as_index) .mean(numeric_only=True) ) + # For as_index=False, pandas will drop index levels used as groupings + # In the future, it will include this in the result, bigframes already does this behavior + if not as_index: + for col in index_cols: + if col in bf_result.columns: + bf_result = bf_result.drop(col, axis=1) # Pandas will have int64 index, while bigquery will have Int64 when resetting pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) From b36ce472678d2680bc8468d1e589b8bde3fde1fe Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 19 Dec 2023 13:22:15 -0800 Subject: [PATCH 09/27] chore: Remove symlink that breaks local testing (#274) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- docs/samples | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/samples diff --git a/docs/samples b/docs/samples deleted file mode 120000 index e804737ed3..0000000000 --- a/docs/samples +++ /dev/null @@ -1 +0,0 @@ -../samples \ No newline at end of file From e8da3a1ca1adff59b687725d9f367e7b3bdd60cf Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:10:15 +0000 Subject: [PATCH 10/27] build: update actions/upload-artifact and actions/download-artifact (#276) Source-Link: https://togithub.com/googleapis/synthtool/commit/280ddaed417057dfe5b1395731de07b7d09f5058 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:346ab2efb51649c5dde7756cbbdc60dd394852ba83b9bbffc292a63549f33c17 --- .github/.OwlBot.lock.yaml | 4 +- .github/workflows/docs.yml | 8 ++-- .github/workflows/lint.yml | 4 +- .github/workflows/unittest.yml | 18 ++++---- .kokoro/requirements.txt | 48 ++++++++++---------- .kokoro/samples/python3.12/common.cfg | 40 ++++++++++++++++ .kokoro/samples/python3.12/continuous.cfg | 6 +++ .kokoro/samples/python3.12/periodic-head.cfg | 11 +++++ .kokoro/samples/python3.12/periodic.cfg | 6 +++ .kokoro/samples/python3.12/presubmit.cfg | 6 +++ 10 files changed, 110 insertions(+), 41 deletions(-) create mode 100644 .kokoro/samples/python3.12/common.cfg create mode 100644 .kokoro/samples/python3.12/continuous.cfg create mode 100644 .kokoro/samples/python3.12/periodic-head.cfg create mode 100644 .kokoro/samples/python3.12/periodic.cfg create mode 100644 
.kokoro/samples/python3.12/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 453b540c1e..9bee240971 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 -# created: 2023-11-08T19:46:45.022803742Z + digest: sha256:346ab2efb51649c5dde7756cbbdc60dd394852ba83b9bbffc292a63549f33c17 +# created: 2023-12-14T22:17:57.611773021Z diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 221806cedf..698fbc5c94 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,9 +8,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.9" - name: Install nox @@ -24,9 +24,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 16d5a9e90f..4866193af2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,9 +8,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install nox diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 465199fc9a..f059b5548a 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -11,9 +11,9 @@ jobs: python: ['3.9', '3.10', '3.11'] steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install nox @@ -26,9 +26,9 @@ jobs: run: | nox -s unit-${{ matrix.python }} - name: Upload coverage results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: coverage-artifacts + name: coverage-artifact-${{ matrix.python }} path: .coverage-${{ matrix.python }} cover: @@ -37,9 +37,9 @@ jobs: - unit steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install coverage @@ -47,11 +47,11 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install coverage - name: Download coverage results - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: - name: coverage-artifacts path: .coverage-results/ - name: Report coverage results run: | - coverage combine .coverage-results/.coverage* + find .coverage-results -type f -name '*.zip' -exec unzip {} \; + coverage combine .coverage-results/**/.coverage* coverage report --show-missing --fail-under=35 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 8957e21104..e5c1ffca94 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,30 +93,30 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==41.0.5 \ - 
--hash=sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf \ - --hash=sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84 \ - --hash=sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e \ - --hash=sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8 \ - --hash=sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7 \ - --hash=sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1 \ - --hash=sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88 \ - --hash=sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86 \ - --hash=sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179 \ - --hash=sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81 \ - --hash=sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20 \ - --hash=sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548 \ - --hash=sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d \ - --hash=sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d \ - --hash=sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5 \ - --hash=sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1 \ - --hash=sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147 \ - --hash=sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936 \ - --hash=sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797 \ - --hash=sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696 \ - --hash=sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72 \ - --hash=sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da \ - --hash=sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723 +cryptography==41.0.6 \ + --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \ + --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \ + --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \ + --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \ + --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \ + --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \ + --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \ + --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \ + --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \ + --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \ + --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \ + --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \ + --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \ + --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \ + --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \ + --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \ + --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \ + --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \ + --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \ + 
--hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \ + --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \ + --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \ + --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae # via # gcp-releasetool # secretstorage diff --git a/.kokoro/samples/python3.12/common.cfg b/.kokoro/samples/python3.12/common.cfg new file mode 100644 index 0000000000..abf83e196d --- /dev/null +++ b/.kokoro/samples/python3.12/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.12" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-312" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery-dataframes/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery-dataframes/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.12/continuous.cfg b/.kokoro/samples/python3.12/continuous.cfg new file mode 100644 index 0000000000..a1c8d9759c --- /dev/null +++ b/.kokoro/samples/python3.12/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.12/periodic-head.cfg b/.kokoro/samples/python3.12/periodic-head.cfg new file mode 100644 index 0000000000..123a35fbd3 --- /dev/null +++ b/.kokoro/samples/python3.12/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery-dataframes/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.12/periodic.cfg b/.kokoro/samples/python3.12/periodic.cfg new file mode 100644 index 0000000000..71cd1e597e --- /dev/null +++ b/.kokoro/samples/python3.12/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.12/presubmit.cfg b/.kokoro/samples/python3.12/presubmit.cfg new file mode 100644 index 0000000000..a1c8d9759c --- /dev/null +++ b/.kokoro/samples/python3.12/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file From 9ec352a338f11d82aee9cd665ffb0e6e97cb391b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 19 Dec 2023 18:04:15 -0500 Subject: [PATCH 11/27] fix: use setuptools.find_namespace_packages (#246) Similar to googleapis/google-auth-library-python#1205 
https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#native-namespace-packages --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9aaaaae04f..345d1ea752 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ # benchmarks, etc. packages = [ package - for package in setuptools.PEP420PackageFinder.find() + for package in setuptools.find_namespace_packages() if package.startswith("bigframes") or package.startswith("third_party") ] From 95b673aeb1545744e4b1a353cf1f4d0202d8a1b2 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 20 Dec 2023 07:13:35 +0000 Subject: [PATCH 12/27] docs: code samples for `Series.{sum, mean, min, max}`, `astype` (#280) * docs: code samples for `Series.{sum, mean, min, max}`, `astype` * insert newlines in code samples to improve readability --------- Co-authored-by: Huan Chen <142538604+Genesis929@users.noreply.github.com> --- bigframes/dtypes.py | 2 +- .../bigframes_vendored/pandas/core/generic.py | 38 +++- .../bigframes_vendored/pandas/core/series.py | 192 ++++++++++++++++++ 3 files changed, 230 insertions(+), 2 deletions(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 6dfcc17f37..891c372a10 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -288,7 +288,7 @@ def bigframes_dtype_to_ibis_dtype( f""" Unexpected data type {bigframes_dtype}. The following str dtypes are supppted: 'boolean','Float64','Int64', 'string', - 'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]', + 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]', 'timestamp[us][pyarrow]','date32[day][pyarrow]', 'time64[us][pyarrow]'. The following pandas.ExtensionDtype are supported: pandas.BooleanDtype(), pandas.Float64Dtype(), diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 607243f844..ca5c6344ce 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -82,10 +82,46 @@ def astype(self, dtype): """ Cast a pandas object to a specified dtype ``dtype``. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Create a DataFrame: + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = bpd.DataFrame(data=d) + >>> df.dtypes + col1 Int64 + col2 Int64 + dtype: object + + Cast all columns to ``Float64``: + + >>> df.astype('Float64').dtypes + col1 Float64 + col2 Float64 + dtype: object + + Create a series of type ``Int64``: + + >>> ser = bpd.Series([1, 2], dtype='Int64') + >>> ser + 0 1 + 1 2 + dtype: Int64 + + Convert to ``Float64`` type: + + >>> ser.astype('Float64') + 0 1.0 + 1 2.0 + dtype: Float64 + Args: dtype (str or pandas.ExtensionDtype): A dtype supported by BigQuery DataFrame include 'boolean','Float64','Int64', - 'string', 'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]', + 'string', 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]', 'timestamp[us][pyarrow]','date32[day][pyarrow]','time64[us][pyarrow]' A pandas.ExtensionDtype include pandas.BooleanDtype(), pandas.Float64Dtype(), pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"), diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8303df5ef4..d054684598 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -890,6 +890,95 @@ def groupby( used to group large amounts of data and compute operations on these groups. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + You can group by a named index level. + + >>> s = bpd.Series([380, 370., 24., 26.], + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], + ... name="Max Speed") + >>> s.index.name="Animal" + >>> s + Animal + Falcon 380.0 + Falcon 370.0 + Parrot 24.0 + Parrot 26.0 + Name: Max Speed, dtype: Float64 + >>> s.groupby("Animal").mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + You can also group by more than one index levels. + + >>> import pandas as pd + >>> s = bpd.Series([380, 370., 24., 26.], + ... index=pd.MultiIndex.from_tuples( + ... [("Falcon", "Clear"), + ... ("Falcon", "Cloudy"), + ... ("Parrot", "Clear"), + ... ("Parrot", "Clear")], + ... names=["Animal", "Sky"]), + ... name="Max Speed") + >>> s + Animal Sky + Falcon Clear 380.0 + Cloudy 370.0 + Parrot Clear 24.0 + Clear 26.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby("Animal").mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby("Sky").mean() + Sky + Clear 143.333333 + Cloudy 370.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby(["Animal", "Sky"]).mean() + Animal Sky + Falcon Clear 380.0 + Cloudy 370.0 + Parrot Clear 25.0 + Name: Max Speed, dtype: Float64 + + You can also group by values in a Series provided the index matches with + the original series. + + >>> df = bpd.DataFrame({'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... 'Max Speed': [380., 370., 24., 26.], + ... 'Age': [10., 20., 4., 6.]}) + >>> df + Animal Max Speed Age + 0 Falcon 380.0 10.0 + 1 Falcon 370.0 20.0 + 2 Parrot 24.0 4.0 + 3 Parrot 26.0 6.0 + + [4 rows x 3 columns] + + >>> df['Max Speed'].groupby(df['Animal']).mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + >>> df['Age'].groupby(df['Animal']).max() + Animal + Falcon 20.0 + Parrot 6.0 + Name: Age, dtype: Float64 + Args: by (mapping, function, label, pd.Grouper or list of such, default None): Used to determine the groups for the groupby. 
@@ -1661,6 +1750,31 @@ def max( If you want the index of the maximum, use ``idxmax``. This is the equivalent of the ``numpy.ndarray`` method ``argmax``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the max of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.max() + 3 + + Calculating the max of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 + dtype: Float64 + >>> s.max() + 3.0 Returns: scalar: Scalar. @@ -1676,6 +1790,32 @@ def min( If you want the index of the minimum, use ``idxmin``. This is the equivalent of the ``numpy.ndarray`` method ``argmin``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the min of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.min() + 1 + + Calculating the min of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 + dtype: Float64 + >>> s.min() + 1.0 + Returns: scalar: Scalar. """ @@ -1714,6 +1854,32 @@ def sum(self): This is equivalent to the method ``numpy.sum``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the sum of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.sum() + 4 + + Calculating the sum of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 + dtype: Float64 + >>> s.sum() + 4.0 + Returns: scalar: Scalar. """ @@ -1722,6 +1888,32 @@ def sum(self): def mean(self): """Return the mean of the values over the requested axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the mean of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.mean() + 2.0 + + Calculating the mean of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 + dtype: Float64 + >>> s.mean() + 2.0 + Returns: scalar: Scalar. 
""" From ad6746569b3af11be9d40805a1449ee1e89288dc Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 21 Dec 2023 00:33:37 +0000 Subject: [PATCH 13/27] fix: make `Series.str.replace` work for simple strings (#285) --- bigframes/operations/__init__.py | 2 +- tests/system/small/operations/test_strings.py | 2 ++ .../bigframes_vendored/pandas/core/series.py | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 753870a42d..678774978a 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -385,7 +385,7 @@ def _as_ibis(self, x: ibis_types.Value): ibis_types.StringValue, ibis_types.literal(self._pat) ) repl_str_value = typing.cast( - ibis_types.StringValue, ibis_types.literal(self._pat) + ibis_types.StringValue, ibis_types.literal(self._repl) ) return typing.cast(ibis_types.StringValue, x).replace( diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 27a35134d4..79f92c94b4 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -94,6 +94,8 @@ def test_str_extract(scalars_dfs, pat): (".*", "blah", True, 0, True), ("h.l", "blah", False, 0, True), (re.compile("(?i).e.."), "blah", None, 0, True), + ("H", "h", True, 0, False), + (", ", "__", True, 0, False), ], ) def test_str_replace(scalars_dfs, pat, repl, case, flags, regex): diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index d054684598..366f32c77e 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -2304,6 +2304,24 @@ def str(self): NAs stay NA unless handled otherwise by a particular method. Patterned after Python’s string methods, with some inspiration from R’s stringr package. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["A_Str_Series"]) + >>> s + 0 A_Str_Series + dtype: string + + >>> s.str.lower() + 0 a_str_series + dtype: string + + >>> s.str.replace("_", "") + 0 AStrSeries + dtype: string + Returns: bigframes.operations.strings.StringMethods: An accessor containing string methods. 
From a1c06319ab0e3697c3175112490488002bb344c0 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Wed, 20 Dec 2023 18:35:18 -0800 Subject: [PATCH 14/27] feat: specific pyarrow mappings for decimal, bytes types (#283) * feat: new bytes, json, decimal type mappings * amend tests to reflect new types * add implicit type conversion for df.replace * more type casting tests * skip pandas 1.x for more tests --------- Co-authored-by: Tim Swast --- bigframes/core/block_transforms.py | 2 +- bigframes/core/blocks.py | 2 +- bigframes/core/compile/compiled.py | 14 +- bigframes/core/groupby/__init__.py | 7 +- bigframes/dataframe.py | 20 +-- bigframes/dtypes.py | 139 ++++++++++++++------ bigframes/series.py | 24 +++- tests/system/large/ml/test_compose.py | 4 +- tests/system/large/ml/test_core.py | 1 + tests/system/small/ml/test_core.py | 3 +- tests/system/small/ml/test_imported.py | 2 + tests/system/small/ml/test_llm.py | 10 +- tests/system/small/ml/test_preprocessing.py | 16 +++ tests/system/small/ml/test_remote.py | 1 + tests/system/small/test_dataframe.py | 37 +++--- tests/system/small/test_dataframe_io.py | 18 ++- tests/system/small/test_multiindex.py | 7 +- tests/system/small/test_series.py | 15 ++- tests/system/small/test_session.py | 3 + tests/system/utils.py | 36 ++++- tests/unit/test_dtypes.py | 7 +- 21 files changed, 267 insertions(+), 101 deletions(-) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index 6654892287..c6867c1a33 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -131,7 +131,7 @@ def interpolate(block: blocks.Block, method: str = "linear") -> blocks.Block: if len(index_columns) != 1: raise ValueError("only method 'linear' supports multi-index") xvalues = block.index_columns[0] - if block.index_dtypes[0] not in dtypes.NUMERIC_BIGFRAMES_TYPES: + if block.index_dtypes[0] not in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE: raise ValueError("Can only interpolate on numeric index.") for column in original_columns: diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 3163aa5b09..779d11b371 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1063,7 +1063,7 @@ def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.AggregateOp]: stats: list[agg_ops.AggregateOp] = [agg_ops.count_op] if dtype not in bigframes.dtypes.UNORDERED_DTYPES: stats += [agg_ops.min_op, agg_ops.max_op] - if dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES: + if dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE: # Notable exclusions: # prod op tends to cause overflows # Also, var_op is redundant as can be derived from std diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index d6183228d1..199c8db785 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -220,7 +220,10 @@ def _get_ibis_column(self, key: str) -> ibis_types.Value: raise ValueError( "Column name {} not in set of values: {}".format(key, self.column_ids) ) - return typing.cast(ibis_types.Value, self._column_names[key]) + return typing.cast( + ibis_types.Value, + bigframes.dtypes.ibis_value_to_canonical_type(self._column_names[key]), + ) def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: ibis_type = typing.cast( @@ -1177,7 +1180,14 @@ def _to_ibis_expr( # Make sure all dtypes are the "canonical" ones for BigFrames. This is # important for operations like UNION where the schema must match. 
table = self._table.select( - bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns + bigframes.dtypes.ibis_value_to_canonical_type( + column.resolve(self._table) + # TODO(https://github.com/ibis-project/ibis/issues/7613): use + # public API to refer to Deferred type. + if isinstance(column, ibis.common.deferred.Deferred) + else column + ) + for column in columns ) base_table = table if self._reduced_predicate is not None: diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 3ee46ef675..66ba901649 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -359,7 +359,8 @@ def _convert_index(self, dataframe: df.DataFrame): def _raise_on_non_numeric(self, op: str): if not all( - dtype in dtypes.NUMERIC_BIGFRAMES_TYPES for dtype in self._block.dtypes + dtype in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE + for dtype in self._block.dtypes ): raise NotImplementedError( f"'{op}' does not support non-numeric columns. " @@ -371,7 +372,9 @@ def _raise_on_non_numeric(self, op: str): def _aggregated_columns(self, numeric_only: bool = False) -> typing.Sequence[str]: valid_agg_cols: list[str] = [] for col_id in self._selected_cols: - is_numeric = self._column_type(col_id) in dtypes.NUMERIC_BIGFRAMES_TYPES + is_numeric = ( + self._column_type(col_id) in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE + ) if is_numeric or not numeric_only: valid_agg_cols.append(col_id) return valid_agg_cols diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 98aa8f1185..423c2bcaac 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1800,7 +1800,7 @@ def agg( ) -> DataFrame | bigframes.series.Series: if utils.is_list_like(func): if any( - dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES + dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE for dtype in self.dtypes ): raise NotImplementedError( @@ -1867,7 +1867,7 @@ def melt( ) def describe(self) -> DataFrame: - df_numeric = self._drop_non_numeric(keep_bool=False) + df_numeric = self._drop_non_numeric(permissive=False) if len(df_numeric.columns) == 0: raise NotImplementedError( f"df.describe() currently only supports numeric values. 
{constants.FEEDBACK_LINK}" @@ -2005,10 +2005,12 @@ def unstack(self, level: LevelsType = -1): ) return DataFrame(pivot_block) - def _drop_non_numeric(self, keep_bool=True) -> DataFrame: - types_to_keep = set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES) - if not keep_bool: - types_to_keep -= set(bigframes.dtypes.BOOL_BIGFRAMES_TYPES) + def _drop_non_numeric(self, permissive=True) -> DataFrame: + types_to_keep = ( + set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) + if permissive + else set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE) + ) non_numeric_cols = [ col_id for col_id, dtype in zip(self._block.value_columns, self._block.dtypes) @@ -2026,7 +2028,7 @@ def _drop_non_bool(self) -> DataFrame: def _raise_on_non_numeric(self, op: str): if not all( - dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES + dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE for dtype in self._block.dtypes ): raise NotImplementedError( @@ -2301,7 +2303,7 @@ def notna(self) -> DataFrame: def cumsum(self): is_numeric_types = [ - (dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES) + (dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) for _, dtype in self.dtypes.items() ] if not all(is_numeric_types): @@ -2313,7 +2315,7 @@ def cumsum(self): def cumprod(self) -> DataFrame: is_numeric_types = [ - (dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES) + (dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) for _, dtype in self.dtypes.items() ] if not all(is_numeric_types): diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 891c372a10..b754acea2e 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -15,6 +15,7 @@ """Mappings for Pandas dtypes supported by BigQuery DataFrames package""" import datetime +import decimal import textwrap import typing from typing import Any, Dict, Iterable, Literal, Tuple, Union @@ -30,6 +31,7 @@ import bigframes.constants as constants import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as gcb3p_pandas_helpers +import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ @@ -40,9 +42,6 @@ pd.ArrowDtype, ] -# Corresponds to the pandas concept of numeric type (such as when 'numeric_only' is specified in an operation) -NUMERIC_BIGFRAMES_TYPES = [pd.BooleanDtype(), pd.Float64Dtype(), pd.Int64Dtype()] - # On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable UNORDERED_DTYPES = [gpd.array.GeometryDtype()] @@ -57,6 +56,9 @@ "timestamp[us][pyarrow]", "date32[day][pyarrow]", "time64[us][pyarrow]", + "decimal128(38, 9)[pyarrow]", + "decimal256(38, 9)[pyarrow]", + "binary[pyarrow]", ] # Type hints for Ibis data types supported by BigQuery DataFrame @@ -72,8 +74,17 @@ BOOL_BIGFRAMES_TYPES = [pd.BooleanDtype()] -# Several operations are restricted to these types. 
-NUMERIC_BIGFRAMES_TYPES = [pd.BooleanDtype(), pd.Float64Dtype(), pd.Int64Dtype()] +# Corresponds to the pandas concept of numeric type (such as when 'numeric_only' is specified in an operation) +# Pandas is inconsistent, so two definitions are provided, each used in different contexts +NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE = [ + pd.Float64Dtype(), + pd.Int64Dtype(), +] +NUMERIC_BIGFRAMES_TYPES_PERMISSIVE = NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE + [ + pd.BooleanDtype(), + pd.ArrowDtype(pa.decimal128(38, 9)), + pd.ArrowDtype(pa.decimal256(76, 38)), +] # Type hints for Ibis data types that can be read to Python objects by BigQuery DataFrame ReadOnlyIbisDtype = Union[ @@ -97,6 +108,15 @@ ibis_dtypes.Timestamp(timezone="UTC"), pd.ArrowDtype(pa.timestamp("us", tz="UTC")), ), + (ibis_dtypes.binary, pd.ArrowDtype(pa.binary())), + ( + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), + pd.ArrowDtype(pa.decimal128(38, 9)), + ), + ( + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), + pd.ArrowDtype(pa.decimal256(76, 38)), + ), ) BIGFRAMES_TO_IBIS: Dict[Dtype, ibis_dtypes.DataType] = { @@ -112,6 +132,9 @@ ibis_dtypes.time: pa.time64("us"), ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"), ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"), + ibis_dtypes.binary: pa.binary(), + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): pa.decimal128(38, 9), + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): pa.decimal256(76, 38), } ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()} @@ -125,10 +148,6 @@ ) IBIS_TO_BIGFRAMES.update( { - ibis_dtypes.binary: np.dtype("O"), - ibis_dtypes.json: np.dtype("O"), - ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): np.dtype("O"), - ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): np.dtype("O"), ibis_dtypes.GeoSpatial( geotype="geography", srid=4326, nullable=True ): gpd.array.GeometryDtype(), @@ -178,7 +197,7 @@ def ibis_dtype_to_bigframes_dtype( # our IO returns them as objects. Eventually, we should support them as # ArrowDType (and update the IO accordingly) if isinstance(ibis_dtype, ibis_dtypes.Array): - return np.dtype("O") + return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) if isinstance(ibis_dtype, ibis_dtypes.Struct): return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) @@ -200,7 +219,9 @@ def ibis_dtype_to_bigframes_dtype( def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: if isinstance(ibis_dtype, ibis_dtypes.Array): - return pa.list_(ibis_dtype_to_arrow_dtype(ibis_dtype.value_type)) + return pa.list_( + ibis_dtype_to_arrow_dtype(ibis_dtype.value_type.copy(nullable=True)) + ) if isinstance(ibis_dtype, ibis_dtypes.Struct): return pa.struct( @@ -224,21 +245,13 @@ def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: This is useful in cases where multiple types correspond to the same BigFrames dtype. """ ibis_type = value.type() + name = value.get_name() + if ibis_type.is_json(): + value = vendored_ibis_ops.ToJsonString(value).to_expr() + return value.name(name) # Allow REQUIRED fields to be joined with NULLABLE fields. nullable_type = ibis_type.copy(nullable=True) - return value.cast(nullable_type).name(value.get_name()) - - -def ibis_table_to_canonical_types(table: ibis_types.Table) -> ibis_types.Table: - """Converts an Ibis table expression to canonical types. - - This is useful in cases where multiple types correspond to the same BigFrames dtype. 
- """ - casted_columns = [] - for column_name in table.columns: - column = typing.cast(ibis_types.Value, table[column_name]) - casted_columns.append(ibis_value_to_canonical_type(column)) - return table.select(*casted_columns) + return value.cast(nullable_type).name(name) def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: @@ -386,15 +399,35 @@ def cast_ibis_value( ibis_dtypes.bool, ibis_dtypes.float64, ibis_dtypes.string, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ), + ibis_dtypes.float64: ( + ibis_dtypes.string, + ibis_dtypes.int64, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ), + ibis_dtypes.string: ( + ibis_dtypes.int64, + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=38, scale=9), + ibis_dtypes.Decimal(precision=76, scale=38), + ibis_dtypes.binary, ), - ibis_dtypes.float64: (ibis_dtypes.string, ibis_dtypes.int64), - ibis_dtypes.string: (ibis_dtypes.int64, ibis_dtypes.float64), ibis_dtypes.date: (ibis_dtypes.string,), - ibis_dtypes.Decimal(precision=38, scale=9): (ibis_dtypes.float64,), - ibis_dtypes.Decimal(precision=76, scale=38): (ibis_dtypes.float64,), + ibis_dtypes.Decimal(precision=38, scale=9): ( + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=76, scale=38), + ), + ibis_dtypes.Decimal(precision=76, scale=38): ( + ibis_dtypes.float64, + ibis_dtypes.Decimal(precision=38, scale=9), + ), ibis_dtypes.time: (), ibis_dtypes.timestamp: (ibis_dtypes.Timestamp(timezone="UTC"),), ibis_dtypes.Timestamp(timezone="UTC"): (ibis_dtypes.timestamp,), + ibis_dtypes.binary: (ibis_dtypes.string,), } value = ibis_value_to_canonical_type(value) @@ -458,30 +491,62 @@ def is_dtype(scalar: typing.Any, dtype: Dtype) -> bool: return False +# string is binary def is_patype(scalar: typing.Any, pa_type: pa.DataType) -> bool: """Determine whether a scalar's type matches a given pyarrow type.""" if pa_type == pa.time64("us"): return isinstance(scalar, datetime.time) - if pa_type == pa.timestamp("us"): + elif pa_type == pa.timestamp("us"): if isinstance(scalar, datetime.datetime): return not scalar.tzinfo if isinstance(scalar, pd.Timestamp): return not scalar.tzinfo - if pa_type == pa.timestamp("us", tz="UTC"): + elif pa_type == pa.timestamp("us", tz="UTC"): if isinstance(scalar, datetime.datetime): return scalar.tzinfo == datetime.timezone.utc if isinstance(scalar, pd.Timestamp): return scalar.tzinfo == datetime.timezone.utc - if pa_type == pa.date32(): + elif pa_type == pa.date32(): return isinstance(scalar, datetime.date) + elif pa_type == pa.binary(): + return isinstance(scalar, bytes) + elif pa_type == pa.decimal128(38, 9): + # decimal.Decimal is a superset, but ibis performs out-of-bounds and loss-of-precision checks + return isinstance(scalar, decimal.Decimal) + elif pa_type == pa.decimal256(76, 38): + # decimal.Decimal is a superset, but ibis performs out-of-bounds and loss-of-precision checks + return isinstance(scalar, decimal.Decimal) return False -def is_comparable(scalar: typing.Any, dtype: Dtype) -> bool: - """Whether scalar can be compare to items of dtype (though maybe requiring coercion)""" +def is_compatible(scalar: typing.Any, dtype: Dtype) -> typing.Optional[Dtype]: + """Whether scalar can be compare to items of dtype (though maybe requiring coercion). 
    Returns the datatype that must be used for the comparison."""
     if is_dtype(scalar, dtype):
-        return True
+        return dtype
     elif pd.api.types.is_numeric_dtype(dtype):
-        return pd.api.types.is_number(scalar)
-    else:
-        return False
+        # Implicit conversion currently only supported for numeric types
+        if pd.api.types.is_bool(scalar):
+            return lcd_type(pd.BooleanDtype(), dtype)
+        if pd.api.types.is_float(scalar):
+            return lcd_type(pd.Float64Dtype(), dtype)
+        if pd.api.types.is_integer(scalar):
+            return lcd_type(pd.Int64Dtype(), dtype)
+        if isinstance(scalar, decimal.Decimal):
+            # TODO: Check context to see if can use NUMERIC instead of BIGNUMERIC
+            return lcd_type(pd.ArrowDtype(pa.decimal256(76, 38)), dtype)
+    return None
+
+
+def lcd_type(dtype1: Dtype, dtype2: Dtype) -> typing.Optional[Dtype]:
+    # Implicit conversion currently only supported for numeric types
+    hierarchy: list[Dtype] = [
+        pd.BooleanDtype(),
+        pd.Int64Dtype(),
+        pd.Float64Dtype(),
+        pd.ArrowDtype(pa.decimal128(38, 9)),
+        pd.ArrowDtype(pa.decimal256(76, 38)),
+    ]
+    if (dtype1 not in hierarchy) or (dtype2 not in hierarchy):
+        return None
+    lcd_index = max(hierarchy.index(dtype1), hierarchy.index(dtype2))
+    return hierarchy[lcd_index]
diff --git a/bigframes/series.py b/bigframes/series.py
index 6837c1c7f8..eefd2b755d 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import functools
 import itertools
 import numbers
 import textwrap
@@ -455,7 +456,7 @@ def replace(
         else:  # Scalar
             replace_list = [to_replace]
         replace_list = [
-            i for i in replace_list if bigframes.dtypes.is_comparable(i, self.dtype)
+            i for i in replace_list if bigframes.dtypes.is_compatible(i, self.dtype)
         ]
         return self._simple_replace(replace_list, value) if replace_list else self
 
@@ -472,11 +473,15 @@ def _regex_replace(self, to_replace: str, value: str):
         return Series(block.select_column(result_col))
 
     def _simple_replace(self, to_replace_list: typing.Sequence, value):
-        if not bigframes.dtypes.is_dtype(value, self.dtype):
+        result_type = bigframes.dtypes.is_compatible(value, self.dtype)
+        if not result_type:
             raise NotImplementedError(
                 f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
             )
 
+        if result_type != self.dtype:
+            return self.astype(result_type)._simple_replace(to_replace_list, value)
+
         block, cond = self._block.apply_unary_op(
             self._value_column, ops.IsInOp(to_replace_list)
         )
@@ -490,15 +495,26 @@ def _simple_replace(self, to_replace_list: typing.Sequence, value):
 
     def _mapping_replace(self, mapping: dict[typing.Hashable, typing.Hashable]):
         tuples = []
+        lcd_types: list[typing.Optional[bigframes.dtypes.Dtype]] = []
         for key, value in mapping.items():
-            if not bigframes.dtypes.is_comparable(key, self.dtype):
+            lcd_type = bigframes.dtypes.is_compatible(key, self.dtype)
+            if not lcd_type:
                 continue
             if not bigframes.dtypes.is_dtype(value, self.dtype):
                 raise NotImplementedError(
                     f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
                 )
             tuples.append((key, value))
+            lcd_types.append(lcd_type)
 
+        result_dtype = functools.reduce(
+            lambda t1, t2: bigframes.dtypes.lcd_type(t1, t2) if (t1 and t2) else None,
+            lcd_types,
+        )
+        if not result_dtype:
+            raise NotImplementedError(
+                f"Cannot replace {self.dtype} elements with incompatible mapping {mapping} as mixed-type columns not supported.
{constants.FEEDBACK_LINK}" + ) block, result = self._block.apply_unary_op( self._value_column, ops.MapOp(tuple(tuples)) ) @@ -782,7 +798,7 @@ def _central_moment(self, n: int) -> float: def agg(self, func: str | typing.Sequence[str]) -> scalars.Scalar | Series: if _is_list_like(func): - if self.dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES: + if self.dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE: raise NotImplementedError( f"Multiple aggregations only supported on numeric series. {constants.FEEDBACK_LINK}" ) diff --git a/tests/system/large/ml/test_compose.py b/tests/system/large/ml/test_compose.py index 0c280e5d02..6ea4f72489 100644 --- a/tests/system/large/ml/test_compose.py +++ b/tests/system/large/ml/test_compose.py @@ -72,7 +72,7 @@ def test_columntransformer_standalone_fit_and_transform( expected.standard_scaled_flipper_length_mm.astype("Float64") ) - pandas.testing.assert_frame_equal(result, expected, rtol=1e-3) + pandas.testing.assert_frame_equal(result, expected, rtol=1e-3, check_dtype=False) def test_columntransformer_standalone_fit_transform(new_penguins_df): @@ -123,4 +123,4 @@ def test_columntransformer_standalone_fit_transform(new_penguins_df): expected.standard_scaled_flipper_length_mm.astype("Float64") ) - pandas.testing.assert_frame_equal(result, expected, rtol=1e-3) + pandas.testing.assert_frame_equal(result, expected, rtol=1e-3, check_dtype=False) diff --git a/tests/system/large/ml/test_core.py b/tests/system/large/ml/test_core.py index 3b30d7eb1d..df387e6ee1 100644 --- a/tests/system/large/ml/test_core.py +++ b/tests/system/large/ml/test_core.py @@ -184,4 +184,5 @@ def test_bqml_standalone_transform(penguins_df_default_index, new_penguins_df): expected, check_exact=False, rtol=0.1, + check_dtype=False, ) diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index eece5ef21d..f39815aec2 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -292,11 +292,12 @@ def test_model_predict_with_unnamed_index( def test_remote_model_predict( bqml_linear_remote_model: core.BqmlModel, new_penguins_df ): - predictions = bqml_linear_remote_model.predict(new_penguins_df).to_pandas() expected = pd.DataFrame( {"predicted_body_mass_g": [[3739.54], [3675.79], [3619.54]]}, index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + dtype=pd.ArrowDtype(pa.list_(pa.float64())), ) + predictions = bqml_linear_remote_model.predict(new_penguins_df).to_pandas() pd.testing.assert_frame_equal( predictions[["predicted_body_mass_g"]].sort_index(), expected, diff --git a/tests/system/small/ml/test_imported.py b/tests/system/small/ml/test_imported.py index 9008e85a0b..8ffd9924e9 100644 --- a/tests/system/small/ml/test_imported.py +++ b/tests/system/small/ml/test_imported.py @@ -51,6 +51,7 @@ def test_tensorflow_model_predict(imported_tensorflow_model, llm_text_df): result, expected, check_exact=False, + check_dtype=False, atol=0.1, ) @@ -90,6 +91,7 @@ def test_onnx_model_predict(imported_onnx_model, onnx_iris_df): result, expected, check_exact=False, + check_dtype=False, atol=0.1, ) diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index 267a2ed9c1..fd1b803eea 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import pytest from bigframes.ml import llm @@ -202,8 +201,7 @@ def test_embedding_generator_predict_success( assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] - assert isinstance(value, np.ndarray) - assert value.size == 768 + assert len(value) == 768 @pytest.mark.flaky(retries=2, delay=120) @@ -215,8 +213,7 @@ def test_embedding_generator_multilingual_predict_success( assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] - assert isinstance(value, np.ndarray) - assert value.size == 768 + assert len(value) == 768 @pytest.mark.flaky(retries=2, delay=120) @@ -228,5 +225,4 @@ def test_embedding_generator_predict_series_success( assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] - assert isinstance(value, np.ndarray) - assert value.size == 768 + assert len(value) == 768 diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index 45548acca3..c3bd7f3b87 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -15,6 +15,7 @@ import math import pandas as pd +import pyarrow as pa import bigframes.ml.preprocessing @@ -453,6 +454,9 @@ def test_one_hot_encoder_default_params(new_penguins_df): [{"index": 2, "value": 1.0}], ], }, + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("index", pa.int64()), ("value", pa.float64())])) + ), index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) @@ -482,6 +486,9 @@ def test_one_hot_encoder_default_params_fit_transform(new_penguins_df): [{"index": 2, "value": 1.0}], ], }, + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("index", pa.int64()), ("value", pa.float64())])) + ), index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) @@ -507,6 +514,9 @@ def test_one_hot_encoder_series_default_params(new_penguins_df): [{"index": 2, "value": 1.0}], ], }, + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("index", pa.int64()), ("value", pa.float64())])) + ), index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) @@ -537,6 +547,9 @@ def test_one_hot_encoder_params(new_penguins_df): [{"index": 0, "value": 1.0}], ], }, + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("index", pa.int64()), ("value", pa.float64())])) + ), index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) @@ -567,6 +580,9 @@ def test_one_hot_encoder_different_data(penguins_df_default_index, new_penguins_ [{"index": 2, "value": 1.0}], ], }, + dtype=pd.ArrowDtype( + pa.list_(pa.struct([("index", pa.int64()), ("value", pa.float64())])) + ), index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) diff --git a/tests/system/small/ml/test_remote.py b/tests/system/small/ml/test_remote.py index e8eb1c85e8..5036cdadfc 100644 --- a/tests/system/small/ml/test_remote.py +++ b/tests/system/small/ml/test_remote.py @@ -29,5 +29,6 @@ def test_remote_linear_vertex_model_predict( predictions[["predicted_body_mass_g"]].sort_index(), expected, check_exact=False, + check_dtype=False, rtol=0.1, ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ed78e73e5d..86b8cfbe66 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -19,7 +19,6 @@ from typing import Tuple import geopandas as gpd # type: ignore -import numpy as np import pandas as pd import pandas.testing import pyarrow as pa # type: ignore @@ -29,7 +28,11 @@ import bigframes._config.display_options as 
display_options import bigframes.dataframe as dataframe import bigframes.series as series -from tests.system.utils import assert_pandas_df_equal, assert_series_equal +from tests.system.utils import ( + assert_pandas_df_equal, + assert_series_equal, + skip_legacy_pandas, +) def test_df_construct_copy(scalars_dfs): @@ -273,19 +276,19 @@ def test_df_info(scalars_dfs): " # Column Non-Null Count Dtype\n" "--- ------------- ---------------- ------------------------------\n" " 0 bool_col 8 non-null boolean\n" - " 1 bytes_col 6 non-null object\n" + " 1 bytes_col 6 non-null binary[pyarrow]\n" " 2 date_col 7 non-null date32[day][pyarrow]\n" " 3 datetime_col 6 non-null timestamp[us][pyarrow]\n" " 4 geography_col 4 non-null geometry\n" " 5 int64_col 8 non-null Int64\n" " 6 int64_too 9 non-null Int64\n" - " 7 numeric_col 6 non-null object\n" + " 7 numeric_col 6 non-null decimal128(38, 9)[pyarrow]\n" " 8 float64_col 7 non-null Float64\n" " 9 rowindex_2 9 non-null Int64\n" " 10 string_col 8 non-null string\n" " 11 time_col 6 non-null time64[us][pyarrow]\n" " 12 timestamp_col 6 non-null timestamp[us, tz=UTC][pyarrow]\n" - "dtypes: Float64(1), Int64(3), boolean(1), date32[day][pyarrow](1), geometry(1), object(2), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n" + "dtypes: Float64(1), Int64(3), binary[pyarrow](1), boolean(1), date32[day][pyarrow](1), decimal128(38, 9)[pyarrow](1), geometry(1), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n" "memory usage: 945 bytes\n" ) @@ -362,6 +365,7 @@ def test_drop_bigframes_index_with_na(scalars_dfs): pd.testing.assert_frame_equal(pd_result, bf_result) +@skip_legacy_pandas def test_drop_bigframes_multiindex(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs scalars_df = scalars_df.copy() @@ -841,13 +845,11 @@ def test_df_fillna(scalars_dfs): def test_df_replace_scalar_scalar(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - bf_result = scalars_df.replace("Hello, World!", "Howdy, Planet!").to_pandas() - pd_result = scalars_pandas_df.replace("Hello, World!", "Howdy, Planet!") + bf_result = scalars_df.replace(555.555, 3).to_pandas() + pd_result = scalars_pandas_df.replace(555.555, 3) - pd.testing.assert_frame_equal( - pd_result, - bf_result, - ) + # pandas has narrower result types as they are determined dynamically + pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_df_replace_regex_scalar(scalars_dfs): @@ -863,12 +865,14 @@ def test_df_replace_regex_scalar(scalars_dfs): def test_df_replace_list_scalar(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - bf_result = scalars_df.replace(["Hello, World!", "T"], "Howdy, Planet!").to_pandas() - pd_result = scalars_pandas_df.replace(["Hello, World!", "T"], "Howdy, Planet!") + bf_result = scalars_df.replace([555.555, 3.2], 3).to_pandas() + pd_result = scalars_pandas_df.replace([555.555, 3.2], 3) + # pandas has narrower result types as they are determined dynamically pd.testing.assert_frame_equal( pd_result, bf_result, + check_dtype=False, ) @@ -1198,13 +1202,13 @@ def test_get_dtypes(scalars_df_default_index): pd.Series( { "bool_col": pd.BooleanDtype(), - "bytes_col": np.dtype("O"), + "bytes_col": pd.ArrowDtype(pa.binary()), "date_col": pd.ArrowDtype(pa.date32()), "datetime_col": pd.ArrowDtype(pa.timestamp("us")), "geography_col": gpd.array.GeometryDtype(), "int64_col": pd.Int64Dtype(), "int64_too": pd.Int64Dtype(), - "numeric_col": np.dtype("O"), + "numeric_col": 
pd.ArrowDtype(pa.decimal128(38, 9)),
                 "float64_col": pd.Float64Dtype(),
                 "rowindex": pd.Int64Dtype(),
                 "rowindex_2": pd.Int64Dtype(),
@@ -1232,7 +1236,7 @@ def test_get_dtypes_array_struct(session):
         dtypes,
         pd.Series(
             {
-                "array_column": np.dtype("O"),
+                "array_column": pd.ArrowDtype(pa.list_(pa.int64())),
                 "struct_column": pd.ArrowDtype(
                     pa.struct(
                         [
@@ -2138,6 +2142,7 @@ def test_dataframe_agg_multi_string(scalars_dfs):
     ).all()
 
 
+@skip_legacy_pandas
 def test_df_describe(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     # pyarrows time columns fail in pandas
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index fb9fb7bb89..59864e483e 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -56,7 +56,9 @@ def test_to_pandas_array_struct_correct_result(session):
     result = df.to_pandas()
     expected = pd.DataFrame(
         {
-            "array_column": [[1, 3, 2]],
+            "array_column": pd.Series(
+                [[1, 3, 2]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))
+            ),
             "struct_column": pd.Series(
                 [{"string_field": "a", "float_field": 1.2}],
                 dtype=pd.ArrowDtype(
@@ -91,7 +93,8 @@ def test_load_json(session):
     expected = pd.DataFrame(
         {
             "json_column": ['{"bar":true,"foo":10}'],
-        }
+        },
+        dtype=pd.StringDtype(storage="pyarrow"),
     )
     expected.index = expected.index.astype("Int64")
     pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
@@ -137,6 +140,8 @@ def test_to_csv_index(
     dtype = scalars_df.reset_index().dtypes.to_dict()
     dtype.pop("geography_col")
     dtype.pop("rowindex")
+    # read_csv will decode into bytes improperly, convert_pandas_dtypes will encode properly from string
+    dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
         path,
         dtype=dtype,
@@ -148,7 +153,6 @@ def test_to_csv_index(
     scalars_pandas_df = scalars_pandas_df.copy()
     scalars_pandas_df.index = scalars_pandas_df.index.astype("int64")
 
-    # Ordering should be maintained for tables smaller than 1 GB.
     pd.testing.assert_frame_equal(gcs_df, scalars_pandas_df)
 
@@ -174,6 +178,8 @@ def test_to_csv_tabs(
     dtype = scalars_df.reset_index().dtypes.to_dict()
     dtype.pop("geography_col")
     dtype.pop("rowindex")
+    # read_csv will decode into bytes improperly, convert_pandas_dtypes will encode properly from string
+    dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
         path,
         sep="\t",
@@ -216,6 +222,8 @@ def test_to_gbq_index(scalars_dfs, dataset_id, index):
         df_out = df_out.sort_values("rowindex_2").reset_index(drop=True)
 
     convert_pandas_dtypes(df_out, bytes_col=False)
+    # pd.read_gbq interprets bytes_col as object, reconvert to pyarrow binary
+    df_out["bytes_col"] = df_out["bytes_col"].astype(pd.ArrowDtype(pa.binary()))
     expected = scalars_pandas_df.copy()
     expected.index.name = index_col
     pd.testing.assert_frame_equal(df_out, expected, check_index_type=False)
@@ -377,7 +385,9 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index):
         scalars_pandas_df.index = scalars_pandas_df.index.astype("Int64")
 
     # Ordering should be maintained for tables smaller than 1 GB.
-    pd.testing.assert_frame_equal(gcs_df, scalars_pandas_df)
+    pd.testing.assert_frame_equal(
+        gcs_df.drop("bytes_col", axis=1), scalars_pandas_df.drop("bytes_col", axis=1)
+    )
 
 
 def test_to_sql_query_unnamed_index_included(
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
index 1708735f4c..2d4e1f0204 100644
--- a/tests/system/small/test_multiindex.py
+++ b/tests/system/small/test_multiindex.py
@@ -394,14 +394,17 @@ def test_multi_index_dataframe_groupby_level_aggregate(
 def test_multi_index_dataframe_groupby_level_analytic(
     scalars_df_index, scalars_pandas_df_index, level, as_index
 ):
+    # Drop "numeric_col" as pandas doesn't support numerics for grouped window functions
     bf_result = (
-        scalars_df_index.set_index(["int64_too", "bool_col"])
+        scalars_df_index.drop("numeric_col", axis=1)
+        .set_index(["int64_too", "bool_col"])
         .groupby(level=level, as_index=as_index, dropna=False)
         .cumsum(numeric_only=True)
         .to_pandas()
     )
     pd_result = (
-        scalars_pandas_df_index.set_index(["int64_too", "bool_col"])
+        scalars_pandas_df_index.drop("numeric_col", axis=1)
+        .set_index(["int64_too", "bool_col"])
         .groupby(level=level, as_index=as_index, dropna=False)
         .cumsum(numeric_only=True)
     )
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 623da74aa4..6f919f740f 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -24,7 +24,11 @@
 
 import bigframes.pandas
 import bigframes.series as series
-from tests.system.utils import assert_pandas_df_equal, assert_series_equal
+from tests.system.utils import (
+    assert_pandas_df_equal,
+    assert_series_equal,
+    skip_legacy_pandas,
+)
 
 
 def test_series_construct_copy(scalars_dfs):
@@ -81,14 +85,14 @@ def test_series_construct_from_list_escaped_strings():
     [
         ("bool_col", pd.BooleanDtype()),
         # TODO(swast): Use a more efficient type.
-        ("bytes_col", numpy.dtype("object")),
+        ("bytes_col", pd.ArrowDtype(pa.binary())),
         ("date_col", pd.ArrowDtype(pa.date32())),
         ("datetime_col", pd.ArrowDtype(pa.timestamp("us"))),
         ("float64_col", pd.Float64Dtype()),
         ("geography_col", gpd.array.GeometryDtype()),
         ("int64_col", pd.Int64Dtype()),
         # TODO(swast): Use a more efficient type.
- ("numeric_col", numpy.dtype("object")), + ("numeric_col", pd.ArrowDtype(pa.decimal128(38, 9))), ("int64_too", pd.Int64Dtype()), ("string_col", pd.StringDtype(storage="pyarrow")), ("time_col", pd.ArrowDtype(pa.time64("us"))), @@ -2519,8 +2523,12 @@ def test_mask_custom_value(scalars_dfs): ("int64_col", pd.Float64Dtype()), ("int64_col", "string[pyarrow]"), ("int64_col", "boolean"), + ("int64_col", pd.ArrowDtype(pa.decimal128(38, 9))), + ("int64_col", pd.ArrowDtype(pa.decimal256(76, 38))), ("bool_col", "Int64"), ("bool_col", "string[pyarrow]"), + ("string_col", "binary[pyarrow]"), + ("bytes_col", "string[pyarrow]"), # pandas actually doesn't let folks convert to/from naive timestamp and # raises a deprecation warning to use tz_localize/tz_convert instead, # but BigQuery always stores values as UTC and doesn't have to deal @@ -2538,6 +2546,7 @@ def test_mask_custom_value(scalars_dfs): # https://cloud.google.com/bigquery/docs/reference/standard-sql/conversion_functions ], ) +@skip_legacy_pandas def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type): bf_result = scalars_df_index[column].astype(to_type).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index e6eb40a5fa..8ce442376a 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -30,6 +30,7 @@ import bigframes.dataframe import bigframes.dtypes import bigframes.ml.linear_model +from tests.system.utils import skip_legacy_pandas FIRST_FILE = "000000000000" @@ -385,6 +386,7 @@ def test_read_pandas_tokyo( pd.testing.assert_frame_equal(result, expected) +@skip_legacy_pandas def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder): scalars_df, _ = scalars_dfs if scalars_df.index.name is not None: @@ -441,6 +443,7 @@ def test_read_csv_gcs_bq_engine(session, scalars_dfs, gcs_folder): pytest.param("\t", id="custom_sep"), ], ) +@skip_legacy_pandas def test_read_csv_local_default_engine(session, scalars_dfs, sep): scalars_df, scalars_pandas_df = scalars_dfs with tempfile.TemporaryDirectory() as dir: diff --git a/tests/system/utils.py b/tests/system/utils.py index f49b5ece31..a4647b4f51 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -14,11 +14,23 @@ import base64 import decimal +import functools import geopandas as gpd # type: ignore import numpy as np import pandas as pd import pyarrow as pa # type: ignore +import pytest + + +def skip_legacy_pandas(test): + @functools.wraps(test) + def wrapper(*args, **kwds): + if pd.__version__.startswith("1."): + pytest.skip("Skips pandas 1.x as not compatible with 2.x behavior.") + return test(*args, **kwds) + + return wrapper def assert_pandas_df_equal(df0, df1, ignore_order: bool = False, **kwargs): @@ -133,16 +145,28 @@ def convert_pandas_dtypes(df: pd.DataFrame, bytes_col: bool): df["geography_col"].replace({np.nan: None}) ) - # Convert bytes types column. - if bytes_col: + if bytes_col and not isinstance(df["bytes_col"].dtype, pd.ArrowDtype): df["bytes_col"] = df["bytes_col"].apply( lambda value: base64.b64decode(value) if not pd.isnull(value) else value ) + arrow_table = pa.Table.from_pandas( + pd.DataFrame(df, columns=["bytes_col"]), + schema=pa.schema([("bytes_col", pa.binary())]), + ) + df["bytes_col"] = arrow_table.to_pandas(types_mapper=pd.ArrowDtype)["bytes_col"] - # Convert numeric types column. 
- df["numeric_col"] = df["numeric_col"].apply( - lambda value: decimal.Decimal(str(value)) if value else None # type: ignore - ) + if not isinstance(df["numeric_col"].dtype, pd.ArrowDtype): + # Convert numeric types column. + df["numeric_col"] = df["numeric_col"].apply( + lambda value: decimal.Decimal(str(value)) if value else None # type: ignore + ) + arrow_table = pa.Table.from_pandas( + pd.DataFrame(df, columns=["numeric_col"]), + schema=pa.schema([("numeric_col", pa.decimal128(38, 9))]), + ) + df["numeric_col"] = arrow_table.to_pandas(types_mapper=pd.ArrowDtype)[ + "numeric_col" + ] def assert_pandas_df_equal_pca_components(actual, expected, **kwargs): diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 6ceaaf911b..e648fd28cc 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -31,11 +31,11 @@ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types pytest.param( ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), - np.dtype("O"), + pd.ArrowDtype(pa.decimal256(76, 38)), id="bignumeric", ), pytest.param(ibis_dtypes.boolean, pd.BooleanDtype(), id="bool"), - pytest.param(ibis_dtypes.binary, np.dtype("O"), id="bytes"), + pytest.param(ibis_dtypes.binary, pd.ArrowDtype(pa.binary()), id="bytes"), pytest.param(ibis_dtypes.date, pd.ArrowDtype(pa.date32()), id="date"), pytest.param( ibis_dtypes.Timestamp(), pd.ArrowDtype(pa.timestamp("us")), id="datetime" @@ -49,10 +49,9 @@ pytest.param(ibis_dtypes.int8, pd.Int64Dtype(), id="int8-as-int64"), pytest.param(ibis_dtypes.int64, pd.Int64Dtype(), id="int64"), # TODO(tswast): custom dtype (or at least string dtype) for JSON objects - pytest.param(ibis_dtypes.json, np.dtype("O"), id="json"), pytest.param( ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), - np.dtype("O"), + pd.ArrowDtype(pa.decimal128(38, 9)), id="numeric", ), pytest.param( From 9c5012ec68275db83d1f6f7e743f5edaaaacd8cb Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 21 Dec 2023 23:41:45 +0000 Subject: [PATCH 15/27] docs: code samples for `drop` and `fillna` (#284) --- .../bigframes_vendored/pandas/core/frame.py | 141 ++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 81 ++++++++++ 2 files changed, 222 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 00be9e5e9e..427e586c52 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -869,6 +869,97 @@ def drop( Remove columns by directly specifying column names. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame(np.arange(12).reshape(3, 4), + ... columns=['A', 'B', 'C', 'D']) + >>> df + A B C D + 0 0 1 2 3 + 1 4 5 6 7 + 2 8 9 10 11 + + [3 rows x 4 columns] + + Drop columns: + + >>> df.drop(['B', 'C'], axis=1) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + [3 rows x 2 columns] + + >>> df.drop(columns=['B', 'C']) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + [3 rows x 2 columns] + + Drop a row by index: + + >>> df.drop([0, 1]) + A B C D + 2 8 9 10 11 + + [1 rows x 4 columns] + + Drop columns and/or rows of MultiIndex DataFrame: + + >>> import pandas as pd + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> df = bpd.DataFrame(index=midx, columns=['big', 'small'], + ... 
+            ...                    data=[[45, 30], [200, 100], [1.5, 1], [30, 20],
+            ...                          [250, 150], [1.5, 0.8], [320, 250],
+            ...                          [1, 0.8], [0.3, 0.2]])
+            >>> df
+                             big  small
+            llama  speed    45.0   30.0
+                   weight  200.0  100.0
+                   length    1.5    1.0
+            cow    speed    30.0   20.0
+                   weight  250.0  150.0
+                   length    1.5    0.8
+            falcon speed   320.0  250.0
+                   weight    1.0    0.8
+                   length    0.3    0.2
+
+            [9 rows x 2 columns]
+
+            Drop a specific index and column combination from the MultiIndex
+            DataFrame, i.e., drop the index ``'cow'`` and column ``'small'``:
+
+            >>> df.drop(index='cow', columns='small')
+                              big
+            llama  speed    45.0
+                   weight  200.0
+                   length    1.5
+            falcon speed   320.0
+                   weight    1.0
+                   length    0.3
+
+            [6 rows x 1 columns]
+
+            >>> df.drop(index='length', level=1)
+                             big  small
+            llama  speed    45.0   30.0
+                   weight  200.0  100.0
+            cow    speed    30.0   20.0
+                   weight  250.0  150.0
+            falcon speed   320.0  250.0
+                   weight    1.0    0.8
+
+            [6 rows x 2 columns]
+
         Args:
             labels:
                 Index or column labels to drop.
@@ -4343,6 +4434,56 @@ def fillna(self, value):
         """
         Fill NA/NaN values using the specified method.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> df = bpd.DataFrame([[np.nan, 2, np.nan, 0],
+            ...                     [3, 4, np.nan, 1],
+            ...                     [np.nan, np.nan, np.nan, np.nan],
+            ...                     [np.nan, 3, np.nan, 4]],
+            ...                    columns=list("ABCD")).astype("Float64")
+            >>> df
+                  A     B     C     D
+            0  <NA>   2.0  <NA>   0.0
+            1   3.0   4.0  <NA>   1.0
+            2  <NA>  <NA>  <NA>  <NA>
+            3  <NA>   3.0  <NA>   4.0
+
+            [4 rows x 4 columns]
+
+            Replace all NA elements with 0s.
+
+            >>> df.fillna(0)
+                 A    B    C    D
+            0  0.0  2.0  0.0  0.0
+            1  3.0  4.0  0.0  1.0
+            2  0.0  0.0  0.0  0.0
+            3  0.0  3.0  0.0  4.0
+
+            [4 rows x 4 columns]
+
+            You can use fill values from another DataFrame:
+
+            >>> df_fill = bpd.DataFrame(np.arange(12).reshape(3, 4),
+            ...                         columns=['A', 'B', 'C', 'D'])
+            >>> df_fill
+               A  B   C   D
+            0  0  1   2   3
+            1  4  5   6   7
+            2  8  9  10  11
+
+            [3 rows x 4 columns]
+
+            >>> df.fillna(df_fill)
+                  A    B     C     D
+            0   0.0  2.0   2.0   0.0
+            1   3.0  4.0   6.0   1.0
+            2   8.0  9.0  10.0  11.0
+            3  <NA>  3.0  <NA>   4.0
+
+            [4 rows x 4 columns]
+
         Args:
             value (scalar, Series):
                 Value to use to fill holes (e.g. 0), alternately a
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 366f32c77e..01cc3a0500 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1062,6 +1062,55 @@ def drop(
 
         When using a multi-index, labels on different levels can be removed
         by specifying the level.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> s = bpd.Series(data=np.arange(3), index=['A', 'B', 'C'])
+            >>> s
+            A    0
+            B    1
+            C    2
+            dtype: Int64
+
+            Drop labels B and C:
+
+            >>> s.drop(labels=['B', 'C'])
+            A    0
+            dtype: Int64
+
+            Drop 2nd level label in MultiIndex Series:
+
+            >>> import pandas as pd
+            >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'],
+            ...                              ['speed', 'weight', 'length']],
+            ...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
+            ...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
+
+            >>> s = bpd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
+            ...                index=midx)
+            >>> s
+            llama   speed      45.0
+                    weight    200.0
+                    length      1.2
+            cow     speed      30.0
+                    weight    250.0
+                    length      1.5
+            falcon  speed     320.0
+                    weight      1.0
+                    length      0.3
+            dtype: Float64
+
+            >>> s.drop(labels='weight', level=1)
+            llama   speed      45.0
+                    length      1.2
+            cow     speed      30.0
+                    length      1.5
+            falcon  speed     320.0
+                    length      0.3
+            dtype: Float64
+
         Args:
             labels (single label or list-like):
                 Index labels to drop.
@@ -1193,6 +1242,38 @@ def fillna(
         """
         Fill NA/NaN values using the specified method.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> s = bpd.Series([np.nan, 2, np.nan, -1])
+            >>> s
+            0    <NA>
+            1     2.0
+            2    <NA>
+            3    -1.0
+            dtype: Float64
+
+            Replace all NA elements with 0s.
+
+            >>> s.fillna(0)
+            0     0.0
+            1     2.0
+            2     0.0
+            3    -1.0
+            dtype: Float64
+
+            You can use fill values from another Series:
+
+            >>> s_fill = bpd.Series([11, 22, 33])
+            >>> s.fillna(s_fill)
+            0    11.0
+            1     2.0
+            2    33.0
+            3    -1.0
+            dtype: Float64
+
         Args:
             value (scalar, dict, Series, or DataFrame, default None):
                 Value to use to fill holes (e.g. 0).
From acc0eb7010951c8cfb91aecc45268b041217dd09 Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Fri, 22 Dec 2023 02:45:36 +0000
Subject: [PATCH 16/27] docs: code samples for `reset_index` and `sort_values`
 (#282)

* docs: code samples for `reset_index` and `sort_values`

* fix alignment in dataframe api code samples
---
 .../bigframes_vendored/pandas/core/frame.py   | 161 ++++++++++++++++++
 .../bigframes_vendored/pandas/core/series.py  | 110 ++++++++++++
 2 files changed, 271 insertions(+)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 427e586c52..fb34193710 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1138,6 +1138,93 @@ def reset_index(
 
         Reset the index of the DataFrame, and use the default one instead.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> df = bpd.DataFrame([('bird', 389.0),
+            ...                     ('bird', 24.0),
+            ...                     ('mammal', 80.5),
+            ...                     ('mammal', np.nan)],
+            ...                    index=['falcon', 'parrot', 'lion', 'monkey'],
+            ...                    columns=('class', 'max_speed'))
+            >>> df
+                     class  max_speed
+            falcon    bird      389.0
+            parrot    bird       24.0
+            lion    mammal       80.5
+            monkey  mammal       <NA>
+
+            [4 rows x 2 columns]
+
+            When we reset the index, the old index is added as a column, and a new sequential index is used:
+
+            >>> df.reset_index()
+                index   class  max_speed
+            0  falcon    bird      389.0
+            1  parrot    bird       24.0
+            2    lion  mammal       80.5
+            3  monkey  mammal       <NA>
+
+            [4 rows x 3 columns]
+
+            We can use the ``drop`` parameter to avoid the old index being added as a column:
+
+            >>> df.reset_index(drop=True)
+                class  max_speed
+            0    bird      389.0
+            1    bird       24.0
+            2  mammal       80.5
+            3  mammal       <NA>
+
+            [4 rows x 2 columns]
+
+            You can also use ``reset_index`` with ``MultiIndex``.
+
+            >>> import pandas as pd
+            >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'),
+            ...                                    ('bird', 'parrot'),
+            ...                                    ('mammal', 'lion'),
+            ...                                    ('mammal', 'monkey')],
+            ...                                   names=['class', 'name'])
+            >>> columns = ['speed', 'max']
+            >>> df = bpd.DataFrame([(389.0, 'fly'),
+            ...                     (24.0, 'fly'),
+            ...                     (80.5, 'run'),
+            ...                     (np.nan, 'jump')],
+            ...                    index=index,
+            ...                    columns=columns)
+            >>> df
+                           speed   max
+            class  name
+            bird   falcon  389.0   fly
+                   parrot   24.0   fly
+            mammal lion     80.5   run
+                   monkey   <NA>  jump
+
+            [4 rows x 2 columns]
+
+            >>> df.reset_index()
+                class    name  speed   max
+            0    bird  falcon  389.0   fly
+            1    bird  parrot   24.0   fly
+            2  mammal    lion   80.5   run
+            3  mammal  monkey   <NA>  jump
+
+            [4 rows x 4 columns]
+
+            >>> df.reset_index(drop=True)
+               speed   max
+            0  389.0   fly
+            1   24.0   fly
+            2   80.5   run
+            3   <NA>  jump
+
+            [4 rows x 2 columns]
+
+
         Args:
             drop (bool, default False):
                 Do not try to insert index into dataframe columns. This resets
@@ -1347,6 +1434,80 @@ def sort_values(
     ) -> DataFrame:
         """Sort by the values along row axis.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     'col1': ['A', 'A', 'B', bpd.NA, 'D', 'C'],
+            ...     'col2': [2, 1, 9, 8, 7, 4],
+            ...     'col3': [0, 1, 9, 4, 2, 3],
+            ...     'col4': ['a', 'B', 'c', 'D', 'e', 'F']
+            ... })
+            >>> df
+               col1  col2  col3 col4
+            0     A     2     0    a
+            1     A     1     1    B
+            2     B     9     9    c
+            3  <NA>     8     4    D
+            4     D     7     2    e
+            5     C     4     3    F
+
+            [6 rows x 4 columns]
+
+            Sort by col1:
+
+            >>> df.sort_values(by=['col1'])
+               col1  col2  col3 col4
+            0     A     2     0    a
+            1     A     1     1    B
+            2     B     9     9    c
+            5     C     4     3    F
+            4     D     7     2    e
+            3  <NA>     8     4    D
+
+            [6 rows x 4 columns]
+
+            Sort by multiple columns:
+
+            >>> df.sort_values(by=['col1', 'col2'])
+               col1  col2  col3 col4
+            1     A     1     1    B
+            0     A     2     0    a
+            2     B     9     9    c
+            5     C     4     3    F
+            4     D     7     2    e
+            3  <NA>     8     4    D
+
+            [6 rows x 4 columns]
+
+            Sort Descending:
+
+            >>> df.sort_values(by='col1', ascending=False)
+               col1  col2  col3 col4
+            4     D     7     2    e
+            5     C     4     3    F
+            2     B     9     9    c
+            0     A     2     0    a
+            1     A     1     1    B
+            3  <NA>     8     4    D
+
+            [6 rows x 4 columns]
+
+            Putting NAs first:
+
+            >>> df.sort_values(by='col1', ascending=False, na_position='first')
+               col1  col2  col3 col4
+            3  <NA>     8     4    D
+            4     D     7     2    e
+            5     C     4     3    F
+            2     B     9     9    c
+            0     A     2     0    a
+            1     A     1     1    B
+
+            [6 rows x 4 columns]
+
         Args:
             by (str or Sequence[str]):
                 Name or list of names to sort by.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 01cc3a0500..778ad68e0e 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -168,6 +168,53 @@ def reset_index(
         when the index is meaningless and needs to be reset to the default
         before another operation.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([1, 2, 3, 4], name='foo',
+            ...                index=['a', 'b', 'c', 'd'])
+            >>> s.index.name = "idx"
+            >>> s
+            idx
+            a    1
+            b    2
+            c    3
+            d    4
+            Name: foo, dtype: Int64
+
+            Generate a DataFrame with default index.
+
+            >>> s.reset_index()
+              idx  foo
+            0   a    1
+            1   b    2
+            2   c    3
+            3   d    4
+
+            [4 rows x 2 columns]
+
+            To specify the name of the new column use ``name`` param.
+
+            >>> s.reset_index(name="bar")
+              idx  bar
+            0   a    1
+            1   b    2
+            2   c    3
+            3   d    4
+
+            [4 rows x 2 columns]
+
+            To generate a new Series with the default index set param ``drop=True``.
+
+            >>> s.reset_index(drop=True)
+            0    1
+            1    2
+            2    3
+            3    4
+            Name: foo, dtype: Int64
+
         Args:
             drop (bool, default False):
                 Just reset the index, without inserting it as a column in
@@ -699,6 +746,69 @@ def sort_values(
 
         Sort a Series in ascending or descending order by some criterion.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> s = bpd.Series([np.nan, 1, 3, 10, 5])
+            >>> s
+            0    <NA>
+            1     1.0
+            2     3.0
+            3    10.0
+            4     5.0
+            dtype: Float64
+
+            Sort values ascending order (default behaviour):
+
+            >>> s.sort_values(ascending=True)
+            1     1.0
+            2     3.0
+            4     5.0
+            3    10.0
+            0    <NA>
+            dtype: Float64
+
+            Sort values descending order:
+
+            >>> s.sort_values(ascending=False)
+            3    10.0
+            4     5.0
+            2     3.0
+            1     1.0
+            0    <NA>
+            dtype: Float64
+
+            Sort values putting NAs first:
+
+            >>> s.sort_values(na_position='first')
+            0    <NA>
+            1     1.0
+            2     3.0
+            4     5.0
+            3    10.0
+            dtype: Float64
+
+            Sort a series of strings:
+
+            >>> s = bpd.Series(['z', 'b', 'd', 'a', 'c'])
+            >>> s
+            0    z
+            1    b
+            2    d
+            3    a
+            4    c
+            dtype: string
+
+            >>> s.sort_values()
+            3    a
+            1    b
+            4    c
+            2    d
+            0    z
+            dtype: string
+
         Args:
             axis (0 or 'index'):
                 Unused. Parameter needed for compatibility with DataFrame.
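The NA ordering shown in the samples above can be spot-checked without a BigQuery session, since BigQuery DataFrames is expected to mirror pandas semantics here and roughly compiles `na_position` down to `NULLS FIRST`/`NULLS LAST` in the generated SQL. A minimal local sketch, assuming only pandas is installed:

```python
import pandas as pd

# Same data as the Series.sort_values sample above; the Float64 extension
# dtype prints missing values as <NA>, matching the doctest output.
s = pd.Series([None, 1, 3, 10, 5], dtype="Float64")

# Ascending sort places NAs last by default (SQL: ORDER BY ... NULLS LAST).
print(s.sort_values(ascending=True))

# na_position="first" moves NAs to the front (SQL: ORDER BY ... NULLS FIRST).
print(s.sort_values(na_position="first"))
```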
From ad51035bcf80d6a49f134df26624b578010b5b12 Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Tue, 26 Dec 2023 18:29:00 +0000
Subject: [PATCH 17/27] docs: code samples for `isna`, `isnull`, `dropna`,
 `isin` (#289)

* docs: code samples for `isna`, `isnull`, `dropna`, `isin`

* fix header alignment in rendering
---
 .../bigframes_vendored/pandas/core/frame.py   | 81 +++++++++++++++++++
 .../bigframes_vendored/pandas/core/generic.py | 65 +++++++++++++++
 .../bigframes_vendored/pandas/core/series.py  | 70 ++++++++++++++++
 3 files changed, 216 insertions(+)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index fb34193710..2de63b9103 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1289,9 +1289,57 @@ def duplicated(self, subset=None, keep="first"):
 
     def dropna(
         self,
+        *,
+        axis: int | str = 0,
+        how: str = "any",
+        ignore_index=False,
     ) -> DataFrame:
         """Remove missing values.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> import numpy as np
+            >>> df = bpd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
+            ...                     "toy": [np.nan, 'Batmobile', 'Bullwhip'],
+            ...                     "born": [bpd.NA, "1940-04-25", bpd.NA]})
+            >>> df
+                   name        toy        born
+            0    Alfred       <NA>        <NA>
+            1    Batman  Batmobile  1940-04-25
+            2  Catwoman   Bullwhip        <NA>
+
+            [3 rows x 3 columns]
+
+            Drop the rows where at least one element is missing:
+
+            >>> df.dropna()
+                 name        toy        born
+            1  Batman  Batmobile  1940-04-25
+
+            [1 rows x 3 columns]
+
+            Drop the columns where at least one element is missing.
+
+            >>> df.dropna(axis='columns')
+                   name
+            0    Alfred
+            1    Batman
+            2  Catwoman
+
+            [3 rows x 1 columns]
+
+            Drop the rows where all elements are missing:
+
+            >>> df.dropna(how='all')
+                   name        toy        born
+            0    Alfred       <NA>        <NA>
+            1    Batman  Batmobile  1940-04-25
+            2  Catwoman   Bullwhip        <NA>
+
+            [3 rows x 3 columns]
+
         Args:
             axis ({0 or 'index', 1 or 'columns'}, default 'columns'):
                 Determine if rows or columns which contain missing values are
@@ -1318,6 +1366,39 @@ def isin(self, values):
         """
         Whether each element in the DataFrame is contained in values.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]},
+            ...                    index=['falcon', 'dog'])
+            >>> df
+                    num_legs  num_wings
+            falcon         2          2
+            dog            4          0
+
+            [2 rows x 2 columns]
+
+            When ``values`` is a list check whether every value in the DataFrame is
+            present in the list (which animals have 0 or 2 legs or wings).
+
+            >>> df.isin([0, 2])
+                    num_legs  num_wings
+            falcon      True       True
+            dog        False       True
+
+            [2 rows x 2 columns]
+
+            When ``values`` is a dict, we can pass it to check for each column separately:
+
+            >>> df.isin({'num_wings': [0, 3]})
+                    num_legs  num_wings
+            falcon     False      False
+            dog        False       True
+
+            [2 rows x 2 columns]
+
         Args:
             values (iterable, or dict):
                 The result will only be true at a location if all the
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index ca5c6344ce..2885162fd6 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -499,6 +499,71 @@ def isna(self) -> NDFrame:
         False values. Characters such as empty strings ``''`` or
         :attr:`numpy.inf` are not considered NA values.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> import numpy as np
+
+            >>> df = bpd.DataFrame(dict(
+            ...         age=[5, 6, np.nan],
+            ...         born=[bpd.NA, "1940-04-25", "1940-04-25"],
+            ...         name=['Alfred', 'Batman', ''],
+            ...         toy=[None, 'Batmobile', 'Joker'],
+            ... ))
+            >>> df
+                age        born    name        toy
+            0   5.0        <NA>  Alfred       <NA>
+            1   6.0  1940-04-25  Batman  Batmobile
+            2  <NA>  1940-04-25              Joker
+
+            [3 rows x 4 columns]
+
+            Show which entries in a DataFrame are NA:
+
+            >>> df.isna()
+                 age   born   name    toy
+            0  False   True  False   True
+            1  False  False  False  False
+            2   True  False  False  False
+
+            [3 rows x 4 columns]
+
+            >>> df.isnull()
+                 age   born   name    toy
+            0  False   True  False   True
+            1  False  False  False  False
+            2   True  False  False  False
+
+            [3 rows x 4 columns]
+
+            Show which entries in a Series are NA:
+
+            >>> ser = bpd.Series([5, None, 6, np.nan, bpd.NA])
+            >>> ser
+            0     5.0
+            1    <NA>
+            2     6.0
+            3    <NA>
+            4    <NA>
+            dtype: Float64
+
+            >>> ser.isna()
+            0    False
+            1     True
+            2    False
+            3     True
+            4     True
+            dtype: boolean
+
+            >>> ser.isnull()
+            0    False
+            1     True
+            2    False
+            3     True
+            4     True
+            dtype: boolean
+
         Returns:
             Mask of bool values for each element that
             indicates whether an element is an NA value.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 778ad68e0e..cbe0963051 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1460,6 +1460,42 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series:
         """
         Return a new Series with missing values removed.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            Drop NA values from a Series:
+
+            >>> import numpy as np
+            >>> ser = bpd.Series([1., 2., np.nan])
+            >>> ser
+            0     1.0
+            1     2.0
+            2    <NA>
+            dtype: Float64
+
+            >>> ser.dropna()
+            0    1.0
+            1    2.0
+            dtype: Float64
+
+            Empty strings are not considered NA values. ``None`` is considered an NA value.
+
+            >>> ser = bpd.Series(['2', bpd.NA, '', None, 'I stay'], dtype='object')
+            >>> ser
+            0         2
+            1      <NA>
+            2
+            3      <NA>
+            4    I stay
+            dtype: string
+
+            >>> ser.dropna()
+            0         2
+            2
+            4    I stay
+            dtype: string
+
         Args:
             axis (0 or 'index'):
                 Unused. Parameter needed for compatibility with DataFrame.
@@ -2531,6 +2567,40 @@ def isin(self, values):
         the same. That is, if any form of NaN is present in values, all forms
         of NaN in the series will be considered a match. (though pandas may not)
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama',
+            ...                 'hippo'], name='animal')
+            >>> s
+            0     llama
+            1       cow
+            2     llama
+            3    beetle
+            4     llama
+            5     hippo
+            Name: animal, dtype: string
+
+            >>> s.isin(['cow', 'llama'])
+            0     True
+            1     True
+            2     True
+            3    False
+            4     True
+            5    False
+            Name: animal, dtype: boolean
+
+            Strings and integers are distinct and are therefore not comparable:
+
+            >>> bpd.Series([1]).isin(['1'])
+            0    False
+            dtype: boolean
+            >>> bpd.Series([1.1]).isin(['1.1'])
+            0    False
+            dtype: boolean
+
         Args:
             values (list-like):
                 The sequence of values to test. Passing in a single string will raise a
From 0e1bbfc1055aff9757b5138907c11caab2f3965a Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Tue, 26 Dec 2023 19:10:15 +0000
Subject: [PATCH 18/27] docs: code samples for `Series.{add, replace, unique,
 T, transpose}` (#287)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request!
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - `Series.add`: https://screenshot.googleplex.com/763p36yJKKvM5DY - `Series.replace`: https://screenshot.googleplex.com/9MHuQZnKakqjjJn - `Series.unique`: https://screenshot.googleplex.com/7BfuQE3bFcYASqu - `Series.T`: https://screenshot.googleplex.com/8cSYpwKXrYetsEg - `Series.transpose`: https://screenshot.googleplex.com/7mM2zBwxRiqfDUV Fixes internal issue 317297573 🦕 --- .../bigframes_vendored/pandas/core/frame.py | 4 +- .../bigframes_vendored/pandas/core/series.py | 190 +++++++++++++++++- 2 files changed, 191 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 2de63b9103..d7ecae102b 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4371,7 +4371,7 @@ def stack(self, level=-1): BigQuery DataFrames does not support stack operations that would combine columns of different dtypes. - **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None @@ -4410,7 +4410,7 @@ def unstack(self, level=-1): If the index is not a MultiIndex, the output will be a Series (the analogue of stack when the columns are not a MultiIndex). - **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index cbe0963051..b0a4cb8193 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -143,13 +143,51 @@ def name(self) -> Hashable: @property def T(self) -> Series: - """Return the transpose, which is by definition self.""" + """Return the transpose, which is by definition self. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.T + 0 Ant + 1 Bear + 2 Cow + dtype: string + + """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def transpose(self) -> Series: """ Return the transpose, which is by definition self. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.transpose() + 0 Ant + 1 Bear + 2 Cow + dtype: string + Returns: Series: Series. """ @@ -539,6 +577,36 @@ def nunique(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def unique(self) -> Series: + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. 
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([2, 1, 3, 3], name='A')
+            >>> s
+            0    2
+            1    1
+            2    3
+            3    3
+            Name: A, dtype: Int64
+            >>> s.unique()
+            0    2
+            1    1
+            2    3
+            Name: A, dtype: Int64
+
+        Returns:
+            Series: The unique values returned as a Series.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def mode(self) -> Series:
         """
         Return the mode(s) of the Series.
@@ -1405,6 +1473,77 @@ def replace(
         This differs from updating with ``.loc`` or ``.iloc``, which require
         you to specify a location to update with some value.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([1, 2, 3, 4, 5])
+            >>> s
+            0    1
+            1    2
+            2    3
+            3    4
+            4    5
+            dtype: Int64
+
+            >>> s.replace(1, 5)
+            0    5
+            1    2
+            2    3
+            3    4
+            4    5
+            dtype: Int64
+
+            You can replace a list of values:
+
+            >>> s.replace([1, 3, 5], -1)
+            0    -1
+            1     2
+            2    -1
+            3     4
+            4    -1
+            dtype: Int64
+
+            You can use a replacement mapping:
+
+            >>> s.replace({1: 5, 3: 10})
+            0     5
+            1     2
+            2    10
+            3     4
+            4     5
+            dtype: Int64
+
+            With a string Series you can use a simple string replacement or a regex
+            replacement:
+
+            >>> s = bpd.Series(["Hello", "Another Hello"])
+            >>> s.replace("Hello", "Hi")
+            0               Hi
+            1    Another Hello
+            dtype: string
+
+            >>> s.replace("Hello", "Hi", regex=True)
+            0            Hi
+            1    Another Hi
+            dtype: string
+
+            >>> s.replace("^Hello", "Hi", regex=True)
+            0               Hi
+            1    Another Hello
+            dtype: string
+
+            >>> s.replace("Hello$", "Hi", regex=True)
+            0            Hi
+            1    Another Hi
+            dtype: string
+
+            >>> s.replace("[Hh]e", "__", regex=True)
+            0            __llo
+            1    Anot__r __llo
+            dtype: string
+
         Args:
             to_replace (str, regex, list, int, float or None):
                 How to find the values that will be replaced.
@@ -1702,6 +1841,55 @@ def add(self, other) -> Series:
         Equivalent to ``series + other``, but with support to substitute a fill_value for
         missing data in either one of the inputs.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> a = bpd.Series([1, 2, 3, bpd.NA])
+            >>> a
+            0     1.0
+            1     2.0
+            2     3.0
+            3    <NA>
+            dtype: Float64
+
+            >>> b = bpd.Series([10, 20, 30, 40])
+            >>> b
+            0    10
+            1    20
+            2    30
+            3    40
+            dtype: Int64
+
+            >>> a.add(b)
+            0    11.0
+            1    22.0
+            2    33.0
+            3    <NA>
+            dtype: Float64
+
+            You can also use the mathematical operator ``+``:
+
+            >>> a + b
+            0    11.0
+            1    22.0
+            2    33.0
+            3    <NA>
+            dtype: Float64
+
+            Adding two Series with explicit indexes:
+
+            >>> a = bpd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
+            >>> b = bpd.Series([10, 20, 30, 40], index=['a', 'b', 'd', 'e'])
+            >>> a.add(b)
+            a      11
+            b      22
+            c    <NA>
+            d      34
+            e    <NA>
+            dtype: Int64
+
         Args:
 
             other (Series, or scalar value):
From 2cd64891170dcd4f2a709024a2993e36db210976 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Wed, 27 Dec 2023 16:36:15 -0800
Subject: [PATCH 19/27] feat: Add dataframe.to_html (#259)

Thank you for opening a Pull Request!

Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a
[bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose)
before writing your code!
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/296945119 --- bigframes/dataframe.py | 52 ++++++++ tests/system/small/test_dataframe.py | 9 ++ .../bigframes_vendored/pandas/core/frame.py | 124 ++++++++++++++++++ 3 files changed, 185 insertions(+) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 423c2bcaac..ab0006ea20 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2682,6 +2682,58 @@ def to_string( encoding, ) + def to_html( + self, + buf=None, + columns: Sequence[str] | None = None, + col_space=None, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + formatters=None, + float_format=None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool = False, + decimal: str = ".", + bold_rows: bool = True, + classes: str | list | tuple | None = None, + escape: bool = True, + notebook: bool = False, + border: int | None = None, + table_id: str | None = None, + render_links: bool = False, + encoding: str | None = None, + ) -> str: + return self.to_pandas().to_html( + buf, + columns, # type: ignore + col_space, + header, + index, + na_rep, + formatters, + float_format, + sparsify, + index_names, + justify, # type: ignore + max_rows, + max_cols, + show_dimensions, + decimal, + bold_rows, + classes, + escape, + notebook, + border, + table_id, + render_links, + encoding, + ) + def to_markdown( self, buf=None, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 86b8cfbe66..cb2e4f94fa 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3463,6 +3463,15 @@ def test_df_to_string(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result +def test_df_to_html(scalars_df_index, scalars_pandas_df_index): + unsupported = ["numeric_col"] # formatted differently + + bf_result = scalars_df_index.drop(columns=unsupported).to_html() + pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_html() + + assert bf_result == pd_result + + def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index): # Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231 bf_result = scalars_df_index.dropna().to_markdown() diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index d7ecae102b..f2de8fcb6a 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -685,6 +685,130 @@ def to_string( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def to_html( + self, + buf=None, + columns: Sequence[str] | None = None, + col_space=None, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + formatters=None, + float_format=None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool = False, + decimal: str = ".", + bold_rows: bool = True, + classes: str | list | tuple | None = None, + escape: bool = True, + notebook: bool = False, + border: int | None = None, + table_id: str | None = None, + render_links: bool = 
False, + encoding: str | None = None, + ): + """Render a DataFrame as an HTML table. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> print(df.to_html()) + <table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>col1</th> + <th>col2</th> + </tr> + </thead> + <tbody> + <tr> + <th>0</th> + <td>1</td> + <td>3</td> + </tr> + <tr> + <th>1</th> + <td>2</td> + <td>4</td> + </tr> + </tbody> + </table>
+ + Args: + buf (str, Path or StringIO-like, optional, default None): + Buffer to write to. If None, the output is returned as a string. + columns (sequence, optional, default None): + The subset of columns to write. Writes all columns by default. + col_space (str or int, list or dict of int or str, optional): + The minimum width of each column in CSS length units. An int is + assumed to be px units. + header (bool, optional): + Whether to print column labels, default True. + index (bool, optional, default True): + Whether to print index (row) labels. + na_rep (str, optional, default 'NaN'): + String representation of NaN to use. + formatters (list, tuple or dict of one-param. functions, optional): + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format (one-parameter function, optional, default None): + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will + be applied only to the non-NaN elements, with NaN being + handled by na_rep. + sparsify (bool, optional, default True): + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names (bool, optional, default True): + Prints the names of the indexes. + justify (str, default None): + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are 'left', 'right', 'center', 'justify', + 'justify-all', 'start', 'end', 'inherit', 'match-parent', 'initial', + 'unset'. + max_rows (int, optional): + Maximum number of rows to display in the console. + max_cols (int, optional): + Maximum number of columns to display in the console. + show_dimensions (bool, default False): + Display DataFrame dimensions (number of rows by number of columns). + decimal (str, default '.'): + Character recognized as decimal separator, e.g. ',' in Europe. + bold_rows (bool, default True): + Make the row labels bold in the output. + classes (str or list or tuple, default None): + CSS class(es) to apply to the resulting html table. + escape (bool, default True): + Convert the characters <, >, and & to HTML-safe sequences. + notebook (bool, default False): + Whether the generated HTML is for IPython Notebook. + border (int): + A border=border attribute is included in the opening + <table> tag. Default pd.options.display.html.border. + table_id (str, optional): + A css id is included in the opening <table>
tag if specified. + render_links (bool, default False): + Convert URLs to HTML links. + encoding (str, default "utf-8"): + Set character encoding. + + Returns: + str or None: If buf is None, returns the result as a string. Otherwise + returns None. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def to_markdown( self, buf=None, From ac1a745ddce9865f4585777b43c2234b9bf2841d Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 28 Dec 2023 18:00:22 +0000 Subject: [PATCH 20/27] fix: exclude pandas 2.2.0rc0 to unblock prerelease tests (#292) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 317908521 🦕 --- noxfile.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index c4bbd7a65a..1d3624005a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -518,9 +518,13 @@ def prerelease(session: nox.sessions.Session, tests_path): "--prefer-binary", "--pre", "--upgrade", - # TODO(shobs): Remove tying to version 2.1.3 after - # https://github.com/pandas-dev/pandas/issues/56463 is resolved - "pandas!=2.1.4", + # TODO(shobs): Remove excluding version 2.1.4 after + # https://github.com/pandas-dev/pandas/issues/56463 is resolved. + # + # TODO(shobs): Remove excluding version 2.2.0rc0 after + # https://github.com/pandas-dev/pandas/issues/56646 and + # https://github.com/pandas-dev/pandas/issues/56651 are resolved. + "pandas!=2.1.4,!=2.2.0rc0", ) already_installed.add("pandas") From 252f3a2a0e1296c7d786acdc0bdebe9e4a9ae1be Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Thu, 28 Dec 2023 10:54:15 -0800 Subject: [PATCH 21/27] docs: fix the rendering for `get_dummies` (#291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - docs: https://screenshot.googleplex.com/8X53mhLdQb2dQsd Fixes internal issue 317915956 🦕 --- third_party/bigframes_vendored/pandas/core/reshape/encoding.py | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/bigframes_vendored/pandas/core/reshape/encoding.py b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py index da92b58f50..b7f67473ea 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/encoding.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py @@ -25,6 +25,7 @@ def get_dummies( prepended to the value. **Examples:** + >>> import bigframes.pandas as pd >>> pd.options.display.progress_bar = None >>> s = pd.Series(list('abca')) From 746115d5564c95bc3c4a5309c99e7a29e535e6fe Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Thu, 28 Dec 2023 12:42:15 -0800 Subject: [PATCH 22/27] Fix: Update dataframe.to_gbq to dedup column names. (#286) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/dataframe.py | 14 ++++---- tests/system/small/test_dataframe_io.py | 44 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index ab0006ea20..595670b0b6 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2759,26 +2759,28 @@ def _apply_unary_op(self, operation: ops.UnaryOp) -> DataFrame: def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str: """Create query text representing this dataframe for I/O.""" array_value = self._block.expr + + new_col_labels, new_idx_labels = utils.get_standardized_ids( + self._block.column_labels, self.index.names + ) + columns = list(self._block.value_columns) - column_labels = list(self._block.column_labels) + column_labels = new_col_labels # This code drops unnamed indexes to keep consistent with the behavior of # most pandas write APIs. The exception is `pandas.to_csv`, which keeps # unnamed indexes as `Unnamed: 0`. # TODO(chelsealin): check if works for multiple indexes. if index and self.index.name is not None: columns.extend(self._block.index_columns) - column_labels.extend(self.index.names) + column_labels.extend(new_idx_labels) else: array_value = array_value.drop_columns(self._block.index_columns) # Make columns in SQL reflect _labels_ not _ids_. Note: This may use # the arbitrary unicode column labels feature in BigQuery, which is # currently (June 2023) in preview. - # TODO(swast): Handle duplicate and NULL labels. 
id_overrides = { - col_id: col_label - for col_id, col_label in zip(columns, column_labels) - if col_label and isinstance(col_label, str) + col_id: col_label for col_id, col_label in zip(columns, column_labels) } if ordering_id is not None: diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 59864e483e..6f1b31b48e 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -273,6 +273,50 @@ def test_to_gbq_if_exists( ) +def test_to_gbq_w_duplicate_column_names( + scalars_df_index, scalars_pandas_df_index, dataset_id +): + """Test the `to_gbq` API when dealing with duplicate column names.""" + destination_table = f"{dataset_id}.test_to_gbq_w_duplicate_column_names" + + # Renaming 'int64_too' to 'int64_col', which will result in 'int64_too' + # becoming 'int64_col_1' after deduplication. + scalars_df_index = scalars_df_index.rename(columns={"int64_too": "int64_col"}) + scalars_df_index.to_gbq(destination_table, if_exists="replace") + + bf_result = bpd.read_gbq(destination_table, index_col="rowindex").to_pandas() + + pd.testing.assert_series_equal( + scalars_pandas_df_index["int64_col"], bf_result["int64_col"] + ) + pd.testing.assert_series_equal( + scalars_pandas_df_index["int64_too"], + bf_result["int64_col_1"], + check_names=False, + ) + + +def test_to_gbq_w_None_column_names( + scalars_df_index, scalars_pandas_df_index, dataset_id +): + """Test the `to_gbq` API with None as a column name.""" + destination_table = f"{dataset_id}.test_to_gbq_w_none_column_names" + + scalars_df_index = scalars_df_index.rename(columns={"int64_too": None}) + scalars_df_index.to_gbq(destination_table, if_exists="replace") + + bf_result = bpd.read_gbq(destination_table, index_col="rowindex").to_pandas() + + pd.testing.assert_series_equal( + scalars_pandas_df_index["int64_col"], bf_result["int64_col"] + ) + pd.testing.assert_series_equal( + scalars_pandas_df_index["int64_too"], + bf_result["bigframes_unnamed_column"], + check_names=False, + ) + + def test_to_gbq_w_invalid_destination_table(scalars_df_index): with pytest.raises(ValueError): scalars_df_index.to_gbq("table_id") From 7cbc2b0ba572d11778ba7caf7c95b7fb8f3a31a7 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 29 Dec 2023 00:06:19 +0000 Subject: [PATCH 23/27] docs: code samples for `Series.{map, to_list, count}` (#290) docs: code samples for `DataFrame.copy` and `Series.copy` --- .../bigframes_vendored/pandas/core/generic.py | 61 ++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 70 +++++++++++++++++++ 2 files changed, 131 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 2885162fd6..c079cbff7f 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -448,6 +448,67 @@ def copy(self): and indices. Modifications to the data or indices of the copy will not be reflected in the original object. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Modification in the original Series will not affect the copy Series: + + >>> s = bpd.Series([1, 2], index=["a", "b"]) + >>> s + a 1 + b 2 + dtype: Int64 + + >>> s_copy = s.copy() + >>> s_copy + a 1 + b 2 + dtype: Int64 + + >>> s.loc['b'] = 22 + >>> s + a 1 + b 22 + dtype: Int64 + >>> s_copy + a 1 + b 2 + dtype: Int64 + + Modification in the original DataFrame will not affect the copy DataFrame: + + >>> df = bpd.DataFrame({'a': [1, 3], 'b': [2, 4]}) + >>> df + a b + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + >>> df_copy = df.copy() + >>> df_copy + a b + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + >>> df.loc[df["b"] == 2, "b"] = 22 + >>> df + a b + 0 1 22.0 + 1 3 4.0 + + [2 rows x 2 columns] + >>> df_copy + a b + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + Returns: Object type matches caller. """ diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index b0a4cb8193..b97f9018dd 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -433,6 +433,21 @@ def tolist(self) -> list: (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period). + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: Int64 + + >>> s.to_list() + [1, 2, 3] + Returns: list: list of the values """ @@ -560,6 +575,20 @@ def count(self): """ Return number of non-NA/null observations in the Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0.0, 1.0, bpd.NA]) + >>> s + 0 0.0 + 1 1.0 + 2 + dtype: Float64 + >>> s.count() + 2 + Returns: int or Series (if level specified): Number of non-null values in the Series. @@ -2845,6 +2874,47 @@ def map( ``__missing__`` (i.e. provide a method for default values). These are treated the same as ``dict``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['cat', 'dog', bpd.NA, 'rabbit']) + >>> s + 0 cat + 1 dog + 2 + 3 rabbit + dtype: string + + `map` accepts a `dict`. Values that are not found in the `dict` are + converted to `NA`: + + >>> s.map({'cat': 'kitten', 'dog': 'puppy'}) + 0 kitten + 1 puppy + 2 + 3 + dtype: string + + It also accepts a remote function: + + >>> @bpd.remote_function([str], str) + ... def my_mapper(val): + ... vowels = ["a", "e", "i", "o", "u"] + ... if val: + ... return "".join([ + ... ch.upper() if ch in vowels else ch for ch in val + ... ]) + ... return "N/A" + + >>> s.map(my_mapper) + 0 cAt + 1 dOg + 2 N/A + 3 rAbbIt + dtype: string + Args: arg (function, Mapping, Series): remote function, collections.abc.Mapping subclass or Series From 64bdf7622f3b5a5b5ec9176b1558a9cd4b7a756a Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Tue, 2 Jan 2024 11:20:16 -0800 Subject: [PATCH 24/27] chore: stop using deprecated ibis relabel method (#297) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/core/compile/compiled.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 199c8db785..524699290b 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -346,7 +346,9 @@ def _to_ibis_expr( table = table.filter(base_table[PREDICATE_COLUMN]) table = table.drop(*columns_to_drop) if col_id_overrides: - table = table.relabel(col_id_overrides) + table = table.rename( + {value: key for key, value in col_id_overrides.items()} + ) if fraction is not None: table = table.filter(ibis.random() < ibis.literal(fraction)) return table @@ -1194,7 +1196,9 @@ def _to_ibis_expr( table = table.filter(base_table[PREDICATE_COLUMN]) table = table.drop(*columns_to_drop) if col_id_overrides: - table = table.relabel(col_id_overrides) + table = table.rename( + {value: key for key, value in col_id_overrides.items()} + ) if fraction is not None: table = table.filter(ibis.random() < ibis.literal(fraction)) return table From c2b1892825545a34ce4ed5b0ef99e99348466108 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 2 Jan 2024 20:28:16 +0000 Subject: [PATCH 25/27] docs: code samples for `sample`, `get`, `Series.round` (#295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BEGIN_COMMIT_OVERRIDE docs: code samples for `sample`, `get`, `Series.round` (#295) docs: code samples for DataFrame `set_index`, `items` (#295) END_COMMIT_OVERRIDE Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - `DataFrame.sample`, `Series.sample`: https://screenshot.googleplex.com/kPy5swVACMeBhSo - `DataFrame.get`, `Series.get`: https://screenshot.googleplex.com/7hirn5oz2b4L6B3 - `DataFrame.set_index`: https://screenshot.googleplex.com/3CXARrp5hwV6gau - `DataFrame.items`: https://screenshot.googleplex.com/bk3HAiXZQq3TYD9 - `Series.round`: https://screenshot.googleplex.com/C9c4m84NWNMnAwS Fixes internal issues 318011542 and 318011745 🦕 --- .../bigframes_vendored/pandas/core/frame.py | 78 ++++++++++++++- .../bigframes_vendored/pandas/core/generic.py | 94 +++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 19 ++++ 3 files changed, 189 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index f2de8fcb6a..9259d14bab 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1187,6 +1187,47 @@ def set_index( Set the DataFrame index (row labels) using one existing column. The index can replace the existing index. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + [4 rows x 3 columns] + + Set the 'month' column to become the index: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + [4 rows x 2 columns] + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + [4 rows x 1 columns] + Args: keys: A label. This parameter can be a single column key. @@ -1621,6 +1662,39 @@ def items(self): Iterates over the DataFrame columns, returning a tuple with the column name and the content as a Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'species': ['bear', 'bear', 'marsupial'], + ... 'population': [1864, 22000, 80000]}, + ... index=['panda', 'polar', 'koala']) + >>> df + species population + panda bear 1864 + polar bear 22000 + koala marsupial 80000 + + [3 rows x 2 columns] + + >>> for label, content in df.items(): + ... print(f'--> label: {label}') + ... print(f'--> content:\\n{content}') + ... + --> label: species + --> content: + panda bear + polar bear + koala marsupial + Name: species, dtype: string + --> label: population + --> content: + panda 1864 + polar 22000 + koala 80000 + Name: population, dtype: Int64 + Returns: Iterator: Iterator of label, Series for each column. """ @@ -4587,7 +4661,7 @@ def index(self): ... 'Location': ['Seattle', 'New York', 'Kona']}, ... index=([10, 20, 30])) >>> df - Name Age Location + Name Age Location 10 Alice 25 Seattle 20 Bob 30 New York 30 Aritra 35 Kona @@ -4603,7 +4677,7 @@ def index(self): >>> df1 = df.set_index(["Name", "Location"]) >>> df1 - Age + Age Name Location Alice Seattle 25 Bob New York 30 diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index c079cbff7f..bc31e02263 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -254,6 +254,55 @@ def get(self, key, default=None): Returns default value if not found. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame( + ... [ + ... [24.3, 75.7, "high"], + ... [31, 87.8, "high"], + ... [22, 71.6, "medium"], + ... [35, 95, "medium"], + ... ], + ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], + ... index=["2014-02-12", "2014-02-13", "2014-02-14", "2014-02-15"], + ... ) + >>> df + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + [4 rows x 3 columns] + + >>> df.get(["temp_celsius", "windspeed"]) + temp_celsius windspeed + 2014-02-12 24.3 high + 2014-02-13 31.0 high + 2014-02-14 22.0 medium + 2014-02-15 35.0 medium + + [4 rows x 2 columns] + + >>> ser = df['windspeed'] + >>> ser + 2014-02-12 high + 2014-02-13 high + 2014-02-14 medium + 2014-02-15 medium + Name: windspeed, dtype: string + >>> ser.get('2014-02-13') + 'high' + + If the key is not found, the default value will be used. 
+ + >>> df.get(["temp_celsius", "temp_kelvin"]) + >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") + 'default_value' + Args: key: object @@ -410,6 +459,51 @@ def sample( You can use `random_state` for reproducibility. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'num_legs': [2, 4, 8, 0], + ... 'num_wings': [2, 0, 0, 0], + ... 'num_specimen_seen': [10, 2, 1, 8]}, + ... index=['falcon', 'dog', 'spider', 'fish']) + >>> df + num_legs num_wings num_specimen_seen + falcon 2 2 10 + dog 4 0 2 + spider 8 0 1 + fish 0 0 8 + + [4 rows x 3 columns] + + Fetch one random row from the DataFrame (Note that we use `random_state` + to ensure reproducibility of the examples): + + >>> df.sample(random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + + [1 rows x 3 columns] + + A random 50% sample of the DataFrame: + + >>> df.sample(frac=0.5, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + + [2 rows x 3 columns] + + Extract 3 random elements from the Series `df['num_legs']`: + + >>> s = df['num_legs'] + >>> s.sample(n=3, random_state=1) + dog 4 + fish 0 + spider 8 + Name: num_legs, dtype: Int64 + Args: n (Optional[int], default None): Number of items from axis to return. Cannot be used with `frac`. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index b97f9018dd..1ee1a8d5b5 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -722,6 +722,25 @@ def round(self, decimals: int = 0) -> Series: """ Round each value in a Series to the given number of decimals. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: Float64 + + >>> s = bpd.Series([0.123, 1.345, 2.789]) + >>> s.round(decimals=2) + 0 0.12 + 1 1.34 + 2 2.79 + dtype: Float64 + Args: decimals (int, default 0): Number of decimal places to round to. If decimals is negative, From eb69f60db52544882fb06c2d5fa0e41226dfe93f Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 2 Jan 2024 23:44:39 +0000 Subject: [PATCH 26/27] docs: code samples for `DataFrame.rename`, `Series.rename` (#293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BEGIN_COMMIT_OVERRIDE docs: code samples for `rename` , `size` (#293) docs: code samples for `Series.{name, std, agg}` (#293) END_COMMIT_OVERRIDE Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - `DataFrame.size`: https://screenshot.googleplex.com/55MHXNuAamdfbud - `Series.size`: https://screenshot.googleplex.com/5ve4T8UJq2TUiWb - `DataFrame.rename`: https://screenshot.googleplex.com/7eWsfcz8tmLx4pY - `Series.rename`: https://screenshot.googleplex.com/3HbXTxCaJVsbEzs - `Series.name`: https://screenshot.googleplex.com/7FpNDWJEyiqGLpN - `Series.std`: https://screenshot.googleplex.com/4RSTC8s2tYYK5cW - `Series.agg`: https://screenshot.googleplex.com/63TmACx23TPJu2K Fixes internal issues 317997641 and 317998300 🦕 --- .../bigframes_vendored/pandas/core/frame.py | 24 ++++ .../bigframes_vendored/pandas/core/generic.py | 13 ++ .../bigframes_vendored/pandas/core/series.py | 127 ++++++++++++++++++ 3 files changed, 164 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 9259d14bab..c3794c550e 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1146,6 +1146,30 @@ def rename( Dict values must be unique (1-to-1). Labels not contained in a dict will be left as-is. Extra labels listed don't throw an error. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + [3 rows x 2 columns] + + Rename columns using a mapping: + + >>> df.rename(columns={"A": "col1", "B": "col2"}) + col1 col2 + 0 1 4 + 1 2 5 + 2 3 6 + + [3 rows x 2 columns] + Args: columns (Mapping): Dict-like from old column labels to new column labels. diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index bc31e02263..72b947f96c 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -29,6 +29,19 @@ def ndim(self) -> int: def size(self) -> int: """Return an int representing the number of elements in this object. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.size + 3 + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.size + 4 + Returns: int: Return the number of rows if Series. Otherwise return the number of rows times number of columns if DataFrame. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 1ee1a8d5b5..98c4fcdd44 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -135,6 +135,35 @@ def name(self) -> Hashable: to form a DataFrame. It is also used whenever displaying the Series using the interpreter. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + For a Series: + + >>> s = bpd.Series([1, 2, 3], dtype="Int64", name='Numbers') + >>> s + 0 1 + 1 2 + 2 3 + Name: Numbers, dtype: Int64 + >>> s.name + 'Numbers' + + If the Series is part of a DataFrame: + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + [2 rows x 2 columns] + >>> s = df["col1"] + >>> s.name + 'col1' + Returns: hashable object: The name of the Series, also the column name if part of a DataFrame. @@ -560,6 +589,27 @@ def agg(self, func): """ Aggregate using one or more operations over the specified axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: Int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1.0 + max 4.0 + dtype: Float64 + Args: func (function): Function to use for aggregating the data. @@ -2292,6 +2342,29 @@ def std( Normalized by N-1 by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'person_id': [0, 1, 2, 3], + ... 'age': [21, 25, 62, 43], + ... 'height': [1.61, 1.87, 1.49, 2.01]} + ... ).set_index('person_id') + >>> df + age height + person_id + 0 21 1.61 + 1 25 1.87 + 2 62 1.49 + 3 43 2.01 + + [4 rows x 2 columns] + + >>> df.std() + age 18.786076 + height 0.237417 + dtype: Float64 Returns ------- @@ -2649,6 +2722,34 @@ def rename(self, index, **kwargs) -> Series | None: Alternatively, change ``Series.name`` with a scalar value. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: Int64 + + You can change the Series name by specifying a string scalar: + + >>> s.rename("my_name") + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: Int64 + + You can change the labels by specifying a mapping: + + >>> s.rename({1: 3, 2: 5}) + 0 1 + 3 2 + 5 3 + dtype: Int64 + Args: index (scalar, hashable sequence, dict-like or function optional): Functions or dict-like are transformations to apply to @@ -2990,3 +3091,29 @@ def values(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def size(self) -> int: + """Return the number of elements in the underlying data. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + For Series: + + >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.size + 3 + + For Index: + + >>> idx = bpd.Index(bpd.Series([1, 2, 3])) + >>> idx.size + 3 + + Returns: + int: Return the number of elements in the underlying data.
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 655178ad71c2b64f720d0d195813a97889c38f5a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:29:36 -0800 Subject: [PATCH 27/27] chore(main): release 0.18.0 (#279) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 43 +++++++++++++++++++++++++++++++++++++++++++ bigframes/version.py | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c49c5b63b..77a6576ee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,49 @@ [1]: https://pypi.org/project/bigframes/#history +## [0.18.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.17.0...v0.18.0) (2024-01-02) + + +### Features + +* Add dataframe.to_html ([#259](https://github.com/googleapis/python-bigquery-dataframes/issues/259)) ([2cd6489](https://github.com/googleapis/python-bigquery-dataframes/commit/2cd64891170dcd4f2a709024a2993e36db210976)) +* Add IntervalIndex support to bigframes.pandas.cut ([#254](https://github.com/googleapis/python-bigquery-dataframes/issues/254)) ([6c1969a](https://github.com/googleapis/python-bigquery-dataframes/commit/6c1969a35fe720cf3a804006bcc9046ba554fcc3)) +* Add replace method to DataFrame ([#261](https://github.com/googleapis/python-bigquery-dataframes/issues/261)) ([5092215](https://github.com/googleapis/python-bigquery-dataframes/commit/5092215767d77c90b132e9cd6b3e3749827ebe09)) +* Specific pyarrow mappings for decimal, bytes types ([#283](https://github.com/googleapis/python-bigquery-dataframes/issues/283)) ([a1c0631](https://github.com/googleapis/python-bigquery-dataframes/commit/a1c06319ab0e3697c3175112490488002bb344c0)) + + +### Bug Fixes + +* Dataframes to_gbq now creates dataset if it doesn't exist ([#222](https://github.com/googleapis/python-bigquery-dataframes/issues/222)) ([bac62f7](https://github.com/googleapis/python-bigquery-dataframes/commit/bac62f76af1af6ca8834c3690c7c79aeb12dd331)) +* Exclude pandas 2.2.0rc0 to unblock prerelease tests ([#292](https://github.com/googleapis/python-bigquery-dataframes/issues/292)) ([ac1a745](https://github.com/googleapis/python-bigquery-dataframes/commit/ac1a745ddce9865f4585777b43c2234b9bf2841d)) +* Fix DataFrameGroupby.agg() issue with as_index=False ([#273](https://github.com/googleapis/python-bigquery-dataframes/issues/273)) ([ab49350](https://github.com/googleapis/python-bigquery-dataframes/commit/ab493506e71ed8970a11fe2f88b2145150e09291)) +* Make `Series.str.replace` work for simple strings ([#285](https://github.com/googleapis/python-bigquery-dataframes/issues/285)) ([ad67465](https://github.com/googleapis/python-bigquery-dataframes/commit/ad6746569b3af11be9d40805a1449ee1e89288dc)) +* Update dataframe.to_gbq to dedup column names. 
([#286](https://github.com/googleapis/python-bigquery-dataframes/issues/286)) ([746115d](https://github.com/googleapis/python-bigquery-dataframes/commit/746115d5564c95bc3c4a5309c99e7a29e535e6fe)) +* Use setuptools.find_namespace_packages ([#246](https://github.com/googleapis/python-bigquery-dataframes/issues/246)) ([9ec352a](https://github.com/googleapis/python-bigquery-dataframes/commit/9ec352a338f11d82aee9cd665ffb0e6e97cb391b)) + + +### Dependencies + +* Migrate to `ibis-framework >= "7.1.0"` ([#53](https://github.com/googleapis/python-bigquery-dataframes/issues/53)) ([9798a2b](https://github.com/googleapis/python-bigquery-dataframes/commit/9798a2b14dffb20432f732343cac92341e42fe09)) + + +### Documentation + +* Add code snippets for explore query result page ([#278](https://github.com/googleapis/python-bigquery-dataframes/issues/278)) ([7cbbb7d](https://github.com/googleapis/python-bigquery-dataframes/commit/7cbbb7d4608d8b7d1a360b2fe2d39d89a52f9546)) +* Code samples for `astype` common to DataFrame and Series ([#280](https://github.com/googleapis/python-bigquery-dataframes/issues/280)) ([95b673a](https://github.com/googleapis/python-bigquery-dataframes/commit/95b673aeb1545744e4b1a353cf1f4d0202d8a1b2)) +* Code samples for `DataFrame.copy` and `Series.copy` ([#290](https://github.com/googleapis/python-bigquery-dataframes/issues/290)) ([7cbc2b0](https://github.com/googleapis/python-bigquery-dataframes/commit/7cbc2b0ba572d11778ba7caf7c95b7fb8f3a31a7)) +* Code samples for `drop` and `fillna` ([#284](https://github.com/googleapis/python-bigquery-dataframes/issues/284)) ([9c5012e](https://github.com/googleapis/python-bigquery-dataframes/commit/9c5012ec68275db83d1f6f7e743f5edaaaacd8cb)) +* Code samples for `isna`, `isnull`, `dropna`, `isin` ([#289](https://github.com/googleapis/python-bigquery-dataframes/issues/289)) ([ad51035](https://github.com/googleapis/python-bigquery-dataframes/commit/ad51035bcf80d6a49f134df26624b578010b5b12)) +* Code samples for `rename` , `size` ([#293](https://github.com/googleapis/python-bigquery-dataframes/issues/293)) ([eb69f60](https://github.com/googleapis/python-bigquery-dataframes/commit/eb69f60db52544882fb06c2d5fa0e41226dfe93f)) +* Code samples for `reset_index` and `sort_values` ([#282](https://github.com/googleapis/python-bigquery-dataframes/issues/282)) ([acc0eb7](https://github.com/googleapis/python-bigquery-dataframes/commit/acc0eb7010951c8cfb91aecc45268b041217dd09)) +* Code samples for `sample`, `get`, `Series.round` ([#295](https://github.com/googleapis/python-bigquery-dataframes/issues/295)) ([c2b1892](https://github.com/googleapis/python-bigquery-dataframes/commit/c2b1892825545a34ce4ed5b0ef99e99348466108)) +* Code samples for `Series.{add, replace, unique, T, transpose}` ([#287](https://github.com/googleapis/python-bigquery-dataframes/issues/287)) ([0e1bbfc](https://github.com/googleapis/python-bigquery-dataframes/commit/0e1bbfc1055aff9757b5138907c11caab2f3965a)) +* Code samples for `Series.{map, to_list, count}` 
([#290](https://github.com/googleapis/python-bigquery-dataframes/issues/290)) ([7cbc2b0](https://github.com/googleapis/python-bigquery-dataframes/commit/7cbc2b0ba572d11778ba7caf7c95b7fb8f3a31a7)) +* Code samples for `Series.{name, std, agg}` ([#293](https://github.com/googleapis/python-bigquery-dataframes/issues/293)) ([eb69f60](https://github.com/googleapis/python-bigquery-dataframes/commit/eb69f60db52544882fb06c2d5fa0e41226dfe93f)) +* Code samples for `Series.groupby` and `Series.{sum,mean,min,max}` ([#280](https://github.com/googleapis/python-bigquery-dataframes/issues/280)) ([95b673a](https://github.com/googleapis/python-bigquery-dataframes/commit/95b673aeb1545744e4b1a353cf1f4d0202d8a1b2)) +* Code samples for DataFrame `set_index`, `items` ([#295](https://github.com/googleapis/python-bigquery-dataframes/issues/295)) ([c2b1892](https://github.com/googleapis/python-bigquery-dataframes/commit/c2b1892825545a34ce4ed5b0ef99e99348466108)) +* Fix the rendering for `get_dummies` ([#291](https://github.com/googleapis/python-bigquery-dataframes/issues/291)) ([252f3a2](https://github.com/googleapis/python-bigquery-dataframes/commit/252f3a2a0e1296c7d786acdc0bdebe9e4a9ae1be)) + ## [0.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.16.0...v0.17.0) (2023-12-14) diff --git a/bigframes/version.py b/bigframes/version.py index 04eac385f6..494335acd7 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.17.0" +__version__ = "0.18.0"
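For reference, the ibis API change behind PATCH 24/27 ("chore: stop using deprecated ibis relabel method") can be sketched in isolation. This is a minimal sketch, assuming ibis >= 7.x is installed; the in-memory table and the id/label names below are illustrative only and are not taken from the bigframes codebase:

    import ibis

    # An in-memory stand-in for the compiled BigQuery table (illustrative data).
    table = ibis.memtable({"col_0": [1, 2], "col_1": [3, 4]})

    # bigframes passes around {column_id: output_label} mappings (col_id_overrides).
    col_id_overrides = {"col_0": "rowindex", "col_1": "int64_col"}

    # Deprecated spelling: table.relabel(col_id_overrides), which took {old: new}.
    # The replacement, Table.rename, takes the inverse mapping, {new: old}, so the
    # dictionary is inverted at the call site, exactly as in the patch above.
    table = table.rename({value: key for key, value in col_id_overrides.items()})
    print(table.columns)  # ['rowindex', 'int64_col']

Because Table.rename expects {new_name: old_name}, the patch inverts col_id_overrides at each call site rather than changing how the mapping is built elsewhere in compiled.py.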