|
24 | 24 | import sys
|
25 | 25 | import tempfile
|
26 | 26 | import textwrap
|
27 |
| -from typing import cast, List, NamedTuple, Optional, Sequence, TYPE_CHECKING, Union |
| 27 | +from typing import ( |
| 28 | + Any, |
| 29 | + cast, |
| 30 | + List, |
| 31 | + Mapping, |
| 32 | + NamedTuple, |
| 33 | + Optional, |
| 34 | + Sequence, |
| 35 | + TYPE_CHECKING, |
| 36 | + Union, |
| 37 | +) |
28 | 38 | import warnings
|
29 | 39 |
|
30 | 40 | import ibis
|
@@ -736,8 +746,8 @@ def get_routine_reference(
|
736 | 746 | # which has moved as @js to the ibis package
|
737 | 747 | # https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py
|
738 | 748 | def remote_function(
|
739 |
| - input_types: Union[type, Sequence[type]], |
740 |
| - output_type: type, |
| 749 | + input_types: Union[None, type, Sequence[type]] = None, |
| 750 | + output_type: Optional[type] = None, |
741 | 751 | session: Optional[Session] = None,
|
742 | 752 | bigquery_client: Optional[bigquery.Client] = None,
|
743 | 753 | bigquery_connection_client: Optional[
|
@@ -801,11 +811,11 @@ def remote_function(
|
801 | 811 | `$ gcloud projects add-iam-policy-binding PROJECT_ID --member="serviceAccount:CONNECTION_SERVICE_ACCOUNT_ID" --role="roles/run.invoker"`.
|
802 | 812 |
|
803 | 813 | Args:
|
804 |
| - input_types (type or sequence(type)): |
| 814 | + input_types (None, type, or sequence(type)): |
805 | 815 | For scalar user defined function it should be the input type or
|
806 | 816 | sequence of input types. For row processing user defined function,
|
807 | 817 | type `Series` should be specified. If None, the input types are read from the type annotations of the function's parameters.
|
808 |
| - output_type (type): |
| 818 | + output_type (Optional[type]): |
809 | 819 | Data type of the output in the user defined function. If None, it is read from the function's return type annotation.
|
810 | 820 | session (bigframes.Session, Optional):
|
811 | 821 | BigQuery DataFrames session to use for getting default project,
|
@@ -908,27 +918,10 @@ def remote_function(
|
908 | 918 | service(s) that are on a VPC network. For more details, see
|
909 | 919 | https://cloud.google.com/functions/docs/networking/connecting-vpc.
|
910 | 920 | """
|
911 |
| - is_row_processor = False |
912 |
| - |
913 |
| - import bigframes.series |
914 |
| - import bigframes.session |
915 |
| - |
916 |
| - if input_types == bigframes.series.Series: |
917 |
| - warnings.warn( |
918 |
| - "input_types=Series scenario is in preview.", |
919 |
| - stacklevel=1, |
920 |
| - category=bigframes.exceptions.PreviewWarning, |
921 |
| - ) |
922 |
| - |
923 |
| - # we will model the row as a json serialized string containing the data |
924 |
| - # and the metadata representing the row |
925 |
| - input_types = [str] |
926 |
| - is_row_processor = True |
927 |
| - elif isinstance(input_types, type): |
928 |
| - input_types = [input_types] |
929 |
| - |
930 | 921 | # Some defaults may be used from the session if not provided otherwise
|
931 | 922 | import bigframes.pandas as bpd
|
| 923 | + import bigframes.series |
| 924 | + import bigframes.session |
932 | 925 |
|
933 | 926 | session = cast(bigframes.session.Session, session or bpd.get_global_session())
|
934 | 927 |
|
@@ -1021,10 +1014,61 @@ def remote_function(
|
1021 | 1014 | bq_connection_manager = None if session is None else session.bqconnectionmanager
|
1022 | 1015 |
|
1023 | 1016 | def wrapper(f):
|
| 1017 | + nonlocal input_types, output_type |
| 1018 | + |
1024 | 1019 | if not callable(f):
|
1025 | 1020 | raise TypeError("f must be callable, got {}".format(f))
|
1026 | 1021 |
|
1027 |
| - signature = inspect.signature(f) |
| 1022 | + if sys.version_info >= (3, 10): |
| 1023 | + # Add `eval_str = True` so that deferred annotations are turned into their |
| 1024 | + # corresponding type objects. Need Python 3.10 for eval_str parameter. |
| 1025 | + # https://docs.python.org/3/library/inspect.html#inspect.signature |
| 1026 | + signature_kwargs: Mapping[str, Any] = {"eval_str": True} |
| 1027 | + else: |
| 1028 | + signature_kwargs = {} |
| 1029 | + |
| 1030 | + signature = inspect.signature( |
| 1031 | + f, |
| 1032 | + **signature_kwargs, |
| 1033 | + ) |
| 1034 | + |
| 1035 | + # Try to get input types via type annotations. |
| 1036 | + if input_types is None: |
| 1037 | + input_types = [] |
| 1038 | + for parameter in signature.parameters.values(): |
| 1039 | + if (param_type := parameter.annotation) is inspect.Signature.empty: |
| 1040 | + raise ValueError( |
| 1041 | + "'input_types' was not set and parameter " |
| 1042 | + f"'{parameter.name}' is missing a type annotation. " |
| 1043 | + "Types are required to use @remote_function." |
| 1044 | + ) |
| 1045 | + input_types.append(param_type) |
| 1046 | + |
| 1047 | + if output_type is None: |
| 1048 | + if (output_type := signature.return_annotation) is inspect.Signature.empty: |
| 1049 | + raise ValueError( |
| 1050 | + "'output_type' was not set and function is missing a " |
| 1051 | + "return type annotation. Types are required to use " |
| 1052 | + "@remote_function." |
| 1053 | + ) |
| 1054 | + |
| 1055 | + # The function will actually be receiving a pandas Series, but allow both |
| 1056 | + # BigQuery DataFrames and pandas object types for compatibility. |
| 1057 | + is_row_processor = False |
| 1058 | + if input_types == bigframes.series.Series or input_types == pandas.Series: |
| 1059 | + warnings.warn( |
| 1060 | + "input_types=Series scenario is in preview.", |
| 1061 | + stacklevel=1, |
| 1062 | + category=bigframes.exceptions.PreviewWarning, |
| 1063 | + ) |
| 1064 | + |
| 1065 | + # we will model the row as a json serialized string containing the data |
| 1066 | + # and the metadata representing the row |
| 1067 | + input_types = [str] |
| 1068 | + is_row_processor = True |
| 1069 | + elif isinstance(input_types, type): |
| 1070 | + input_types = [input_types] |
| 1071 | + |
1028 | 1072 | # TODO(b/340898611): fix type error
|
1029 | 1073 | ibis_signature = ibis_signature_from_python_signature(
|
1030 | 1074 | signature, input_types, output_type # type: ignore
|
|
0 commit comments