Skip to content

Commit f1cff16

Browse files
authored
chore: add blob snippets (#1596)
* chore: add blob snippets * fix lint * polish * fix
1 parent 4447846 commit f1cff16

File tree

2 files changed

+117
-1
lines changed

2 files changed

+117
-1
lines changed

notebooks/experimental/multimodal_dataframe.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
}
141141
],
142142
"source": [
143-
"# Create blob columns from wildcard path. .\n",
143+
"# Create blob columns from wildcard path.\n",
144144
"df_image = bpd.from_glob_path(\n",
145145
" \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n",
146146
")\n",

samples/snippets/multimodal_test.py

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_multimodal_dataframe() -> None:
    """Run the multimodal DataFrame sample snippets as a single smoke test.

    The ``[START ...]`` / ``[END ...]`` comments delimit the individual
    snippets (presumably extracted for documentation -- confirm with the
    docs tooling), so their text and the code between them is kept in
    sample form: imports stay inside the function and bare expressions such
    as ``df_image`` stand in for notebook-style previews.

    The image transforms pass destinations under
    ``gs://bigframes_blob_test/``; the caller presumably needs access to
    that bucket and to the public ``cloud-samples-data`` bucket.
    """
    # [START bigquery_dataframes_multimodal_dataframe_create]
    import bigframes

    # Flag to enable the feature
    bigframes.options.experiments.blob = True

    import bigframes.pandas as bpd

    # Create blob columns from wildcard path.
    df_image = bpd.from_glob_path(
        "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*", name="image"
    )
    # Other ways are: from string uri column
    # df = bpd.DataFrame({"uri": ["gs://<my_bucket>/<my_file_0>", "gs://<my_bucket>/<my_file_1>"]})
    # df["blob_col"] = df["uri"].str.to_blob()

    # From an existing object table
    # df = bpd.read_gbq_object_table("<my_object_table>", name="blob_col")

    # Take only the first 5 images to work with. Preview the content of the
    # multimodal DataFrame.
    df_image = df_image.head(5)
    df_image
    # [END bigquery_dataframes_multimodal_dataframe_create]

    # [START bigquery_dataframes_multimodal_dataframe_merge]
    # Combine unstructured data with structured data
    df_image["author"] = ["alice", "bob", "bob", "alice", "bob"]  # type: ignore
    df_image["content_type"] = df_image["image"].blob.content_type()
    df_image["size"] = df_image["image"].blob.size()
    df_image["updated"] = df_image["image"].blob.updated()
    df_image

    # Filter images and display, you can also display audio and video types
    df_image[df_image["author"] == "alice"]["image"].blob.display()
    # [END bigquery_dataframes_multimodal_dataframe_merge]

    # [START bigquery_dataframes_multimodal_dataframe_image_transform]
    df_image["blurred"] = df_image["image"].blob.image_blur(
        (20, 20), dst="gs://bigframes_blob_test/image_blur_transformed/"
    )
    df_image["resized"] = df_image["image"].blob.image_resize(
        (300, 200), dst="gs://bigframes_blob_test/image_resize_transformed/"
    )
    df_image["normalized"] = df_image["image"].blob.image_normalize(
        alpha=50.0,
        beta=150.0,
        norm_type="minmax",
        dst="gs://bigframes_blob_test/image_normalize_transformed/",
    )

    # You can also chain functions together
    df_image["blur_resized"] = df_image["blurred"].blob.image_resize(
        (300, 200), dst="gs://bigframes_blob_test/image_blur_resize_transformed/"
    )
    df_image
    # [END bigquery_dataframes_multimodal_dataframe_image_transform]

    # [START bigquery_dataframes_multimodal_dataframe_ai]
    from bigframes.ml import llm

    gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")

    # Use the first 2 images as an example
    df_image = df_image.head(2)

    # Ask the same question on the images
    answer = gemini.predict(df_image, prompt=["what item is it?", df_image["image"]])
    answer[["ml_generate_text_llm_result", "image"]]

    # Ask different questions
    df_image["question"] = [  # type: ignore
        "what item is it?",
        "what color is the picture?",
    ]
    answer_alt = gemini.predict(
        df_image, prompt=[df_image["question"], df_image["image"]]
    )
    answer_alt[["ml_generate_text_llm_result", "image"]]

    # Generate embeddings on images
    embed_model = llm.MultimodalEmbeddingGenerator()
    embeddings = embed_model.predict(df_image["image"])
    embeddings
    # [END bigquery_dataframes_multimodal_dataframe_ai]

    # [START bigquery_dataframes_multimodal_dataframe_pdf_chunk]
    # PDF chunking
    df_pdf = bpd.from_glob_path(
        "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
    )
    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk()
    chunked = df_pdf["chunked"].explode()
    chunked
    # [END bigquery_dataframes_multimodal_dataframe_pdf_chunk]

    # Smoke checks only: each snippet must have produced a result object.
    assert df_image is not None
    assert answer is not None
    assert answer_alt is not None
    assert embeddings is not None
    assert chunked is not None

0 commit comments

Comments
 (0)