@@ -560,9 +560,9 @@ def pdf_extract(
560
560
self ,
561
561
* ,
562
562
connection : Optional [str ] = None ,
563
- max_batching_rows : int = 8192 ,
564
- container_cpu : Union [float , int ] = 0.33 ,
565
- container_memory : str = "512Mi " ,
563
+ max_batching_rows : int = 1 ,
564
+ container_cpu : Union [float , int ] = 2 ,
565
+ container_memory : str = "1Gi " ,
566
566
) -> bigframes .series .Series :
567
567
"""Extracts text from PDF URLs and saves the text as string.
568
568
@@ -574,10 +574,10 @@ def pdf_extract(
574
574
connection (str or None, default None): BQ connection used for
575
575
function internet transactions, and the output blob if "dst"
576
576
is str. If None, uses default connection of the session.
577
- max_batching_rows (int, default 8,192 ): Max number of rows per batch
577
+ max_batching_rows (int, default 1 ): Max number of rows per batch
578
578
sent to Cloud Run to execute the function.
579
- container_cpu (int or float, default 0.33 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
580
- container_memory (str, default "512Mi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
579
+ container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
580
+ container_memory (str, default "1Gi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
581
581
582
582
Returns:
583
583
bigframes.series.Series: contains all text from a PDF file
@@ -604,11 +604,11 @@ def pdf_chunk(
604
604
self ,
605
605
* ,
606
606
connection : Optional [str ] = None ,
607
- chunk_size : int = 1000 ,
607
+ chunk_size : int = 2000 ,
608
608
overlap_size : int = 200 ,
609
- max_batching_rows : int = 8192 ,
610
- container_cpu : Union [float , int ] = 0.33 ,
611
- container_memory : str = "512Mi " ,
609
+ max_batching_rows : int = 1 ,
610
+ container_cpu : Union [float , int ] = 2 ,
611
+ container_memory : str = "1Gi " ,
612
612
) -> bigframes .series .Series :
613
613
"""Extracts and chunks text from PDF URLs and saves the text as
614
614
arrays of strings.
@@ -620,15 +620,15 @@ def pdf_chunk(
620
620
connection (str or None, default None): BQ connection used for
621
621
function internet transactions, and the output blob if "dst"
622
622
is str. If None, uses default connection of the session.
623
- chunk_size (int, default 1000 ): the desired size of each text chunk
623
+ chunk_size (int, default 2000 ): the desired size of each text chunk
624
624
(number of characters).
625
625
overlap_size (int, default 200): the number of overlapping characters
626
626
between consecutive chunks. This helps to ensure context is
627
627
preserved across chunk boundaries.
628
- max_batching_rows (int, default 8,192 ): Max number of rows per batch
628
+ max_batching_rows (int, default 1 ): Max number of rows per batch
629
629
sent to Cloud Run to execute the function.
630
- container_cpu (int or float, default 0.33 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
631
- container_memory (str, default "512Mi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
630
+ container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
631
+ container_memory (str, default "1Gi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
632
632
633
633
Returns:
634
634
bigframes.series.Series: Series of array[str], where each string is a
0 commit comments