@@ -2008,7 +2008,7 @@ def merge(
2008
2008
mappings = (* left_mappings , * right_mappings ),
2009
2009
type = how ,
2010
2010
)
2011
- joined_expr = self .expr .join (other .expr , join_def = join_def )
2011
+ joined_expr = self .expr .relational_join (other .expr , join_def = join_def )
2012
2012
result_columns = []
2013
2013
matching_join_labels = []
2014
2014
@@ -2267,25 +2267,33 @@ def join(
2267
2267
raise NotImplementedError (
2268
2268
f"Only how='outer','left','right','inner' currently supported. { constants .FEEDBACK_LINK } "
2269
2269
)
2270
- # Special case for null index,
2270
+ # Handle null index, which only supports row join
2271
+ if (self .index .nlevels == other .index .nlevels == 0 ) and not block_identity_join :
2272
+ if not block_identity_join :
2273
+ result = try_row_join (self , other , how = how )
2274
+ if result is not None :
2275
+ return result
2276
+ raise bigframes .exceptions .NullIndexError (
2277
+ "Cannot implicitly align objects. Set an explicit index using set_index."
2278
+ )
2279
+
2280
+ # Oddly, pandas row-wise join ignores right index names
2271
2281
if (
2272
- ( self . index . nlevels == other . index . nlevels == 0 )
2273
- and not sort
2274
- and not block_identity_join
2282
+ not block_identity_join
2283
+ and ( self . index . nlevels == other . index . nlevels )
2284
+ and ( self . index . dtypes == other . index . dtypes )
2275
2285
):
2276
- return join_indexless (self , other , how = how )
2286
+ result = try_row_join (self , other , how = how )
2287
+ if result is not None :
2288
+ return result
2277
2289
2278
2290
self ._throw_if_null_index ("join" )
2279
2291
other ._throw_if_null_index ("join" )
2280
2292
if self .index .nlevels == other .index .nlevels == 1 :
2281
- return join_mono_indexed (
2282
- self , other , how = how , sort = sort , block_identity_join = block_identity_join
2283
- )
2284
- else :
2293
+ return join_mono_indexed (self , other , how = how , sort = sort )
2294
+ else : # Handles cases where one or both sides are multi-indexed
2285
2295
# Always sort multi-index join
2286
- return join_multi_indexed (
2287
- self , other , how = how , sort = sort , block_identity_join = block_identity_join
2288
- )
2296
+ return join_multi_indexed (self , other , how = how , sort = sort )
2289
2297
2290
2298
def _force_reproject (self ) -> Block :
2291
2299
"""Forces a reprojection of the underlying tables expression. Used to force predicate/order application before subsequent operations."""
@@ -2623,46 +2631,55 @@ def is_uniquely_named(self: BlockIndexProperties):
2623
2631
return len (set (self .names )) == len (self .names )
2624
2632
2625
2633
2626
- def join_indexless (
2634
+ def try_row_join (
2627
2635
left : Block ,
2628
2636
right : Block ,
2629
2637
* ,
2630
2638
how = "left" ,
2631
- ) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
2632
- """Joins two blocks"""
2639
+ ) -> Optional [ Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],] ]:
2640
+ """Joins two blocks that have a common root expression by merging the projections. """
2633
2641
left_expr = left .expr
2634
2642
right_expr = right .expr
2643
+ # Create a new array value, mapping from both, then left, and then right
2644
+ join_keys = tuple (
2645
+ join_defs .CoalescedColumnMapping (
2646
+ left_source_id = left_id ,
2647
+ right_source_id = right_id ,
2648
+ destination_id = guid .generate_guid (),
2649
+ )
2650
+ for left_id , right_id in zip (left .index_columns , right .index_columns )
2651
+ )
2635
2652
left_mappings = [
2636
2653
join_defs .JoinColumnMapping (
2637
2654
source_table = join_defs .JoinSide .LEFT ,
2638
2655
source_id = id ,
2639
2656
destination_id = guid .generate_guid (),
2640
2657
)
2641
- for id in left_expr . column_ids
2658
+ for id in left . value_columns
2642
2659
]
2643
2660
right_mappings = [
2644
2661
join_defs .JoinColumnMapping (
2645
2662
source_table = join_defs .JoinSide .RIGHT ,
2646
2663
source_id = id ,
2647
2664
destination_id = guid .generate_guid (),
2648
2665
)
2649
- for id in right_expr . column_ids
2666
+ for id in right . value_columns
2650
2667
]
2651
2668
combined_expr = left_expr .try_align_as_projection (
2652
2669
right_expr ,
2653
2670
join_type = how ,
2671
+ join_keys = join_keys ,
2654
2672
mappings = (* left_mappings , * right_mappings ),
2655
2673
)
2656
2674
if combined_expr is None :
2657
- raise bigframes .exceptions .NullIndexError (
2658
- "Cannot implicitly align objects. Set an explicit index using set_index."
2659
- )
2675
+ return None
2660
2676
get_column_left = {m .source_id : m .destination_id for m in left_mappings }
2661
2677
get_column_right = {m .source_id : m .destination_id for m in right_mappings }
2662
2678
block = Block (
2663
2679
combined_expr ,
2664
2680
column_labels = [* left .column_labels , * right .column_labels ],
2665
- index_columns = (),
2681
+ index_columns = (key .destination_id for key in join_keys ),
2682
+ index_labels = left .index .names ,
2666
2683
)
2667
2684
return (
2668
2685
block ,
@@ -2704,7 +2721,7 @@ def join_with_single_row(
2704
2721
mappings = (* left_mappings , * right_mappings ),
2705
2722
type = "cross" ,
2706
2723
)
2707
- combined_expr = left_expr .join (
2724
+ combined_expr = left_expr .relational_join (
2708
2725
right_expr ,
2709
2726
join_def = join_def ,
2710
2727
)
@@ -2731,7 +2748,6 @@ def join_mono_indexed(
2731
2748
* ,
2732
2749
how = "left" ,
2733
2750
sort = False ,
2734
- block_identity_join : bool = False ,
2735
2751
) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
2736
2752
left_expr = left .expr
2737
2753
right_expr = right .expr
@@ -2759,14 +2775,14 @@ def join_mono_indexed(
2759
2775
mappings = (* left_mappings , * right_mappings ),
2760
2776
type = how ,
2761
2777
)
2762
- combined_expr = left_expr .join (
2778
+
2779
+ combined_expr = left_expr .relational_join (
2763
2780
right_expr ,
2764
2781
join_def = join_def ,
2765
- allow_row_identity_join = (not block_identity_join ),
2766
2782
)
2783
+
2767
2784
get_column_left = join_def .get_left_mapping ()
2768
2785
get_column_right = join_def .get_right_mapping ()
2769
- # Drop original indices from each side. and used the coalesced combination generated by the join.
2770
2786
left_index = get_column_left [left .index_columns [0 ]]
2771
2787
right_index = get_column_right [right .index_columns [0 ]]
2772
2788
# Drop the original indices from each side and use the coalesced combination generated by the join.
@@ -2800,7 +2816,6 @@ def join_multi_indexed(
2800
2816
* ,
2801
2817
how = "left" ,
2802
2818
sort = False ,
2803
- block_identity_join : bool = False ,
2804
2819
) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
2805
2820
if not (left .index .is_uniquely_named () and right .index .is_uniquely_named ()):
2806
2821
raise ValueError ("Joins not supported on indices with non-unique level names" )
@@ -2819,8 +2834,6 @@ def join_multi_indexed(
2819
2834
left_join_ids = [left .index .resolve_level_exact (name ) for name in common_names ]
2820
2835
right_join_ids = [right .index .resolve_level_exact (name ) for name in common_names ]
2821
2836
2822
- names_fully_match = len (left_only_names ) == 0 and len (right_only_names ) == 0
2823
-
2824
2837
left_expr = left .expr
2825
2838
right_expr = right .expr
2826
2839
@@ -2850,13 +2863,11 @@ def join_multi_indexed(
2850
2863
type = how ,
2851
2864
)
2852
2865
2853
- combined_expr = left_expr .join (
2866
+ combined_expr = left_expr .relational_join (
2854
2867
right_expr ,
2855
2868
join_def = join_def ,
2856
- # If we're only joining on a subset of the index columns, we need to
2857
- # perform a true join.
2858
- allow_row_identity_join = (names_fully_match and not block_identity_join ),
2859
2869
)
2870
+
2860
2871
get_column_left = join_def .get_left_mapping ()
2861
2872
get_column_right = join_def .get_right_mapping ()
2862
2873
left_ids_post_join = [get_column_left [id ] for id in left_join_ids ]
0 commit comments