Skip to content

BUG: pd.merge with ExtensionArray does not preserve extension dtype #20743

Closed
@jorisvandenbossche

Description

@jorisvandenbossche
In [1]: from pandas.tests.extension.decimal.array import DecimalArray, make_data

In [5]: dec_arr = DecimalArray(make_data())

In [6]: df1 = pd.DataFrame({'int1': [1, 2, 3], 'key':[0, 1, 2], 'ext1': dec_arr[:3]})

In [7]: df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key':[0, 0, 1, 3], 'ext2': dec_arr[3:7]})

In [8]: pd.merge(df1, df2)
Out[8]: 
                                                ext1  int1  key                                               ext2  int2
0  0.90013275661511904512934734157170169055461883...     1    0  0.67786011817398117429434023506473749876022338...     1
1  0.90013275661511904512934734157170169055461883...     1    0  0.94029656863099908559178174982662312686443328...     2
2  0.96839085663514357094072693143971264362335205...     2    1  0.12455159685855177187363551638554781675338745...     3

In [9]: pd.merge(df1, df2, how='outer')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-9-a573147da092> in <module>()
----> 1 pd.merge(df1, df2, how='outer')

/home/joris/scipy/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     59                          copy=copy, indicator=indicator,
     60                          validate=validate)
---> 61     return op.get_result()
     62 
     63 

/home/joris/scipy/pandas/pandas/core/reshape/merge.py in get_result(self)
    579             [(ldata, lindexers), (rdata, rindexers)],
    580             axes=[llabels.append(rlabels), join_index],
--> 581             concat_axis=0, copy=self.copy)
    582 
    583         typ = self.left._constructor

/home/joris/scipy/pandas/pandas/core/internals.py in concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy)
   5407         else:
   5408             b = make_block(
-> 5409                 concatenate_join_units(join_units, concat_axis, copy=copy),
   5410                 placement=placement)
   5411         blocks.append(b)

/home/joris/scipy/pandas/pandas/core/internals.py in concatenate_join_units(join_units, concat_axis, copy)
   5533         raise AssertionError("Concatenating join units along axis0")
   5534 
-> 5535     empty_dtype, upcasted_na = get_empty_dtype_and_na(join_units)
   5536 
   5537     to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,

/home/joris/scipy/pandas/pandas/core/internals.py in get_empty_dtype_and_na(join_units)
   5458             has_none_blocks = True
   5459         else:
-> 5460             dtypes[i] = unit.dtype
   5461 
   5462     upcast_classes = defaultdict(list)

/home/joris/scipy/pandas/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__()

/home/joris/scipy/pandas/pandas/core/internals.py in dtype(self)
   5754         else:
   5755             return _get_dtype(maybe_promote(self.block.dtype,
-> 5756                                             self.block.fill_value)[0])
   5757 
   5758     @cache_readonly

/home/joris/scipy/pandas/pandas/core/dtypes/common.py in _get_dtype(arr_or_dtype)
   1830     if hasattr(arr_or_dtype, 'dtype'):
   1831         arr_or_dtype = arr_or_dtype.dtype
-> 1832     return np.dtype(arr_or_dtype)
   1833 
   1834 

TypeError: data type not understood

Metadata

Assignees

No one assigned

    Labels

    ExtensionArray: Extending pandas with custom dtypes or arrays.

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions