Closed
Description
In pandas/pandas/core/internals/managers.py (line 1638 in d430195), we check that we have one type of block.
For ExtensionBlocks, that's insufficient. If you try to concatenate two series with different EA dtypes, it'll call the first EA's _concat_same_type
with incorrect types.
In [13]: from pandas.tests.extension.decimal.test_decimal import *
In [14]: import pandas as pd
In [15]: a = pd.Series(pd.core.arrays.integer_array([1, 2]))
In [16]: b = pd.Series(DecimalArray([decimal.Decimal(1), decimal.Decimal(2)]))
In [17]: pd.concat([a, b])
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-17-714da278d09e> in <module>
----> 1 pd.concat([a, b])
~/sandbox/pandas/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
225 verify_integrity=verify_integrity,
226 copy=copy, sort=sort)
--> 227 return op.get_result()
228
229
~/sandbox/pandas/pandas/core/reshape/concat.py in get_result(self)
389
390 mgr = self.objs[0]._data.concat([x._data for x in self.objs],
--> 391 self.new_axes)
392 cons = _concat._get_series_result_type(mgr, self.objs)
393 return cons(mgr, name=name).__finalize__(self, method='concat')
~/sandbox/pandas/pandas/core/internals/managers.py in concat(self, to_concat, new_axis)
1637
1638 if all(type(b) is type(blocks[0]) for b in blocks[1:]): # noqa
-> 1639 new_block = blocks[0].concat_same_type(blocks)
1640 else:
1641 values = [x.values for x in blocks]
~/sandbox/pandas/pandas/core/internals/blocks.py in concat_same_type(self, to_concat, placement)
2047 """
2048 values = self._holder._concat_same_type(
-> 2049 [blk.values for blk in to_concat])
2050 placement = placement or slice(0, len(values), 1)
2051 return self.make_block_same_class(values, ndim=self.ndim,
~/sandbox/pandas/pandas/core/arrays/integer.py in _concat_same_type(cls, to_concat)
386 def _concat_same_type(cls, to_concat):
387 data = np.concatenate([x._data for x in to_concat])
--> 388 mask = np.concatenate([x._mask for x in to_concat])
389 return cls(data, mask)
390
~/sandbox/pandas/pandas/core/arrays/integer.py in <listcomp>(.0)
386 def _concat_same_type(cls, to_concat):
387 data = np.concatenate([x._data for x in to_concat])
--> 388 mask = np.concatenate([x._mask for x in to_concat])
389 return cls(data, mask)
390
AttributeError: 'DecimalArray' object has no attribute '_mask'
For EA blocks, we need to ensure that they're the same dtype. When they differ, we should fall back to object.
Checking the dtypes actually solves a secondary problem. On master, we allow concat([Series[Period[D]], Series[Period[M]]]), i.e. concatenating series of periods with different frequencies. If we still want to allow that, we need to bail out before we get down to PeriodArray._concat_same_type.