Closed
Description
Assigning a PandasArray (and therefore also the result of df['a'].array) of the correct length to a DataFrame in order to add a new column fails:
In [1]: df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']})
In [2]: df['c'] = pd.array([1, 2, None, 3])
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2672 try:
-> 2673 return self._engine.get_loc(key)
2674 except KeyError:
~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'c'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value)
1048 try:
-> 1049 loc = self.items.get_loc(item)
1050 except KeyError:
~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2674 except KeyError:
-> 2675 return self._engine.get_loc(self._maybe_cast_indexer(key))
2676 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'c'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-2-03925b585d9b> in <module>
----> 1 df['c'] = pd.array([1, 2, None, 3])
~/scipy/pandas/pandas/core/frame.py in __setitem__(self, key, value)
3334 else:
3335 # set column
-> 3336 self._set_item(key, value)
3337
3338 def _setitem_slice(self, key, value):
~/scipy/pandas/pandas/core/frame.py in _set_item(self, key, value)
3410 self._ensure_valid_index(value)
3411 value = self._sanitize_column(key, value)
-> 3412 NDFrame._set_item(self, key, value)
3413
3414 # check if we are modifying a copy
~/scipy/pandas/pandas/core/generic.py in _set_item(self, key, value)
3232
3233 def _set_item(self, key, value):
-> 3234 self._data.set(key, value)
3235 self._clear_item_cache()
3236
~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value)
1050 except KeyError:
1051 # This item wasn't present, just insert at end
-> 1052 self.insert(len(self.items), item, value)
1053 return
1054
~/scipy/pandas/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1152
1153 block = make_block(values=value, ndim=self.ndim,
-> 1154 placement=slice(loc, loc + 1))
1155
1156 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
~/scipy/pandas/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
3052 values = DatetimeArray._simple_new(values, dtype=dtype)
3053
-> 3054 return klass(values, ndim=ndim, placement=placement)
3055
3056
~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
2584 values = np.array(values, dtype=object)
2585
-> 2586 super().__init__(values, ndim=ndim, placement=placement)
2587
2588 @property
~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
74
75 def __init__(self, values, placement, ndim=None):
---> 76 self.ndim = self._check_ndim(values, ndim)
77 self.mgr_locs = placement
78 self.values = values
~/scipy/pandas/pandas/core/internals/blocks.py in _check_ndim(self, values, ndim)
111 msg = ("Wrong number of dimensions. values.ndim != ndim "
112 "[{} != {}]")
--> 113 raise ValueError(msg.format(values.ndim, ndim))
114
115 return ndim
ValueError: Wrong number of dimensions. values.ndim != ndim [1 != 2]
Note that this only fails for the PandasArray types, i.e. when a FloatBlock, IntBlock, etc. is created (these blocks expect 2D data). It does not fail when an ExtensionBlock is created, as is done for an "actual" ExtensionArray.