Skip to content

BUG: assigning Series.array / PandasArray to column fails #26390

Closed
@jorisvandenbossche

Description

@jorisvandenbossche

Assigning a PandasArray of the correct length (which includes the result of df['a'].array) as a new column fails:

In [1]: df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']})                                                                                     

In [2]: df['c'] = pd.array([1, 2, None, 3])                                                                                                                   
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2672             try:
-> 2673                 return self._engine.get_loc(key)
   2674             except KeyError:

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'c'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value)
   1048         try:
-> 1049             loc = self.items.get_loc(item)
   1050         except KeyError:

~/scipy/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2674             except KeyError:
-> 2675                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2676         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

~/scipy/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'c'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-2-03925b585d9b> in <module>
----> 1 df['c'] = pd.array([1, 2, None, 3])

~/scipy/pandas/pandas/core/frame.py in __setitem__(self, key, value)
   3334         else:
   3335             # set column
-> 3336             self._set_item(key, value)
   3337 
   3338     def _setitem_slice(self, key, value):

~/scipy/pandas/pandas/core/frame.py in _set_item(self, key, value)
   3410         self._ensure_valid_index(value)
   3411         value = self._sanitize_column(key, value)
-> 3412         NDFrame._set_item(self, key, value)
   3413 
   3414         # check if we are modifying a copy

~/scipy/pandas/pandas/core/generic.py in _set_item(self, key, value)
   3232 
   3233     def _set_item(self, key, value):
-> 3234         self._data.set(key, value)
   3235         self._clear_item_cache()
   3236 

~/scipy/pandas/pandas/core/internals/managers.py in set(self, item, value)
   1050         except KeyError:
   1051             # This item wasn't present, just insert at end
-> 1052             self.insert(len(self.items), item, value)
   1053             return
   1054 

~/scipy/pandas/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
   1152 
   1153         block = make_block(values=value, ndim=self.ndim,
-> 1154                            placement=slice(loc, loc + 1))
   1155 
   1156         for blkno, count in _fast_count_smallints(self._blknos[loc:]):

~/scipy/pandas/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   3052         values = DatetimeArray._simple_new(values, dtype=dtype)
   3053 
-> 3054     return klass(values, ndim=ndim, placement=placement)
   3055 
   3056 

~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
   2584             values = np.array(values, dtype=object)
   2585 
-> 2586         super().__init__(values, ndim=ndim, placement=placement)
   2587 
   2588     @property

~/scipy/pandas/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
     74 
     75     def __init__(self, values, placement, ndim=None):
---> 76         self.ndim = self._check_ndim(values, ndim)
     77         self.mgr_locs = placement
     78         self.values = values

~/scipy/pandas/pandas/core/internals/blocks.py in _check_ndim(self, values, ndim)
    111             msg = ("Wrong number of dimensions. values.ndim != ndim "
    112                    "[{} != {}]")
--> 113             raise ValueError(msg.format(values.ndim, ndim))
    114 
    115         return ndim

ValueError: Wrong number of dimensions. values.ndim != ndim [1 != 2]

Note that this only fails for the PandasArray types, i.e. when a FloatBlock, IntBlock, etc. is created (these expect 2D data). It does not fail when an ExtensionBlock is created, as is done for an "actual" ExtensionArray.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; ExtensionArray (Extending pandas with custom dtypes or arrays.)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions