Skip to content

KeyError when using str.cat and index was changed #7857

Closed
@toobaz

Description

@toobaz
df = DataFrame(index=MultiIndex.from_product([[2011, 2012], [1,2,3]],
                                             names=['year', 'month']))

df = df.reset_index()

str_year = df.year.astype('str')
str_month = df.month.astype('str')
str_both = str_year.str.cat(str_month, sep=' ')

... so far, everything is fine. Now filter the rows, so that the index no longer contains the label 0, and retry:

df = df[df.month > 1]

str_year = df.year.astype('str')
str_month = df.month.astype('str')
str_both = str_year.str.cat(str_month, sep=' ')

... you will get a KeyError (tested against git, commit 90fa87e):

KeyError                                  Traceback (most recent call last)
<ipython-input-12-9d3f1fbb70fc> in <module>()
     11 str_year = df.year.astype('str')
     12 str_month = df.month.astype('str')
---> 13 str_both = str_year.str.cat(str_month, sep=' ')

/home/pietro/nobackup/repo/pandas/pandas/core/strings.py in cat(self, others, sep, na_rep)
    933     @copy(str_cat)
    934     def cat(self, others=None, sep=None, na_rep=None):
--> 935         result = str_cat(self.series, others=others, sep=sep, na_rep=na_rep)
    936         return self._wrap_result(result)
    937 

/home/pietro/nobackup/repo/pandas/pandas/core/strings.py in str_cat(arr, others, sep, na_rep)
     41 
     42     if others is not None:
---> 43         arrays = _get_array_list(arr, others)
     44 
     45         n = _length_check(arrays)

/home/pietro/nobackup/repo/pandas/pandas/core/strings.py in _get_array_list(arr, others)
     13 
     14 def _get_array_list(arr, others):
---> 15     if len(others) and isinstance(others[0], (list, np.ndarray)):
     16         arrays = [arr] + list(others)
     17     else:

/home/pietro/nobackup/repo/pandas/pandas/core/series.py in __getitem__(self, key)
    491     def __getitem__(self, key):
    492         try:
--> 493             result = self.index.get_value(self, key)
    494 
    495             if not np.isscalar(result):

/home/pietro/nobackup/repo/pandas/pandas/core/index.py in get_value(self, series, key)
   1194 
   1195         try:
-> 1196             return self._engine.get_value(s, k)
   1197         except KeyError as e1:
   1198             if len(self) > 0 and self.inferred_type in ['integer','boolean']:

/home/pietro/nobackup/repo/pandas/pandas/index.so in pandas.index.IndexEngine.get_value (pandas/index.c:2991)()

/home/pietro/nobackup/repo/pandas/pandas/index.so in pandas.index.IndexEngine.get_value (pandas/index.c:2806)()

/home/pietro/nobackup/repo/pandas/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3532)()

/home/pietro/nobackup/repo/pandas/pandas/hashtable.so in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7033)()

/home/pietro/nobackup/repo/pandas/pandas/hashtable.so in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6974)()

KeyError: 0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug, Strings (String extension data type and string data)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions