Skip to content

ValueError on dropna call #6355

Closed
Closed
@fonnesbeck

Description

@fonnesbeck

I'm running into a problem when calling `dropna` with a list of columns passed via the `subset` argument. I want to remove rows with missing values:

missing

The dataset looks like this:

dataset

However, when I try to call dropna using these columns, it complains about a duplicate axis:

variables.dropna(subset=['cigarette_smokers', 'birth_wt_child', 
                                 'oxygen', 'length_of_stay', 'hospitalized_vitamin_d', 'breastfed'])

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-168-96be0b0b00e8> in <module>()
      1 variables.dropna(subset=['cigarette_smokers', 'birth_wt_child', 
----> 2                                      'oxygen', 'length_of_stay', 'hospitalized_vitamin_d', 'breastfed'])

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/frame.pyc in dropna(self, axis, how, thresh, subset, inplace)
   2407             if subset is not None:
   2408                 agg_axis_name = self._get_axis_name(agg_axis)
-> 2409                 agg_obj = self.reindex(**{agg_axis_name: subset})
   2410 
   2411             count = agg_obj.count(axis=agg_axis)

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/frame.pyc in reindex(self, index, columns, **kwargs)
   2160     def reindex(self, index=None, columns=None, **kwargs):
   2161         return super(DataFrame, self).reindex(index=index, columns=columns,
-> 2162                                               **kwargs)
   2163 
   2164     @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/generic.pyc in reindex(self, *args, **kwargs)
   1561         return self._reindex_axes(axes, level, limit,
   1562                                   method, fill_value, copy,
-> 1563                                   takeable=takeable).__finalize__(self)
   1564 
   1565     def _reindex_axes(self, axes, level, limit, method, fill_value, copy,

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/frame.pyc in _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable)
   2110         if columns is not None:
   2111             frame = frame._reindex_columns(columns, copy, level, fill_value,
-> 2112                                            limit, takeable=takeable)
   2113 
   2114         index = axes['index']

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/frame.pyc in _reindex_columns(self, new_columns, copy, level, fill_value, limit, takeable)
   2137         return self._reindex_with_indexers({1: [new_columns, indexer]},
   2138                                            copy=copy, fill_value=fill_value,
-> 2139                                            allow_dups=takeable)
   2140 
   2141     def _reindex_multi(self, axes, copy, fill_value):

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/generic.pyc in _reindex_with_indexers(self, reindexers, method, fill_value, limit, copy, allow_dups)
   1687                 new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
   1688                                                     fill_value=fill_value,
-> 1689                                                     allow_dups=allow_dups)
   1690 
   1691             elif (baxis == 0 and index is not None and

/Library/Python/2.7/site-packages/pandas-0.13.0_395_gef55e60-py2.7-macosx-10.9-intel.egg/pandas/core/internals.pyc in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups)
   3227         # trying to reindex on an axis with duplicates
   3228         if not allow_dups and not self.axes[axis].is_unique:
-> 3229             raise ValueError("cannot reindex from a duplicate axis")
   3230 
   3231         if not self.is_consolidated():

ValueError: cannot reindex from a duplicate axis

Running a current build from master in Python 2.7.5 on OS X 10.9.1.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug, Missing-data (np.nan, pd.NaT, pd.NA, dropna, isnull, interpolate)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions