Skip to content

Commit 8f30f65

Browse files
committed
Merge pull request #6123 from jreback/dups_slice
BUG: Bug in propagating _ref_locs during construction of a DataFrame with dups index/columns (GH6121)
2 parents ebe4641 + 75293c8 commit 8f30f65

File tree

3 files changed

+54
-17
lines changed

3 files changed

+54
-17
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ Bug Fixes
159159
- Fixed missing arg validation in get_options_data (:issue:`6105`)
160160
- Bug in assignment with duplicate columns in a frame where the locations
161161
are a slice (e.g. next to each other) (:issue:`6120`)
162+
- Bug in propagating _ref_locs during construction of a DataFrame with dups
163+
index/columns (:issue:`6121`)
162164

163165
pandas 0.13.0
164166
-------------

pandas/core/internals.py

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,25 @@ def ref_locs(self):
116116
self._ref_locs = indexer
117117
return self._ref_locs
118118

119+
def take_ref_locs(self, indexer):
120+
"""
121+
need to preserve the ref_locs and just shift them
122+
return None if ref_locs is None
123+
124+
see GH6059
125+
"""
126+
127+
ref_locs = self._ref_locs
128+
if ref_locs is None:
129+
return None
130+
131+
tindexer = np.ones(len(ref_locs),dtype=bool)
132+
tindexer[indexer] = False
133+
tindexer = tindexer.astype(int).cumsum()[indexer]
134+
ref_locs = ref_locs[indexer]
135+
ref_locs -= tindexer
136+
return ref_locs
137+
119138
def reset_ref_locs(self):
120139
""" reset the block ref_locs """
121140
self._ref_locs = np.empty(len(self.items), dtype='int64')
@@ -866,13 +885,20 @@ def func(x):
866885
ndim=self.ndim, klass=self.__class__, fastpath=True)]
867886
return self._maybe_downcast(blocks, downcast)
868887

869-
def take(self, indexer, ref_items, axis=1):
888+
def take(self, indexer, ref_items, new_axis, axis=1):
870889
if axis < 1:
871890
raise AssertionError('axis must be at least 1, got %d' % axis)
872891
new_values = com.take_nd(self.values, indexer, axis=axis,
873892
allow_fill=False)
893+
894+
# need to preserve the ref_locs and just shift them
895+
# GH6121
896+
ref_locs = None
897+
if not new_axis.is_unique:
898+
ref_locs = self._ref_locs
899+
874900
return [make_block(new_values, self.items, ref_items, ndim=self.ndim,
875-
klass=self.__class__, fastpath=True)]
901+
klass=self.__class__, placement=ref_locs, fastpath=True)]
876902

877903
def get_values(self, dtype=None):
878904
return self.values
@@ -1820,7 +1846,7 @@ def shift(self, indexer, periods, axis=0):
18201846
new_values[periods:] = fill_value
18211847
return [self.make_block(new_values)]
18221848

1823-
def take(self, indexer, ref_items, axis=1):
1849+
def take(self, indexer, ref_items, new_axis, axis=1):
18241850
""" going to take our items
18251851
along the long dimension"""
18261852
if axis < 1:
@@ -2601,18 +2627,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
26012627
if len(self.blocks) == 1:
26022628

26032629
blk = self.blocks[0]
2604-
2605-
# see GH 6059
2606-
ref_locs = blk._ref_locs
2607-
if ref_locs is not None:
2608-
2609-
# need to preserve the ref_locs and just shift them
2610-
indexer = np.ones(len(ref_locs),dtype=bool)
2611-
indexer[slobj] = False
2612-
indexer = indexer.astype(int).cumsum()[slobj]
2613-
ref_locs = ref_locs[slobj]
2614-
ref_locs -= indexer
2615-
2630+
ref_locs = blk.take_ref_locs(slobj)
26162631
newb = make_block(blk._slice(slobj), new_items, new_items,
26172632
klass=blk.__class__, fastpath=True,
26182633
placement=ref_locs)
@@ -3371,6 +3386,7 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
33713386
if axis < 1:
33723387
raise AssertionError('axis must be at least 1, got %d' % axis)
33733388

3389+
self._consolidate_inplace()
33743390
if isinstance(indexer, list):
33753391
indexer = np.array(indexer)
33763392

@@ -3388,8 +3404,12 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
33883404
new_index = self.axes[axis].take(indexer)
33893405

33903406
new_axes[axis] = new_index
3391-
return self.apply('take', axes=new_axes, indexer=indexer,
3392-
ref_items=new_axes[0], axis=axis)
3407+
return self.apply('take',
3408+
axes=new_axes,
3409+
indexer=indexer,
3410+
ref_items=new_axes[0],
3411+
new_axis=new_axes[axis],
3412+
axis=axis)
33933413

33943414
def merge(self, other, lsuffix=None, rsuffix=None):
33953415
if not self._is_indexed_like(other):

pandas/tests/test_frame.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3303,6 +3303,21 @@ def check(result, expected=None):
33033303
result = dfbool[['one', 'three', 'one']]
33043304
check(result,expected)
33053305

3306+
# multi-axis dups
3307+
# GH 6121
3308+
df = DataFrame(np.arange(25.).reshape(5,5),
3309+
index=['a', 'b', 'c', 'd', 'e'],
3310+
columns=['A', 'B', 'C', 'D', 'E'])
3311+
z = df[['A', 'C', 'A']].copy()
3312+
expected = z.ix[['a', 'c', 'a']]
3313+
3314+
df = DataFrame(np.arange(25.).reshape(5,5),
3315+
index=['a', 'b', 'c', 'd', 'e'],
3316+
columns=['A', 'B', 'C', 'D', 'E'])
3317+
z = df[['A', 'C', 'A']]
3318+
result = z.ix[['a', 'c', 'a']]
3319+
check(result,expected)
3320+
33063321
def test_insert_benchmark(self):
33073322
# from the vb_suite/frame_methods/frame_insert_columns
33083323
N = 10

0 commit comments

Comments
 (0)