Skip to content

Commit ec16908

Browse files
committed
BUG: DataFrame._reduce was converting integers to strings in mixed-type
case. BUG: Remove creating np.array. This allows us to use our own logic for promoting dtypes. #6806 BUG: Only convert 0/1 ints to bool. #6806 DOC: added release notes
1 parent 5b98151 commit ec16908

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ Bug Fixes
318318
(:issue:`6762`).
319319
- Bug in Makefile where it didn't remove Cython generated C files with ``make
320320
clean`` (:issue:`6768`)
321+
- Bug in ``DataFrame._reduce`` where non bool-like (0/1) integers were being
322+
converted into bools. (:issue:`6806`)
321323

322324
pandas 0.13.1
323325
-------------

pandas/core/common.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,9 @@ def conv(r, dtype):
840840
elif dtype == _TD_DTYPE:
841841
r = _coerce_scalar_to_timedelta_type(r)
842842
elif dtype == np.bool_:
843+
# messy. non 0/1 integers do not get converted.
844+
if is_integer(r) and r not in [0,1]:
845+
return int(r)
843846
r = bool(r)
844847
elif dtype.kind == 'f':
845848
r = float(r)
@@ -850,7 +853,7 @@ def conv(r, dtype):
850853

851854
return r
852855

853-
return np.array([conv(r, dtype) for r, dtype in zip(result, dtypes)])
856+
return [conv(r, dtype) for r, dtype in zip(result, dtypes)]
854857

855858

856859
def _infer_dtype_from_scalar(val):

pandas/tests/test_frame.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10476,6 +10476,18 @@ def test_bool_describe_in_mixed_frame(self):
1047610476
assert_almost_equal(bool_describe['mean'], 0.4)
1047710477
assert_almost_equal(bool_describe['50%'], 0)
1047810478

10479+
def test_reduce_mixed_frame(self):
10480+
# GH 6806
10481+
df = DataFrame({
10482+
'bool_data': [True, True, False, False, False],
10483+
'int_data': [10, 20, 30, 40, 50],
10484+
'string_data': ['a', 'b', 'c', 'd', 'e'],
10485+
})
10486+
df.reindex(columns=['bool_data', 'int_data', 'string_data'])
10487+
test = df.sum(axis=0)
10488+
assert_almost_equal(test.values, [2, 150, 'abcde'])
10489+
assert_series_equal(test, df.T.sum(axis=1))
10490+
1047910491
def test_count(self):
1048010492
f = lambda s: notnull(s).sum()
1048110493
self._check_stat_op('count', f,

0 commit comments

Comments
 (0)