diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index 8739694c20e33..e546c8c8b80e3 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -16,6 +16,7 @@ Version 1.2
.. toctree::
:maxdepth: 2
+ v1.2.5
v1.2.4
v1.2.3
v1.2.2
diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst
index dd74091b64014..433ee37508e66 100644
--- a/doc/source/whatsnew/v1.2.4.rst
+++ b/doc/source/whatsnew/v1.2.4.rst
@@ -30,4 +30,4 @@ Fixed regressions
Contributors
~~~~~~~~~~~~
-.. contributors:: v1.2.3..v1.2.4|HEAD
+.. contributors:: v1.2.3..v1.2.4
diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst
new file mode 100644
index 0000000000000..d3ceb2b919b5d
--- /dev/null
+++ b/doc/source/whatsnew/v1.2.5.rst
@@ -0,0 +1,31 @@
+.. _whatsnew_125:
+
+What's new in 1.2.5 (June 22, 2021)
+-----------------------------------
+
+These are the changes in pandas 1.2.5. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_125.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+- Fixed regression in :func:`concat` between two :class:`DataFrame` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
+- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
+- Fixed regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
+- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
+- Fixed regression in :class:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
+- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`)
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_125.contributors:
+
+Contributors
+~~~~~~~~~~~~
+
+.. contributors:: v1.2.4..v1.2.5|HEAD
diff --git a/environment.yml b/environment.yml
index a369f656cb575..ec3c31a15ee02 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,7 +4,7 @@ channels:
dependencies:
# required
- numpy>=1.16.5
- - python=3
+ - python=3.8
- python-dateutil>=2.7.3
- pytz
@@ -20,7 +20,7 @@ dependencies:
# code checks
- black=20.8b1
- cpplint
- - flake8
+ - flake8=3.9.2
- flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions
- isort>=5.2.1 # check that imports are in the right order
- mypy=0.782
@@ -77,7 +77,7 @@ dependencies:
- bottleneck>=1.2.1
- ipykernel
- ipython>=7.11.1
- - jinja2 # pandas.Styler
+ - jinja2<3.0.0 # pandas.Styler
- matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot
- numexpr>=2.6.8
- scipy>=1.2
@@ -97,14 +97,14 @@ dependencies:
- xlwt
- odfpy
- - fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet
+ - fastparquet>=0.3.2, <=0.5.0 # pandas.read_parquet, DataFrame.to_parquet
- pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
- python-snappy # required by pyarrow
- pyqt>=5.9.2 # pandas.read_clipboard
- pytables>=3.5.1 # pandas.read_hdf, DataFrame.to_hdf
- s3fs>=0.4.0 # file IO when using 's3://...' path
- - fsspec>=0.7.4 # for generic remote file operations
+ - fsspec>=0.7.4, <2021.6.0 # for generic remote file operations
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
- xarray # DataFrame.to_xarray
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 2ac9b9e2c875c..9aa261fd745d5 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -77,6 +77,18 @@ def is_platform_mac() -> bool:
return sys.platform == "darwin"
+def is_platform_arm() -> bool:
+ """
+ Checking if the running platform uses ARM architecture.
+
+ Returns
+ -------
+ bool
+ True if the running platform uses ARM architecture.
+ """
+ return platform.machine() in ("arm64", "aarch64")
+
+
def import_lzma():
"""
Importing the `lzma` module.
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d5819697066ef..8b683d32f7b45 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -9,7 +9,7 @@
from pandas._config import get_option
-from pandas._libs import NaT, algos as libalgos, hashtable as htable
+from pandas._libs import NaT, algos as libalgos, hashtable as htable, lib
from pandas._libs.lib import no_default
from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
from pandas.compat.numpy import function as nv
@@ -429,6 +429,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
try:
new_cats = np.asarray(self.categories)
new_cats = new_cats.astype(dtype=dtype, copy=copy)
+ fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype))
except (
TypeError, # downstream error msg for CategoricalIndex is misleading
ValueError,
@@ -436,7 +437,11 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
raise ValueError(msg)
- result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes))
+ result = take_1d(
+ new_cats,
+ libalgos.ensure_platform_int(self._codes),
+ fill_value=fill_value,
+ )
return result
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4c156d7470364..92892ac0f26e0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8786,7 +8786,6 @@ def _reduce(
**kwds,
):
- min_count = kwds.get("min_count", 0)
assert filter_type is None or filter_type == "bool", filter_type
out_dtype = "bool" if filter_type == "bool" else None
@@ -8831,7 +8830,7 @@ def _get_data() -> DataFrame:
data = self._get_bool_data()
return data
- if (numeric_only is not None or axis == 0) and min_count == 0:
+ if numeric_only is not None or axis == 0:
# For numeric_only non-None and axis non-None, we know
# which blocks to use and no try/except is needed.
# For numeric_only=None only the case with axis==0 and no object
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 11d191597d61e..25860d6a4ecb3 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2742,7 +2742,8 @@ def _union(self, other, sort):
# worth making this faster? a very unusual case
value_set = set(lvals)
result.extend([x for x in rvals if x not in value_set])
- result = Index(result)._values # do type inference here
+ # If objects are unorderable, we must have object dtype.
+ return np.array(result, dtype=object)
else:
# find indexes of things in "other" that are not in "self"
if self.is_unique:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index b6bca855a9f05..a38b7a19dc80a 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -391,7 +391,7 @@ def reduce(self, func, ignore_failures: bool = False) -> List["Block"]:
return []
raise
- if np.ndim(result) == 0:
+ if self.values.ndim == 1:
# TODO(EA2D): special case not needed with 2D EAs
res_values = np.array([[result]])
else:
@@ -860,6 +860,15 @@ def _replace_list(
"""
See BlockManager._replace_list docstring.
"""
+
+ # https://github.com/pandas-dev/pandas/issues/40371
+ # the following pairs check code caused a regression so we catch that case here
+ # until the issue is fixed properly in can_hold_element
+
+ # error: "Iterable[Any]" has no attribute "tolist"
+ if hasattr(src_list, "tolist"):
+ src_list = src_list.tolist() # type: ignore[attr-defined]
+
# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index edc1b1e96509e..d42497c3a8322 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1377,8 +1377,15 @@ def _maybe_null_out(
Dtype
The product of all elements on a given axis. ( NaNs are treated as 1)
"""
- if mask is not None and axis is not None and getattr(result, "ndim", False):
- null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
+ if axis is not None and isinstance(result, np.ndarray):
+ if mask is not None:
+ null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
+ else:
+ # we have no nulls, kept mask=None in _maybe_get_mask
+ below_count = shape[axis] - min_count < 0
+ new_shape = shape[:axis] + shape[axis + 1 :]
+ null_mask = np.broadcast_to(below_count, new_shape)
+
if np.any(null_mask):
if is_numeric_dtype(result):
if np.iscomplexobj(result):
diff --git a/pandas/io/common.py b/pandas/io/common.py
index be353fefdd1ef..e6b6471294ac7 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -568,7 +568,12 @@ def get_handle(
# memory mapping needs to be the first step
handle, memory_map, handles = _maybe_memory_map(
- handle, memory_map, ioargs.encoding, ioargs.mode, errors
+ handle,
+ memory_map,
+ ioargs.encoding,
+ ioargs.mode,
+ errors,
+ ioargs.compression["method"] not in _compression_to_extension,
)
is_path = isinstance(handle, str)
@@ -759,7 +764,18 @@ class _MMapWrapper(abc.Iterator):
"""
- def __init__(self, f: IO):
+ def __init__(
+ self,
+ f: IO,
+ encoding: str = "utf-8",
+ errors: str = "strict",
+ decode: bool = True,
+ ):
+ self.encoding = encoding
+ self.errors = errors
+ self.decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
+ self.decode = decode
+
self.attributes = {}
for attribute in ("seekable", "readable", "writeable"):
if not hasattr(f, attribute):
@@ -775,19 +791,31 @@ def __getattr__(self, name: str):
def __iter__(self) -> "_MMapWrapper":
return self
+ def read(self, size: int = -1) -> Union[str, bytes]:
+ # CSV c-engine uses read instead of iterating
+ content: bytes = self.mmap.read(size)
+ if self.decode:
+ errors = self.errors if self.errors is not None else "strict"
+ # memory mapping is applied before compression. Encoding should
+ # be applied to the de-compressed data.
+ return content.decode(self.encoding, errors=errors)
+ return content
+
def __next__(self) -> str:
newbytes = self.mmap.readline()
# readline returns bytes, not str, but Python's CSV reader
# expects str, so convert the output to str before continuing
- newline = newbytes.decode("utf-8")
+ newline = self.decoder.decode(newbytes)
# mmap doesn't raise if reading past the allocated
# data but instead returns an empty string, so raise
# if that is returned
if newline == "":
raise StopIteration
- return newline
+
+ # IncrementalDecoder seems to push newline to the next line
+ return newline.lstrip("\n")
def _maybe_memory_map(
@@ -796,6 +824,7 @@ def _maybe_memory_map(
encoding: str,
mode: str,
errors: Optional[str],
+ decode: bool,
) -> Tuple[FileOrBuffer, bool, List[Buffer]]:
"""Try to memory map file/buffer."""
handles: List[Buffer] = []
@@ -814,7 +843,10 @@ def _maybe_memory_map(
handles.append(handle)
try:
- wrapped = cast(mmap.mmap, _MMapWrapper(handle)) # type: ignore[arg-type]
+ wrapped = cast(
+ mmap.mmap,
+ _MMapWrapper(handle, encoding, errors, decode), # type: ignore[arg-type]
+ )
handle.close()
handles.remove(handle)
handles.append(wrapped)
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 850570fc743b7..11dee69651dad 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -388,7 +388,11 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
elif hasattr(self.handles.handle, "read"):
# N.B. xlrd.Book has a read attribute too
self.handles.handle.seek(0)
- self.book = self.load_workbook(self.handles.handle)
+ try:
+ self.book = self.load_workbook(self.handles.handle)
+ except Exception:
+ self.close()
+ raise
elif isinstance(self.handles.handle, bytes):
self.book = self.load_workbook(BytesIO(self.handles.handle))
else:
@@ -406,6 +410,11 @@ def load_workbook(self, filepath_or_buffer):
pass
def close(self):
+ if hasattr(self, "book") and hasattr(self.book, "close"):
+ # pyxlsb: opens a TemporaryFile
+ # openpyxl: https://stackoverflow.com/questions/31416842/
+ # openpyxl-does-not-close-excel-workbook-in-read-only-mode
+ self.book.close()
self.handles.close()
@property
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index be1587dbc010c..cac6d8d1b8113 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -487,12 +487,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
)
- def close(self):
- # https://stackoverflow.com/questions/31416842/
- # openpyxl-does-not-close-excel-workbook-in-read-only-mode
- self.book.close()
- super().close()
-
@property
def sheet_names(self) -> List[str]:
return self.book.sheetnames
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8ad86fd0a0dce..bbff9dfe1ddd0 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1868,31 +1868,6 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
assert self.handles is not None
for key in ("storage_options", "encoding", "memory_map", "compression"):
kwds.pop(key, None)
- if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"):
- # pandas\io\parsers.py:1861: error: Item "IO[Any]" of
- # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
- # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr]
-
- # pandas\io\parsers.py:1861: error: Item "RawIOBase" of
- # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
- # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr]
-
- # pandas\io\parsers.py:1861: error: Item "BufferedIOBase" of
- # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
- # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr]
-
- # pandas\io\parsers.py:1861: error: Item "TextIOBase" of
- # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
- # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr]
-
- # pandas\io\parsers.py:1861: error: Item "TextIOWrapper" of
- # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
- # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr]
-
- # pandas\io\parsers.py:1861: error: Item "mmap" of "Union[IO[Any],
- # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]" has
- # no attribute "mmap" [union-attr]
- self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr]
try:
self._reader = parsers.TextReader(self.handles.handle, **kwds)
diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py
index e26bb513838a5..d42da39ec8ff0 100644
--- a/pandas/tests/arithmetic/common.py
+++ b/pandas/tests/arithmetic/common.py
@@ -83,6 +83,7 @@ def xbox2(x):
"Invalid comparison between",
"Cannot compare type",
"not supported between",
+ "could not be promoted",
"invalid type promotion",
(
# GH#36706 npdev 1.20.0 2020-09-28
diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py
index 6dc3b3b13dd0c..4473d86fa04a1 100644
--- a/pandas/tests/arithmetic/test_interval.py
+++ b/pandas/tests/arithmetic/test_interval.py
@@ -235,7 +235,7 @@ def test_compare_list_like_nan(self, op, array, nulls_fixture, request):
Categorical(list("abab")),
Categorical(date_range("2017-01-01", periods=4)),
pd.array(list("abcd")),
- pd.array(["foo", 3.14, None, object()]),
+ pd.array(["foo", 3.14, None, object()], dtype=object),
],
ids=lambda x: str(x.dtype),
)
diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
index 9b854a81f2def..01de64568a011 100644
--- a/pandas/tests/arrays/boolean/test_arithmetic.py
+++ b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -66,10 +66,7 @@ def test_div(left_array, right_array):
@pytest.mark.parametrize(
"opname",
[
- pytest.param(
- "floordiv",
- marks=pytest.mark.xfail(reason="NumpyDev GH#40874", strict=False),
- ),
+ "floordiv",
"mod",
pytest.param(
"pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686")
diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py
index 34686f6052131..148b7092abb56 100644
--- a/pandas/tests/arrays/masked/test_arithmetic.py
+++ b/pandas/tests/arrays/masked/test_arithmetic.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas.compat.numpy import is_numpy_dev
-
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import ExtensionArray
@@ -51,8 +49,6 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
def test_array_NA(data, all_arithmetic_operators):
if "truediv" in all_arithmetic_operators:
pytest.skip("division with pd.NA raises")
- if "floordiv" in all_arithmetic_operators and is_numpy_dev:
- pytest.skip("NumpyDev behavior GH#40874")
data, _ = data
op = tm.get_op_from_name(all_arithmetic_operators)
check_skip(data, all_arithmetic_operators)
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
index d15c822f22c14..86a0bc9213256 100644
--- a/pandas/tests/extension/test_boolean.py
+++ b/pandas/tests/extension/test_boolean.py
@@ -16,8 +16,6 @@
import numpy as np
import pytest
-from pandas.compat.numpy import is_numpy_dev
-
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.boolean import BooleanDtype
@@ -141,21 +139,6 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
with pytest.raises(exc):
op(s, other)
- def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
- if "floordiv" in all_arithmetic_operators and is_numpy_dev:
- pytest.skip("NumpyDev behavior GH#40874")
- super().test_arith_series_with_scalar(data, all_arithmetic_operators)
-
- def test_arith_series_with_array(self, data, all_arithmetic_operators):
- if "floordiv" in all_arithmetic_operators and is_numpy_dev:
- pytest.skip("NumpyDev behavior GH#40874")
- super().test_arith_series_with_scalar(data, all_arithmetic_operators)
-
- def test_divmod_series_array(self, data, data_for_twos):
- if is_numpy_dev:
- pytest.skip("NumpyDev behavior GH#40874")
- super().test_divmod_series_array(data, data_for_twos)
-
def _check_divmod_op(self, s, op, other, exc=None):
# override to not raise an error
super()._check_divmod_op(s, op, other, None)
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 5264d4b432d34..0d160966521d3 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -616,3 +616,11 @@ def test_astype_bytes(self):
# GH#39474
result = DataFrame(["foo", "bar", "baz"]).astype(bytes)
assert result.dtypes[0] == np.dtype("S3")
+
+ def test_astype_categorical_to_string_missing(self):
+ # https://github.com/pandas-dev/pandas/issues/41797
+ df = DataFrame(["a", "b", np.nan])
+ expected = df.astype(str)
+ cat = df.astype("category")
+ result = cat.astype(str)
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index c4f2e09911b34..0f85af6b26aa3 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1665,3 +1665,22 @@ def test_replace_bytes(self, frame_or_series):
expected = obj.copy()
obj = obj.replace({None: np.nan})
tm.assert_equal(obj, expected)
+
+ @pytest.mark.parametrize(
+ "data, to_replace, value, expected",
+ [
+ ([1], [1.0], [0], [0]),
+ ([1], [1], [0], [0]),
+ ([1.0], [1.0], [0], [0.0]),
+ ([1.0], [1], [0], [0.0]),
+ ],
+ )
+ @pytest.mark.parametrize("box", [list, tuple, np.array])
+ def test_replace_list_with_mixed_type(
+ self, data, to_replace, value, expected, box, frame_or_series
+ ):
+ # GH#40371
+ obj = frame_or_series(data)
+ expected = frame_or_series(expected)
+ result = obj.replace(box(to_replace), value)
+ tm.assert_equal(result, expected)
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index e5ec3c5641bd2..51420859dc1bd 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -79,6 +79,7 @@ def check(df, df2):
msgs = [
r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
"invalid type promotion",
+ "could not be promoted",
(
# npdev 1.20.0
r"The DTypes