diff --git a/doc/source/conf.py b/doc/source/conf.py index 8ab1c8c2f3428..e3cb6cf7fadd0 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -409,7 +409,7 @@ if pattern is None: intersphinx_mapping = { "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), "pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None), "py": ("https://pylib.readthedocs.io/en/latest/", None), diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 77791b4b7e491..7236f93c36356 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -1410,7 +1410,7 @@ Often it's useful to obtain the lower (or upper) triangular form of a correlatio corr_mat.where(mask) -The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the ``distance correlation ``__ matrix for a ``DataFrame`` object. +The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation `__ matrix for a ``DataFrame`` object. .. ipython:: python diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index c37255c765171..c9c31b408fb7e 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -16,6 +16,7 @@ Version 1.2 .. toctree:: :maxdepth: 2 + v1.2.3 v1.2.2 v1.2.1 v1.2.0 diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index fa41a83a5e5fb..1a9204bc82986 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -46,4 +46,4 @@ Bug fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v1.2.1..v1.2.2|HEAD +.. contributors:: v1.2.1..v1.2.2 diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst new file mode 100644 index 0000000000000..c94491df474ab --- /dev/null +++ b/doc/source/whatsnew/v1.2.3.rst @@ -0,0 +1,32 @@ +.. _whatsnew_123: + +What's new in 1.2.3 (March 02, 2021) +------------------------------------ + +These are the changes in pandas 1.2.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_123.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`) +- Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`) +- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`) +- Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`) +- Fixed regression in :meth:`Series.sort_index` and :meth:`DataFrame.sort_index`, which exited with an ungraceful error when having kwarg ``ascending=None`` passed. Passing ``ascending=None`` is still considered invalid, and the improved error message suggests a proper usage (``ascending`` must be a boolean or a list-like of boolean) (:issue:`39434`) +- Fixed regression in :meth:`DataFrame.transform` and :meth:`Series.transform` giving incorrect column labels when passed a dictionary with a mix of list and non-list values (:issue:`40018`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_123.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.2.2..v1.2.3|HEAD diff --git a/environment.yml b/environment.yml index 319261ec6640c..bc5bfcd162500 100644 --- a/environment.yml +++ b/environment.yml @@ -114,4 +114,4 @@ dependencies: - natsort # DataFrame.sort_values - pip: - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103 - - git+https://github.com/numpy/numpydoc + - numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 74f21bae39ba9..f5d4cedf7398d 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -491,6 +491,22 @@ def transform_dict_like( # GH 15931 - deprecation of renaming keys raise SpecificationError("nested renamer is not supported") + is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) + + # if we have a dict of any non-scalars + # eg. {'A' : ['mean']}, normalize all to + # be list-likes + # Cannot use func.values() because arg may be a Series + if any(is_aggregator(x) for _, x in func.items()): + new_func: AggFuncTypeDict = {} + for k, v in func.items(): + if not is_aggregator(v): + # mypy can't realize v is not a list here + new_func[k] = [v] # type:ignore[list-item] + else: + new_func[k] = v + func = new_func + results: Dict[Label, FrameOrSeriesUnion] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index fa427e94fe08f..96ac0dad18e65 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -348,13 +348,13 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): super().__init__(values, mask, copy=copy) def __neg__(self): - return type(self)(-self._data, self._mask) + return type(self)(-self._data, self._mask.copy()) def __pos__(self): return self def __abs__(self): - return type(self)(np.abs(self._data), self._mask) + return type(self)(np.abs(self._data), self._mask.copy()) @classmethod def _from_sequence( diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index caed932cd7857..d47abfe226593 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -142,7 +142,7 @@ def __len__(self) -> int: return len(self._data) def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: - return type(self)(~self._data, self._mask) + return type(self)(~self._data, self._mask.copy()) def to_numpy( self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 396108bab47b7..0094ebc744a34 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3179,6 +3179,9 @@ def _setitem_array(self, key, value): key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() + if isinstance(value, DataFrame): + # GH#39931 reindex since iloc does not align + value = value.reindex(self.index.take(indexer)) self.iloc[indexer] = value else: if isinstance(value, DataFrame): @@ -3951,8 +3954,8 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: .. deprecated:: 1.2.0 DataFrame.lookup is deprecated, use DataFrame.melt and DataFrame.loc instead. - For an example see :meth:`~pandas.DataFrame.lookup` - in the user guide. + For further details see + :ref:`Looking up values by index/column labels `. Parameters ---------- @@ -5479,7 +5482,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool = True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -5500,7 +5503,7 @@ def sort_index( and 1 identifies the columns. level : int or level name or list of ints or list of level names If not None, sort on values in specified index level(s). - ascending : bool or list of bools, default True + ascending : bool or list-like of bools, default True Sort ascending vs. descending. When the index is a MultiIndex the sort direction can be controlled for each level individually. inplace : bool, default False diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8da3bae190f82..4350cd60c99d7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -56,6 +56,7 @@ from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc, rewrite_axis_style_signature from pandas.util._validators import ( + validate_ascending, validate_bool_kwarg, validate_fillna_kwargs, validate_percentile, @@ -4518,7 +4519,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool_t = True, + ascending: Union[Union[bool_t, int], Sequence[Union[bool_t, int]]] = True, inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", @@ -4529,6 +4530,8 @@ def sort_index( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + target = self._get_axis(axis) indexer = get_indexer_indexer( @@ -6438,6 +6441,7 @@ def fillna( return result.__finalize__(self, method="fillna") @final + @doc(klass=_shared_doc_kwargs["klass"]) def ffill( self: FrameOrSeries, axis=None, @@ -6460,6 +6464,7 @@ def ffill( pad = ffill @final + @doc(klass=_shared_doc_kwargs["klass"]) def bfill( self: FrameOrSeries, axis=None, diff --git a/pandas/core/series.py b/pandas/core/series.py index 4b0d5f0b407be..1f209e68e5acf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -12,9 +12,11 @@ Iterable, List, Optional, + Sequence, Tuple, Type, Union, + cast, ) import warnings @@ -3065,7 +3067,7 @@ def update(self, other) -> None: def sort_values( self, axis=0, - ascending=True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -3083,7 +3085,7 @@ def sort_values( axis : {0 or 'index'}, default 0 Axis to direct sorting. The value 'index' is accepted for compatibility with DataFrame.sort_values. - ascending : bool, default True + ascending : bool or list of bools, default True If True, sort values in ascending order, otherwise descending. inplace : bool, default False If True, perform operation in-place. @@ -3243,6 +3245,7 @@ def sort_values( ) if is_list_like(ascending): + ascending = cast(Sequence[Union[bool, int]], ascending) if len(ascending) != 1: raise ValueError( f"Length of ascending ({len(ascending)}) must be 1 for Series" @@ -3257,7 +3260,7 @@ def sort_values( # GH 35922. Make sorting stable by leveraging nargsort values_to_sort = ensure_key_mapped(self, key)._values if key else self._values - sorted_index = nargsort(values_to_sort, kind, ascending, na_position) + sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) result = self._constructor( self._values[sorted_index], index=self.index[sorted_index] @@ -3275,7 +3278,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool = True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -3295,7 +3298,7 @@ def sort_index( Axis to direct sorting. This can only be 0 for Series. level : int, optional If not None, sort on values in specified index level(s). - ascending : bool or list of bools, default True + ascending : bool or list-like of bools, default True Sort ascending vs. descending. When the index is a MultiIndex the sort direction can be controlled for each level individually. inplace : bool, default False diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 0a1cbc6de1cda..fdbe5dfc38524 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -8,6 +8,7 @@ Iterable, List, Optional, + Sequence, Tuple, Union, ) @@ -39,7 +40,7 @@ def get_indexer_indexer( target: "Index", level: Union[str, int, List[str], List[int]], - ascending: bool, + ascending: Union[Sequence[Union[bool, int]], Union[bool, int]], kind: str, na_position: str, sort_remaining: bool, diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index a8020f4bb4e4f..9a90819a5dda4 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -59,7 +59,7 @@ EXCEPT_MSG = """ Pyperclip could not find a copy/paste mechanism for your system. For more information, please visit - https://pyperclip.readthedocs.io/en/latest/introduction.html#not-implemented-error + https://pyperclip.readthedocs.io/en/latest/#not-implemented-error """ ENCODING = "utf-8" diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 0cad67169feff..6e77406948202 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -465,7 +465,7 @@ def __init__( if not len(Index(cols).intersection(df.columns)): raise KeyError("passes columns are not ALL present dataframe") - if len(Index(cols).intersection(df.columns)) != len(cols): + if len(Index(cols).intersection(df.columns)) != len(set(cols)): # Deprecated in GH#17295, enforced in 1.0.0 raise KeyError("Not all names specified in 'columns' are found") diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index e1ac7b1b02f21..dd75da9ad50f1 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -100,7 +100,7 @@ def to_json( if path_or_buf is not None: # apply compression and byte/text conversion with get_handle( - path_or_buf, "wt", compression=compression, storage_options=storage_options + path_or_buf, "w", compression=compression, storage_options=storage_options ) as handles: handles.handle.write(s) else: diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 1d2833c5da276..148b7092abb56 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -159,3 +159,16 @@ def test_error_len_mismatch(data, all_arithmetic_operators): s = pd.Series(data) with pytest.raises(ValueError, match="Lengths must match"): op(s, other) + + +@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"]) +@pytest.mark.parametrize( + "values, dtype", [([1, 2, 3], "Int64"), ([True, False, True], "boolean")] +) +def test_unary_op_does_not_propagate_mask(op, values, dtype): + # https://github.com/pandas-dev/pandas/issues/39943 + s = pd.Series(values, dtype=dtype) + result = getattr(s, op)() + expected = result.copy(deep=True) + s[0] = None + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 46edde62b510e..5080e5546627c 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1174,7 +1174,9 @@ def test_from_coo(self): row = [0, 3, 1, 0] col = [0, 3, 1, 2] data = [4, 5, 7, 9] - sp_array = scipy.sparse.coo_matrix((data, (row, col))) + # TODO: Remove dtype when scipy is fixed + # https://github.com/scipy/scipy/issues/13585 + sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") result = pd.Series.sparse.from_coo(sp_array) index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index c70d55b07661d..b6d49e48dbdf0 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -14,13 +14,8 @@ @pytest.fixture( params=[ - # pandas\tests\arrays\string_\test_string.py:16: error: List item 1 has - # incompatible type "ParameterSet"; expected - # "Sequence[Collection[object]]" [list-item] "string", - pytest.param( - "arrow_string", marks=skip_if_no_pyarrow - ), # type:ignore[list-item] + pytest.param("arrow_string", marks=skip_if_no_pyarrow), ] ) def dtype(request): diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py index d3a3b1482affd..c2ee2bbbc54e4 100644 --- a/pandas/tests/frame/apply/test_frame_transform.py +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -99,6 +99,17 @@ def test_transform_dictlike(axis, float_frame, box): tm.assert_frame_equal(result, expected) +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "ops", [ diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cedef4784e4a1..d40fc388fefbd 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -404,3 +404,12 @@ def test_setitem_boolean_mask(self, mask_type, float_frame): expected = df.copy() expected.values[np.array(mask)] = np.nan tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexer", [lambda x: x, lambda x: x.loc]) + def test_setitem_boolean_mask_aligning(self, indexer): + # GH#39931 + df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) + expected = df.copy() + mask = df["a"] >= 3 + indexer(df)[mask] = indexer(df)[mask].sort_values("a") + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index de847c12723b2..0fcf98d9b677a 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -765,6 +765,23 @@ def test_sort_index_with_categories(self, categories): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + [True, None], + [False, "True"], + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + # GH 39434 + df = DataFrame(np.arange(64)) + length = len(df.index) + df.index = [(i - length / 2) % length for i in range(length)] + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + df.sort_index(axis=0, ascending=ascending, na_position="first") + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index af0de05965398..6d0684d4d1315 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1295,6 +1295,15 @@ def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): with pytest.raises(ValueError, match="Excel does not support"): df.to_excel(path) + def test_excel_duplicate_columns_with_names(self, path): + # GH#39695 + df = DataFrame({"A": [0, 1], "B": [10, 11]}) + df.to_excel(path, columns=["A", "B", "A"], index=False) + + result = pd.read_excel(path) + expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) + tm.assert_frame_equal(result, expected) + class TestExcelWriterEngineTests: @pytest.mark.parametrize( diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 5faca6bd89dad..febeb4d690562 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -1,3 +1,5 @@ +from io import BytesIO + import pytest import pandas.util._test_decorators as td @@ -115,3 +117,13 @@ def test_to_json_compression(compression_only, read_infer, to_infer): df.to_json(path, compression=to_compression) result = pd.read_json(path, compression=read_compression) tm.assert_frame_equal(result, df) + + +def test_to_json_compression_mode(compression): + # GH 39985 (read_json does not support user-provided binary files) + expected = pd.DataFrame({"A": [1]}) + + with BytesIO() as buffer: + expected.to_json(buffer, compression=compression) + # df = pd.read_json(buffer, compression=compression) + # tm.assert_frame_equal(expected, df) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index dba4b9214e50c..aaf97b16aaefe 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -745,6 +745,9 @@ def test_read_json_table_timezones_orient(self, idx, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) + @pytest.mark.filterwarnings( + "ignore:an integer is required (got type float)*:DeprecationWarning" + ) def test_comprehensive(self): df = DataFrame( { diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 2dfd18cd67821..6064fb1dd43b6 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -249,11 +249,19 @@ def test_pickle_options(fsspectest): tm.assert_frame_equal(df, out) -def test_json_options(fsspectest): +def test_json_options(fsspectest, compression): df = DataFrame({"a": [0]}) - df.to_json("testmem://afile", storage_options={"test": "json_write"}) + df.to_json( + "testmem://afile", + compression=compression, + storage_options={"test": "json_write"}, + ) assert fsspectest.test[0] == "json_write" - out = read_json("testmem://afile", storage_options={"test": "json_read"}) + out = read_json( + "testmem://afile", + compression=compression, + storage_options={"test": "json_read"}, + ) assert fsspectest.test[0] == "json_read" tm.assert_frame_equal(df, out) diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py index 992aaa540a65f..27d769c3bd5f3 100644 --- a/pandas/tests/series/apply/test_series_transform.py +++ b/pandas/tests/series/apply/test_series_transform.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat +from pandas import DataFrame, MultiIndex, Series, concat import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels @@ -52,6 +52,17 @@ def test_transform_dictlike(string_series, box): tm.assert_frame_equal(result, expected) +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = Series([1, 4]) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) + + def test_transform_wont_agg(string_series): # GH 35964 # we are trying to transform with an aggregator diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index 6c6be1506255a..422eedd431b11 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -199,6 +199,20 @@ def test_sort_index_ascending_list(self): expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + (True, None), + (False, "True"), + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + ser.sort_index(ascending=ascending) + class TestSeriesSortIndexKey: def test_sort_index_multiindex_key(self): diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index a765f268cfb07..236aa05444ec8 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -100,9 +100,7 @@ def ignore_na(request): @pytest.fixture( params=[ - pytest.param( - "numba", marks=td.skip_if_no("numba", "0.46.0") - ), # type: ignore[list-item] + pytest.param("numba", marks=td.skip_if_no("numba", "0.46.0")), "cython", ] ) @@ -318,7 +316,7 @@ def halflife_with_times(request): "float64", "m8[ns]", "M8[ns]", - pytest.param( # type: ignore[list-item] + pytest.param( "datetime64[ns, UTC]", marks=pytest.mark.skip( "direct creation of extension dtype datetime64[ns, UTC] " diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index fa7201a5188a5..289ea63be62e2 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,7 +2,7 @@ Module that contains many useful utilities for validating data or function arguments """ -from typing import Iterable, Union +from typing import Iterable, Sequence, Union import warnings import numpy as np @@ -205,9 +205,39 @@ def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_ar validate_kwargs(fname, kwargs, compat_args) -def validate_bool_kwarg(value, arg_name): - """ Ensures that argument passed in arg_name is of type bool. """ - if not (is_bool(value) or value is None): +def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False): + """ + Ensure that argument passed in arg_name can be interpreted as boolean. + + Parameters + ---------- + value : bool + Value to be validated. + arg_name : str + Name of the argument. To be reflected in the error message. + none_allowed : bool, default True + Whether to consider None to be a valid boolean. + int_allowed : bool, default False + Whether to consider integer value to be a valid boolean. + + Returns + ------- + value + The same value as input. + + Raises + ------ + ValueError + If the value is not a valid boolean. + """ + good_value = is_bool(value) + if none_allowed: + good_value = good_value or value is None + + if int_allowed: + good_value = good_value or isinstance(value, int) + + if not good_value: raise ValueError( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." @@ -381,3 +411,14 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: if not all(0 <= qs <= 1 for qs in q_arr): raise ValueError(msg.format(q_arr / 100.0)) return q_arr + + +def validate_ascending( + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, +): + """Validate ``ascending`` kwargs for ``sort_index`` method.""" + kwargs = {"none_allowed": False, "int_allowed": True} + if not isinstance(ascending, (list, tuple)): + return validate_bool_kwarg(ascending, "ascending", **kwargs) + + return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] diff --git a/requirements-dev.txt b/requirements-dev.txt index 2591dcbf7b1ad..98a149eb5d412 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -77,4 +77,4 @@ pyreadstat tabulate>=0.8.3 natsort git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103 -git+https://github.com/numpy/numpydoc +numpydoc < 1.2 diff --git a/setup.cfg b/setup.cfg index 244e6f18bb0ef..03fbb46799a14 100644 --- a/setup.cfg +++ b/setup.cfg @@ -122,6 +122,7 @@ strict_equality = True warn_redundant_casts = True warn_unused_ignores = True show_error_codes = True +no_site_packages = True [mypy-pandas.tests.*] check_untyped_defs=False