From e06d3071edaea1dbb9931d5ff64c18b65ca1b3cf Mon Sep 17 00:00:00 2001 From: ihsan Date: Wed, 26 Jun 2019 00:50:50 +0300 Subject: [PATCH 1/9] Exclude nuisance columns from result of window functions --- pandas/core/window.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 2b3cc4f0bf00a..56e218584dc1b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj): + def _wrap_results(self, results, blocks, obj, exclude=None): """ Wrap the results. @@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj): results : list of ndarrays blocks : list of blocks obj : conformed data (may be resampled) + exclude: list of columns to exclude, default to None """ from pandas import Series, concat @@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj): indexer = columns.get_indexer(selection.tolist() + [name]) columns = columns.take(sorted(indexer)) + # exlude nuisance columns from final result + if exclude is not None and exclude: + columns = [c for c in columns if c not in exclude] + + if not columns: + return Series() + if not len(final): return obj.astype('float64') return concat(final, axis=1).reindex(columns=columns, copy=False) @@ -845,8 +853,17 @@ def _apply(self, func, name=None, window=None, center=None, blocks, obj, index = self._create_blocks() index, indexi = self._get_index(index=index) results = [] + exclude = [] for b in blocks: - values = self._prep_values(b.values) + try: + values = self._prep_values(b.values) + except (TypeError, NotImplementedError): + if hasattr(b, 'columns'): + exclude.extend(b.columns) + continue + else: + from pandas import Series + return Series() if values.size == 0: results.append(values.copy()) @@ -892,7 +909,7 @@ def calc(x): results.append(result) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, blocks, obj, exclude) class _Rolling_and_Expanding(_Rolling): @@ -2292,12 +2309,17 @@ def _apply(self, func, **kwargs): """ blocks, obj, index = self._create_blocks() results = [] + exclude = [] for b in blocks: try: values = self._prep_values(b.values) - except TypeError: - results.append(b.values.copy()) - continue + except (TypeError, NotImplementedError): + if hasattr(b, 'columns'): + exclude.extend(b.columns) + continue + else: + from pandas import Series + return Series() if values.size == 0: results.append(values.copy()) @@ -2316,7 +2338,7 @@ def func(arg): results.append(np.apply_along_axis(func, self.axis, values)) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, blocks, obj, exclude) @Substitution(name='ewm') @Appender(_doc_template) From 0d3f912c84a55a53c795984f499a76cebd579c9e Mon Sep 17 00:00:00 2001 From: ihsan Date: Wed, 26 Jun 2019 00:51:14 +0300 Subject: [PATCH 2/9] Edit existing tests --- pandas/tests/test_window.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 4dfdd1c96728b..8623a7b804d53 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -118,9 +118,11 @@ def tests_skip_nuisance(self): def test_skip_sum_object_raises(self): df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - - with pytest.raises(TypeError, match='cannot handle this type'): - r.sum() + result = r.sum() + expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9], + 'B': [np.nan, np.nan, 18, 21, 24]}, + columns=list('AB')) + tm.assert_frame_equal(result, expected) def test_agg(self): df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) @@ -1069,16 +1071,14 @@ class DatetimeLike(Dtype): def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) + result = f(roll) if f_name == 'count': - result = f(roll) tm.assert_almost_equal(result, exp) else: - - # other methods not Implemented ATM - with pytest.raises(NotImplementedError): - f(roll) + exp = Series() + tm.assert_equal(result, exp) class TestDtype_timedelta(DatetimeLike): From e812bf9687666d285794d7bf67a7c5b1db60e7a7 Mon Sep 17 00:00:00 2001 From: ihsan Date: Wed, 26 Jun 2019 01:04:50 +0300 Subject: [PATCH 3/9] Exclude nuisance columns from result of _apply_window --- pandas/core/window.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 56e218584dc1b..18912c60ddd79 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -681,12 +681,17 @@ def _apply_window(self, mean=True, **kwargs): blocks, obj, index = self._create_blocks() results = [] + exclude = [] for b in blocks: try: values = self._prep_values(b.values) - except TypeError: - results.append(b.values.copy()) - continue + except (TypeError, NotImplementedError): + if hasattr(b, 'columns'): + exclude.extend(b.columns) + continue + else: + from pandas import Series + return Series() if values.size == 0: results.append(values.copy()) @@ -708,7 +713,7 @@ def f(arg, *args, **kwargs): result = self._center_window(result, window) results.append(result) - return self._wrap_results(results, blocks, obj) + return self._wrap_results(results, blocks, obj, exclude) _agg_see_also_doc = dedent(""" See Also From 12a012b7ecfa4e261baf4152ec02a0d00fcc569f Mon Sep 17 00:00:00 2001 From: ihsan Date: Wed, 26 Jun 2019 01:17:42 +0300 Subject: [PATCH 4/9] Add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a58cdc8c93ab7..31031bfebf863 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -750,6 +750,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) +- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results or return an empty series if all columns are nuisance instead of raising an error (:issue:`12537`) Reshaping ^^^^^^^^^ From 392e7e6dd7a63d58deccd4255cde8b9f1e1e7075 Mon Sep 17 00:00:00 2001 From: ihsan Date: Thu, 27 Jun 2019 11:49:44 +0300 Subject: [PATCH 5/9] Raise DataError instead of returning empty Series --- pandas/core/window.py | 13 +++++-------- pandas/tests/test_window.py | 9 ++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 18912c60ddd79..90316941fc285 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -22,7 +22,7 @@ ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) -from pandas.core.base import PandasObject, SelectionMixin +from pandas.core.base import PandasObject, SelectionMixin, DataError import pandas.core.common as com from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin @@ -291,7 +291,7 @@ def _wrap_results(self, results, blocks, obj, exclude=None): columns = [c for c in columns if c not in exclude] if not columns: - return Series() + raise DataError('No numeric types to aggregate') if not len(final): return obj.astype('float64') @@ -690,8 +690,7 @@ def _apply_window(self, mean=True, **kwargs): exclude.extend(b.columns) continue else: - from pandas import Series - return Series() + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) @@ -867,8 +866,7 @@ def _apply(self, func, name=None, window=None, center=None, exclude.extend(b.columns) continue else: - from pandas import Series - return Series() + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) @@ -2323,8 +2321,7 @@ def _apply(self, func, **kwargs): exclude.extend(b.columns) continue else: - from pandas import Series - return Series() + raise DataError('No numeric types to aggregate') if values.size == 0: results.append(values.copy()) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 8623a7b804d53..50a95294958ed 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import ( DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna) -from pandas.core.base import SpecificationError +from pandas.core.base import SpecificationError, DataError from pandas.core.sorting import safe_sort import pandas.core.window as rwindow import pandas.util.testing as tm @@ -1071,14 +1071,13 @@ class DatetimeLike(Dtype): def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) - result = f(roll) - if f_name == 'count': + result = f(roll) tm.assert_almost_equal(result, exp) else: - exp = Series() - tm.assert_equal(result, exp) + with pytest.raises(DataError): + f(roll) class TestDtype_timedelta(DatetimeLike): From 76adb2c8277c347295320865bb9a022754ba4ae8 Mon Sep 17 00:00:00 2001 From: ihsan Date: Thu, 27 Jun 2019 20:23:37 +0300 Subject: [PATCH 6/9] Sort imports --- pandas/core/window.py | 2 +- pandas/tests/test_window.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 90316941fc285..1e20f43db3670 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -22,7 +22,7 @@ ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) -from pandas.core.base import PandasObject, SelectionMixin, DataError +from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 50a95294958ed..889754841a078 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import ( DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna) -from pandas.core.base import SpecificationError, DataError +from pandas.core.base import DataError, SpecificationError from pandas.core.sorting import safe_sort import pandas.core.window as rwindow import pandas.util.testing as tm From eaeca8e585686fc253b4454b0681904237a31b26 Mon Sep 17 00:00:00 2001 From: ihsan Date: Fri, 28 Jun 2019 00:34:53 +0300 Subject: [PATCH 7/9] Update whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 89aed415d57a2..90feb99f6ba71 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -756,7 +756,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. (:issue:`26310`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) - Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) -- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results or return an empty series if all columns are nuisance instead of raising an error (:issue:`12537`) +- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`) Reshaping ^^^^^^^^^ From ad9b5e2aeaaa32ced72ef2be65d7067c3bce0345 Mon Sep 17 00:00:00 2001 From: ihsan Date: Fri, 28 Jun 2019 22:58:16 +0300 Subject: [PATCH 8/9] Reimplement to fix issue with python 3.5 dict --- pandas/core/window.py | 44 ++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e20f43db3670..96fb65266bb0c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -112,9 +112,9 @@ def _create_blocks(self): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - blocks = obj._to_dict_of_blocks(copy=False).values() + blocks_dict = obj._to_dict_of_blocks(copy=False) - return blocks, obj, index + return blocks_dict, obj, index def _gotitem(self, key, ndim, subset=None): """ @@ -286,7 +286,7 @@ def _wrap_results(self, results, blocks, obj, exclude=None): indexer = columns.get_indexer(selection.tolist() + [name]) columns = columns.take(sorted(indexer)) - # exlude nuisance columns from final result + # exclude nuisance columns so that they are not reindexed if exclude is not None and exclude: columns = [c for c in columns if c not in exclude] @@ -679,15 +679,21 @@ def _apply_window(self, mean=True, **kwargs): window = self._prep_window(**kwargs) center = self.center - blocks, obj, index = self._create_blocks() + blocks_dict, obj, index = self._create_blocks() + dtypes = blocks_dict.keys() + blocks = blocks_dict.values() + results = [] exclude = [] - for b in blocks: + for dtype in list(dtypes): + b = blocks_dict[dtype] try: values = self._prep_values(b.values) + except (TypeError, NotImplementedError): - if hasattr(b, 'columns'): + if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) + del blocks_dict[dtype] continue else: raise DataError('No numeric types to aggregate') @@ -854,16 +860,22 @@ def _apply(self, func, name=None, window=None, center=None, if check_minp is None: check_minp = _use_window - blocks, obj, index = self._create_blocks() + blocks_dict, obj, index = self._create_blocks() + dtypes = blocks_dict.keys() + blocks = blocks_dict.values() index, indexi = self._get_index(index=index) + results = [] exclude = [] - for b in blocks: + for dtype in list(dtypes): + b = blocks_dict[dtype] try: values = self._prep_values(b.values) + except (TypeError, NotImplementedError): - if hasattr(b, 'columns'): + if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) + del blocks_dict[dtype] continue else: raise DataError('No numeric types to aggregate') @@ -957,7 +969,8 @@ class _Rolling_and_Expanding(_Rolling): def count(self): - blocks, obj, index = self._create_blocks() + blocks_dict, obj, index = self._create_blocks() + blocks = blocks_dict.values() # Validate the index self._get_index(index=index) @@ -2310,15 +2323,20 @@ def _apply(self, func, **kwargs): ------- y : same type as input argument """ - blocks, obj, index = self._create_blocks() + blocks_dict, obj, index = self._create_blocks() + dtypes = blocks_dict.keys() + blocks = blocks_dict.values() + results = [] exclude = [] - for b in blocks: + for dtype in list(dtypes): + b = blocks_dict[dtype] try: values = self._prep_values(b.values) except (TypeError, NotImplementedError): - if hasattr(b, 'columns'): + if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) + del blocks_dict[dtype] continue else: raise DataError('No numeric types to aggregate') From 6d7602eadb35239dea56c20d3a86c01953391aa8 Mon Sep 17 00:00:00 2001 From: ihsan Date: Sat, 29 Jun 2019 00:55:54 +0300 Subject: [PATCH 9/9] Change in favour of a cleaner implementation --- pandas/core/window.py | 44 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 96fb65266bb0c..8f888ba510b0e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -112,9 +112,9 @@ def _create_blocks(self): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - blocks_dict = obj._to_dict_of_blocks(copy=False) + blocks = obj._to_dict_of_blocks(copy=False).values() - return blocks_dict, obj, index + return blocks, obj, index def _gotitem(self, key, ndim, subset=None): """ @@ -679,21 +679,19 @@ def _apply_window(self, mean=True, **kwargs): window = self._prep_window(**kwargs) center = self.center - blocks_dict, obj, index = self._create_blocks() - dtypes = blocks_dict.keys() - blocks = blocks_dict.values() + blocks, obj, index = self._create_blocks() + block_list = list(blocks) results = [] exclude = [] - for dtype in list(dtypes): - b = blocks_dict[dtype] + for i, b in enumerate(blocks): try: values = self._prep_values(b.values) except (TypeError, NotImplementedError): if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) - del blocks_dict[dtype] + del block_list[i] continue else: raise DataError('No numeric types to aggregate') @@ -718,7 +716,7 @@ def f(arg, *args, **kwargs): result = self._center_window(result, window) results.append(result) - return self._wrap_results(results, blocks, obj, exclude) + return self._wrap_results(results, block_list, obj, exclude) _agg_see_also_doc = dedent(""" See Also @@ -860,22 +858,20 @@ def _apply(self, func, name=None, window=None, center=None, if check_minp is None: check_minp = _use_window - blocks_dict, obj, index = self._create_blocks() - dtypes = blocks_dict.keys() - blocks = blocks_dict.values() + blocks, obj, index = self._create_blocks() + block_list = list(blocks) index, indexi = self._get_index(index=index) results = [] exclude = [] - for dtype in list(dtypes): - b = blocks_dict[dtype] + for i, b in enumerate(blocks): try: values = self._prep_values(b.values) except (TypeError, NotImplementedError): if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) - del blocks_dict[dtype] + del block_list[i] continue else: raise DataError('No numeric types to aggregate') @@ -924,7 +920,7 @@ def calc(x): results.append(result) - return self._wrap_results(results, blocks, obj, exclude) + return self._wrap_results(results, block_list, obj, exclude) class _Rolling_and_Expanding(_Rolling): @@ -969,8 +965,7 @@ class _Rolling_and_Expanding(_Rolling): def count(self): - blocks_dict, obj, index = self._create_blocks() - blocks = blocks_dict.values() + blocks, obj, index = self._create_blocks() # Validate the index self._get_index(index=index) @@ -2323,20 +2318,19 @@ def _apply(self, func, **kwargs): ------- y : same type as input argument """ - blocks_dict, obj, index = self._create_blocks() - dtypes = blocks_dict.keys() - blocks = blocks_dict.values() + blocks, obj, index = self._create_blocks() + block_list = list(blocks) results = [] exclude = [] - for dtype in list(dtypes): - b = blocks_dict[dtype] + for i, b in enumerate(blocks): try: values = self._prep_values(b.values) + except (TypeError, NotImplementedError): if isinstance(obj, ABCDataFrame): exclude.extend(b.columns) - del blocks_dict[dtype] + del block_list[i] continue else: raise DataError('No numeric types to aggregate') @@ -2358,7 +2352,7 @@ def func(arg): results.append(np.apply_along_axis(func, self.axis, values)) - return self._wrap_results(results, blocks, obj, exclude) + return self._wrap_results(results, block_list, obj, exclude) @Substitution(name='ewm') @Appender(_doc_template)