From 23a149c970cd08cc94de02c9c9ac51bf62179f5d Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Wed, 17 Jan 2024 19:21:03 +0800 Subject: [PATCH 1/3] BUG: fix groupby.quantile inconsistency when interpolation='nearest' --- pandas/_libs/groupby.pyx | 2 +- pandas/tests/groupby/methods/test_quantile.py | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 8f4dfd9dcb41a..4f9ed7fa64822 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1286,7 +1286,7 @@ def group_quantile( elif interp == INTERPOLATION_MIDPOINT: out[i, k] = (val + next_val) / 2.0 elif interp == INTERPOLATION_NEAREST: - if frac > .5 or (frac == .5 and q_val > .5): # Always OK? + if frac > .5 or (frac == .5 and idx % 2 == 1): # Always OK? out[i, k] = next_val else: out[i, k] = val diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 361a8c27fbf9d..1d2e639314cba 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -38,19 +38,7 @@ ], ) @pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1]) -def test_quantile(interpolation, a_vals, b_vals, q, request): - if ( - interpolation == "nearest" - and q == 0.5 - and isinstance(b_vals, list) - and b_vals == [4, 3, 2, 1] - ): - request.applymarker( - pytest.mark.xfail( - reason="Unclear numpy expectation for nearest " - "result with equidistant data" - ) - ) +def test_quantile(interpolation, a_vals, b_vals, q): all_vals = pd.concat([pd.Series(a_vals), pd.Series(b_vals)]) a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation) From 44747c3faeeaf52b14844ea07ebe59f6e91f3cea Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Wed, 17 Jan 2024 19:38:29 +0800 Subject: [PATCH 2/3] add whatsnew --- doc/source/whatsnew/v2.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.0.rst 
b/doc/source/whatsnew/v2.3.0.rst index 53444e04dbce1..0148311347faa 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -187,7 +187,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`.DataFrameGroupBy.quantile` where the result with ``interpolation="nearest"`` was inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) - Reshaping From 8621008ee4a23d79d5f7b10b7216d8bdb9b8d3a5 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Thu, 18 Jan 2024 12:30:36 +0800 Subject: [PATCH 3/3] Add comment --- pandas/_libs/groupby.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 4f9ed7fa64822..dc8f13a919403 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1286,7 +1286,9 @@ def group_quantile( elif interp == INTERPOLATION_MIDPOINT: out[i, k] = (val + next_val) / 2.0 elif interp == INTERPOLATION_NEAREST: - if frac > .5 or (frac == .5 and idx % 2 == 1): # Always OK? + if frac > .5 or (frac == .5 and idx % 2 == 1): + # If quantile lies in the middle of two indexes, + # take the even index, as np.quantile. out[i, k] = next_val else: out[i, k] = val