-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: groupby with CategoricalIndex doesn't include unobserved categories #49373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
86cc43d
eb40033
57b8e07
8273de1
6f779a1
513c322
81fbdce
30c3253
e65dd2e
efc6303
35e22e4
27f39ed
df0ce75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -476,11 +476,15 @@ def __init__( | |
# In extant tests, the new self.grouping_vector matches | ||
# `index.get_level_values(ilevel)` whenever | ||
# mapper is None and isinstance(index, MultiIndex) | ||
if isinstance(index, MultiIndex): | ||
index_level = index.get_level_values(ilevel) | ||
else: | ||
index_level = index | ||
( | ||
self.grouping_vector, # Index | ||
self._codes, | ||
self._group_index, | ||
) = index._get_grouper_for_level(mapper, level=ilevel, dropna=dropna) | ||
) = index_level._get_grouper_for_level(mapper, dropna=dropna) | ||
|
||
# a passed Grouper like, directly get the grouper in the same way | ||
# as single grouper groupby, use the group_info to get codes | ||
|
@@ -504,15 +508,6 @@ def __init__( | |
# use Index instead of ndarray so we can recover the name | ||
self.grouping_vector = Index(ng, name=newgrouper.result_index.name) | ||
|
||
elif is_categorical_dtype(self.grouping_vector): | ||
# a passed Categorical | ||
self._passed_categorical = True | ||
|
||
self._orig_cats = self.grouping_vector.categories | ||
self.grouping_vector, self._all_grouper = recode_for_groupby( | ||
self.grouping_vector, sort, observed | ||
) | ||
|
||
elif not isinstance( | ||
self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray) | ||
): | ||
|
@@ -542,6 +537,14 @@ def __init__( | |
# TODO 2022-10-08 we only have one test that gets here and | ||
# values are already in nanoseconds in that case. | ||
self.grouping_vector = Series(self.grouping_vector).to_numpy() | ||
elif is_categorical_dtype(self.grouping_vector): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reviewer: Any particular reason this was moved from above? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Author reply: Previously, this block was in an |
||
# a passed Categorical | ||
self._passed_categorical = True | ||
|
||
self._orig_cats = self.grouping_vector.categories | ||
self.grouping_vector, self._all_grouper = recode_for_groupby( | ||
self.grouping_vector, sort, observed | ||
) | ||
|
||
def __repr__(self) -> str: | ||
return f"Grouping({self.name})" | ||
|
Uh oh!
There was an error while loading. Please reload this page.