Skip to content

[ENH] Add reset_index flag to row_to_names function #849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jul 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,4 @@ Contributors
- `@MollyCroke <https://github.com/MollyCroke>`_ | `contributions <https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3AMollyCroke>`_
- `@ericclessantostv <https://github.com/ericlessantostv>`_ | `contributions <https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Aericclessantostv>`_
- `@nvamsikrishna05 <https://github.com/nvamsikrishna05>`_ | `contributions <https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Anvamsikrishna05>`_
- `@fireddd <https://github.com/fireddd>`_ | `contributions <https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Afireddd>`_
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- [INF] Update pre-commit hooks and remove mutable references. Issue #844. @loganthomas
- [INF] Add GitHub Release pointer to auto-release script. Issue #818. @loganthomas
- [INF] Updated black version in github actions code-checks to match pre-commit hooks. @nvamsikrishna05
- [ENH] Add reset_index flag to row_to_names function. @fireddd
- [ENH] Updated `label_encode` to use pandas factorize instead of scikit-learn LabelEncoder. @nvamsikrishna05
- [INF] Removed the scikit-learn package from the dependencies from environment-dev.yml and base.in files. @nvamsikrishna05

Expand Down
79 changes: 72 additions & 7 deletions janitor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@

import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype
import pandas_flavor as pf
from multipledispatch import dispatch
from natsort import index_natsorted
from pandas.api.types import is_bool_dtype, is_list_like, union_categoricals
from pandas.api.types import (
is_bool_dtype,
is_list_like,
is_numeric_dtype,
union_categoricals,
)
from pandas.errors import OutOfBoundsDatetime
from scipy.stats import mode

Expand All @@ -48,10 +52,10 @@
_replace_original_empty_string_with_none,
_select_columns,
_strip_underscores,
asCategorical,
check,
check_column,
deprecated_alias,
asCategorical,
)


Expand Down Expand Up @@ -2289,6 +2293,7 @@ def row_to_names(
row_number: int = None,
remove_row: bool = False,
remove_rows_above: bool = False,
reset_index: bool = False,
) -> pd.DataFrame:
"""Elevates a row to be the column names of a DataFrame.

Expand All @@ -2307,6 +2312,7 @@ def row_to_names(
row_number=0,
remove_row=False,
remove_rows_above=False,
reset_index=False,
)
)

Expand All @@ -2316,6 +2322,8 @@ def row_to_names(
Defaults to False.
:param remove_rows_above: Whether the rows above the selected row should
be removed from the DataFrame. Defaults to False.
:param reset_index: Whether the index should be reset on the returned
DataFrame. Defaults to False.
:returns: A pandas DataFrame with set column names.
"""
# :Setup:
Expand Down Expand Up @@ -2353,7 +2361,31 @@ def row_to_names(
# 6 1 1 1 rabbit Cambridge
# 7 2 2 2 leopard Shanghai

# :Example: Move first row to column names and remove row:
# :Example: Move first row to column names and
# remove row while resetting the index:

# .. code-block:: python

# example_dataframe = pd.DataFrame(data_dict)
# example_dataframe.row_to_names(0, remove_row=True,\
# reset_index=True)

# :Output:

# .. code-block:: python

# 1 1 1 rabbit Cambridge
# 0 2 2 2 leopard Shanghai
# 1 3 3 3 lion Basel
# 2 1 1 1 rabbit Cambridge
# 3 2 2 2 leopard Shanghai
# 4 3 3 3 lion Basel
# 5 1 1 1 rabbit Cambridge
# 6 2 2 2 leopard Shanghai
# 7 3 3 3 lion Basel

# :Example: Move first row to column names and remove
# row without resetting the index:

# .. code-block:: python

Expand All @@ -2374,14 +2406,37 @@ def row_to_names(
# 7 2 2 2 leopard Shanghai
# 8 3 3 3 lion Basel

# :Example: Move first row to column names, remove row, \
# and remove rows above selected row:
# :Example: Move first row to column names, remove row,
# and remove rows above selected row while resetting
# index:

# .. code-block:: python

# example_dataframe = pd.DataFrame(data_dict)
# example_dataframe.row_to_names(2, remove_row=True, \
# remove_rows_above=True, reset_index=True)

# :Output:

# .. code-block:: python

# 3 3 3 lion Basel
# 0 1 1 1 rabbit Cambridge
# 1 2 2 2 leopard Shanghai
# 2 3 3 3 lion Basel
# 3 1 1 1 rabbit Cambridge
# 4 2 2 2 leopard Shanghai
# 5 3 3 3 lion Basel

# :Example: Move first row to column names, remove row,
# and remove rows above selected row without resetting
# index:

# .. code-block:: python

# example_dataframe = pd.DataFrame(data_dict)
# example_dataframe.row_to_names(2, remove_row=True, \
# remove_rows_above=True)
# remove_rows_above=True)

# :Output:

Expand All @@ -2397,6 +2452,13 @@ def row_to_names(

check("row_number", row_number, [int])

warnings.warn(
"The function row_to_names will, in the official 1.0 release, "
"change its behaviour to reset the dataframe's index by default. "
"You can prepare for this change right now by explicitly setting "
"`reset_index=True` when calling on `row_to_names`."
)

df.columns = df.iloc[row_number, :]
df.columns.name = None

Expand All @@ -2406,6 +2468,9 @@ def row_to_names(
if remove_rows_above:
df = df.drop(df.index[range(row_number)])

if reset_index:
    # `drop` is a boolean flag in pandas. The previous value, the list
    # ["index"], only behaved like True because a non-empty list is truthy.
    # drop=True discards the old index instead of inserting it as a column.
    df.reset_index(drop=True, inplace=True)

return df


Expand Down
37 changes: 37 additions & 0 deletions tests/functions/test_row_to_names.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import pytest


Expand All @@ -21,6 +22,15 @@ def test_row_to_names_delete_this_row(dataframe):
assert df.iloc[2, 4] == "Cambridge"


@pytest.mark.functions
def test_row_to_names_delete_the_row_without_resetting_index(dataframe):
    """Removing the promoted row keeps the original index labels.

    With ``remove_row=True`` and no ``reset_index``, label 2 must be
    absent from the index while every other label is left untouched.
    """
    result = dataframe.row_to_names(2, remove_row=True)
    pd.testing.assert_index_equal(
        result.index,
        pd.Index([0, 1, 3, 4, 5, 6, 7, 8]),
    )


@pytest.mark.functions
def test_row_to_names_delete_above(dataframe):
df = dataframe.row_to_names(2, remove_rows_above=True)
Expand All @@ -29,3 +39,30 @@ def test_row_to_names_delete_above(dataframe):
assert df.iloc[0, 2] == 3
assert df.iloc[0, 3] == "lion"
assert df.iloc[0, 4] == "Basel"


@pytest.mark.functions
def test_row_to_names_delete_above_without_resetting_index(dataframe):
    """Dropping the rows above the header row keeps the index labels.

    With ``remove_rows_above=True`` and no ``reset_index``, the index
    must start at label 2 and run through label 8.
    """
    result = dataframe.row_to_names(2, remove_rows_above=True)
    pd.testing.assert_index_equal(
        result.index,
        pd.Index([2, 3, 4, 5, 6, 7, 8]),
    )


@pytest.mark.functions
def test_row_to_names_delete_above_with_resetting_index(dataframe):
    """Dropping the rows above the header row and resetting renumbers.

    With ``remove_rows_above=True`` and ``reset_index=True``, the index
    must be a fresh 0..6 range.
    """
    result = dataframe.row_to_names(
        2, remove_rows_above=True, reset_index=True
    )
    pd.testing.assert_index_equal(
        result.index,
        pd.RangeIndex(start=0, stop=7, step=1),
    )


@pytest.mark.functions
def test_row_to_names_delete_the_row_with_resetting_index(dataframe):
    """Removing the promoted row and resetting renumbers the index.

    With ``remove_row=True`` and ``reset_index=True``, the index must be
    a fresh 0..7 range.
    """
    result = dataframe.row_to_names(2, remove_row=True, reset_index=True)
    pd.testing.assert_index_equal(
        result.index,
        pd.RangeIndex(start=0, stop=8, step=1),
    )