Skip to content

Commit 631c812

Browse files
authored
[ENH] Fill direction (#879)
* linting, remove `limit` parameter, use ENUM for constants * more tests * linting * make copy of Series in chain_funcs * fix error in docstrings for _chain_func
1 parent e5306a0 commit 631c812

File tree

4 files changed

+127
-101
lines changed

4 files changed

+127
-101
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
- [DOC] Delete Read the Docs project and remove all readthedocs.io references from the repo. Issue #863. @loganthomas
1515
- [DOC] Updated various documentation sources to reflect pyjanitor-dev ownership. @loganthomas
1616
- [INF] Fix `isort` automatic checks. Issue #845. @loganthomas
17+
- [ENH] Remove the `limit` parameter from `fill_direction`; `fill_direction` now takes column-direction pairs as keyword arguments. @samukweku
1718

1819
## [v0.21.0] - 2021-07-16
1920

janitor/functions.py

Lines changed: 89 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
from scipy.stats import mode
3838

3939
from .errors import JanitorError
40+
from enum import Enum
41+
from operator import methodcaller
4042
from .utils import (
4143
_clean_accounting_column,
4244
_computations_as_categorical,
@@ -5206,72 +5208,63 @@ def process_text(
52065208

52075209

52085210
@pf.register_dataframe_method
5209-
def fill_direction(
5210-
df: pd.DataFrame,
5211-
directions: Dict[Hashable, str] = None,
5212-
limit: Optional[int] = None,
5213-
) -> pd.DataFrame:
5211+
def fill_direction(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
52145212
"""
52155213
Provide a method-chainable function for filling missing values
52165214
in selected columns.
52175215
5218-
Missing values are filled using the next or previous entry.
5219-
The columns are paired with the directions in a dictionary.
5220-
It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``.
5216+
It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``,
5217+
and pairs the column name with one of `up`, `down`, `updown`,
5218+
or `downup`.
52215219
52225220
.. code-block:: python
52235221
52245222
import pandas as pd
5225-
import numpy as np
52265223
import janitor as jn
52275224
5228-
df = pd.DataFrame({"text": ["ragnar", np.nan, "sammywemmy",
5229-
np.nan, "ginger"],
5230-
"code" : [np.nan, 2, 3, np.nan, 5]})
5231-
52325225
df
52335226
5234-
text code
5235-
0 ragnar NaN
5236-
1 NaN 2.0
5237-
2 sammywemmy 3.0
5238-
3 NaN NaN
5239-
4 ginger 5.0
5227+
text code
5228+
0 ragnar NaN
5229+
1 NaN 2.0
5230+
2 sammywemmy 3.0
5231+
3 NaN NaN
5232+
4 ginger 5.0
52405233
52415234
52425235
52435236
Fill on a single column::
52445237
5245-
df.fill_direction({"text" : "up"})
5238+
df.fill_direction(code = 'up')
52465239
5247-
text code
5248-
0 ragnar NaN
5249-
1 sammywemmy 2.0
5250-
2 sammywemmy 3.0
5251-
3 ginger NaN
5252-
4 ginger 5.0
5240+
text code
5241+
0 ragnar 2.0
5242+
1 NaN 2.0
5243+
2 sammywemmy 3.0
5244+
3 NaN 5.0
5245+
4 ginger 5.0
52535246
52545247
Fill on multiple columns::
52555248
5256-
df.fill_direction({"text" : "down", "code" : "down"})
5249+
df.fill_direction(text = 'down', code = 'down')
52575250
5258-
text code
5259-
0 ragnar NaN
5260-
1 ragnar 2.0
5261-
2 sammywemmy 3.0
5262-
3 sammywemmy 3.0
5263-
4 ginger 5.0
5251+
text code
5252+
0 ragnar NaN
5253+
1 ragnar 2.0
5254+
2 sammywemmy 3.0
5255+
3 sammywemmy 3.0
5256+
4 ginger 5.0
52645257
52655258
Fill multiple columns in different directions::
52665259
5267-
df.fill_direction({"text" : "up", "code" : "down"})
5260+
df.fill_direction(text = 'up', code = 'down')
52685261
5269-
text code
5270-
0 ragnar NaN
5271-
1 sammywemmy 2.0
5272-
2 sammywemmy 3.0
5273-
3 ginger 3.0
5274-
4 ginger 5.0
5262+
text code
5263+
0 ragnar NaN
5264+
1 sammywemmy 2.0
5265+
2 sammywemmy 3.0
5266+
3 ginger 3.0
5267+
4 ginger 5.0
52755268
52765269
Functional usage syntax:
52775270
@@ -5282,12 +5275,10 @@ def fill_direction(
52825275
52835276
df = pd.DataFrame(...)
52845277
df = jn.fill_direction(
5285-
df = df,
5286-
directions = {column_1 : direction_1,
5287-
column_2 : direction_2,
5288-
...},
5289-
limit = None # limit must be None or greater than 0
5290-
)
5278+
df = df,
5279+
column_1 = direction_1,
5280+
column_2 = direction_2,
5281+
)
52915282
52925283
Method-chaining usage syntax:
52935284
@@ -5296,68 +5287,75 @@ def fill_direction(
52965287
import pandas as pd
52975288
import janitor as jn
52985289
5299-
df = (
5300-
pd.DataFrame(...)
5301-
.fill_direction(
5302-
directions = {column_1 : direction_1,
5303-
column_2 : direction_2,
5304-
...},
5305-
limit = None # limit must be None or greater than 0
5306-
)
5307-
)
5290+
df = pd.DataFrame(...)
5291+
.fill_direction(
5292+
column_1 = direction_1,
5293+
column_2 = direction_2,
5294+
)
5295+
53085296
53095297
:param df: A pandas dataframe.
5310-
:param directions: Key - value pairs of columns and directions. Directions
5311-
can be either `down` (default), `up`, `updown` (fill up then down) and
5298+
:param kwargs: Key-value pairs of columns and directions. Directions
5299+
can be one of `down`, `up`, `updown` (fill up then down), or
53125300
`downup` (fill down then up).
5313-
:param limit: number of consecutive null values to forward/backward fill.
5314-
Value must `None` or greater than 0.
53155301
:returns: A pandas dataframe with modified column(s).
53165302
:raises ValueError: if column supplied is not in the dataframe.
53175303
:raises ValueError: if direction supplied is not one of `down`, `up`,
53185304
`updown`, or `downup`.
53195305
53205306
.. # noqa: DAR402
53215307
"""
5322-
df = df.copy()
5323-
if not directions:
5324-
return df
53255308

5326-
check("directions", directions, [dict])
5327-
5328-
if limit is not None:
5329-
check("limit", limit, [int])
5330-
# pandas raises error if limit is not greater than zero
5331-
# so no need for a check on pyjanitor's end
5332-
5333-
check_column(df, directions)
5309+
if not kwargs:
5310+
return df
53345311

5335-
for _, direction in directions.items():
5336-
if direction not in {"up", "down", "updown", "downup"}:
5312+
fill_types = {fill.name for fill in FILLTYPE}
5313+
for column_name, fill_type in kwargs.items():
5314+
check("column_name", column_name, [str])
5315+
check("fill_details", fill_type, [str])
5316+
if fill_type.upper() not in fill_types:
53375317
raise ValueError(
53385318
"""
5339-
The direction should be a string and should be one of
5340-
`up`, `down`, `updown`, or `downup`.
5319+
fill_type should be one of
5320+
up, down, updown, or downup.
53415321
"""
53425322
)
53435323

5344-
# TODO: option to specify limit per column; current implementation
5345-
# is one `limit` for all the columns. Might need refactoring, or an
5346-
# API change.
5347-
for column, direction in directions.items():
5348-
if direction == "up":
5349-
df.loc[:, column] = df.loc[:, column].bfill(limit=limit)
5350-
elif direction == "down":
5351-
df.loc[:, column] = df.loc[:, column].ffill(limit=limit)
5352-
elif direction == "updown":
5353-
df.loc[:, column] = (
5354-
df.loc[:, column].bfill(limit=limit).ffill(limit=limit)
5355-
)
5356-
else: # downup
5357-
df.loc[:, column] = (
5358-
df.loc[:, column].ffill(limit=limit).bfill(limit=limit)
5359-
)
5360-
return df
5324+
check_column(df, kwargs)
5325+
5326+
new_values = {}
5327+
for column_name, fill_type in kwargs.items():
5328+
direction = FILLTYPE[f"{fill_type.upper()}"].value
5329+
if len(direction) == 1:
5330+
direction = methodcaller(direction[0])
5331+
output = direction(df[column_name])
5332+
else:
5333+
direction = [methodcaller(entry) for entry in direction]
5334+
output = _chain_func(df[column_name], *direction)
5335+
new_values[column_name] = output
5336+
5337+
return df.assign(**new_values)
5338+
5339+
5340+
class FILLTYPE(Enum):
5341+
"""List of fill types for fill_direction."""
5342+
5343+
UP = ("bfill",)
5344+
DOWN = ("ffill",)
5345+
UPDOWN = "bfill", "ffill"
5346+
DOWNUP = "ffill", "bfill"
5347+
5348+
5349+
def _chain_func(column: pd.Series, *funcs):
5350+
"""
5351+
Apply a sequence of functions consecutively
5352+
to a Series.
5353+
https://blog.finxter.com/how-to-chain-multiple-function-calls-in-python/
5354+
"""
5355+
new_value = column.copy()
5356+
for func in funcs:
5357+
new_value = func(new_value)
5358+
return new_value
53615359

53625360

53635361
@pf.register_dataframe_method

pytest.ini

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[pytest]
2+
# always check coverage of janitor.
3+
addopts = --cov=janitor --cov-report term-missing --cov-report xml --durations=0
4+
markers =
5+
functions: test for general functions
6+
biology: tests for biology
7+
chemistry: tests for chemistry
8+
finance: tests for finance
9+
utils: utility tests
10+
engineering: tests for engineering
11+
ml: tests for machine learning
12+
spark_functions: tests for pyspark functions
13+
xarray: tests for xarray functions
14+
timeseries: tests for timeseries

tests/functions/test_fill_direction.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,28 +88,28 @@ def test_fill_column(df):
8888
"""Fill down on a single column."""
8989
expected = df.copy()
9090
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill()
91-
assert_frame_equal(df.fill_direction({"pet_type": "down"}), expected)
91+
assert_frame_equal(df.fill_direction(**{"pet_type": "down"}), expected)
9292

9393

9494
def test_fill_column_up(df):
9595
"""Fill up on a single column."""
9696
expected = df.copy()
9797
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].bfill()
98-
assert_frame_equal(df.fill_direction({"pet_type": "up"}), expected)
98+
assert_frame_equal(df.fill_direction(**{"pet_type": "up"}), expected)
9999

100100

101101
def test_fill_column_updown(df):
102102
"""Fill upwards, then downwards on a single column."""
103103
expected = df.copy()
104104
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].bfill().ffill()
105-
assert_frame_equal(df.fill_direction({"pet_type": "updown"}), expected)
105+
assert_frame_equal(df.fill_direction(**{"pet_type": "updown"}), expected)
106106

107107

108108
def test_fill_column_down_up(df):
109109
"""Fill downwards, then upwards on a single column."""
110110
expected = df.copy()
111111
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill().bfill()
112-
assert_frame_equal(df.fill_direction({"pet_type": "downup"}), expected)
112+
assert_frame_equal(df.fill_direction(**{"pet_type": "downup"}), expected)
113113

114114

115115
def test_fill_multiple_columns(df):
@@ -119,7 +119,7 @@ def test_fill_multiple_columns(df):
119119
:, ["pet_type", "owner"]
120120
].ffill()
121121
assert_frame_equal(
122-
df.fill_direction({"pet_type": "down", "owner": "down"}), expected
122+
df.fill_direction(**{"pet_type": "down", "owner": "down"}), expected
123123
)
124124

125125

@@ -129,28 +129,41 @@ def test_fill_multiple_columns_multiple_directions(df):
129129
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill()
130130
expected.loc[:, "owner"] = expected.loc[:, "owner"].bfill()
131131
assert_frame_equal(
132-
df.fill_direction({"pet_type": "down", "owner": "up"}), expected
132+
df.fill_direction(**{"pet_type": "down", "owner": "up"}), expected
133133
)
134134

135135

136136
def test_wrong_column_name(df):
137137
"""Raise Value Error if wrong column name is provided."""
138138
with pytest.raises(ValueError):
139-
df.fill_direction({"PetType": "down"})
139+
df.fill_direction(**{"PetType": "down"})
140+
141+
142+
def test_wrong_column_type(df):
143+
"""Raise TypeError if wrong type is provided for column_name."""
144+
with pytest.raises(TypeError):
145+
df.fill_direction(**{1: "down"})
140146

141147

142148
def test_wrong_direction(df):
143149
"""Raise Value Error if wrong direction is provided."""
144150
with pytest.raises(ValueError):
145-
df.fill_direction({"pet_type": "upanddawn"})
151+
df.fill_direction(**{"pet_type": "upanddawn"})
152+
153+
154+
def test_wrong_direction_type(df):
155+
"""Raise Type Error if wrong type is provided for direction."""
156+
with pytest.raises(TypeError):
157+
df.fill_direction(**{"pet_type": 1})
146158

147159

160+
@pytest.mark.xfail(reason="limit is deprecated")
148161
def test_wrong_type_limit(df):
149162
"""Raise TypeError if limit is wrong type."""
150163
with pytest.raises(TypeError):
151-
df.fill_direction({"pet_type": "up"}, limit="one")
164+
df.fill_direction(**{"pet_type": "up"}, limit="one")
152165

153166

154167
def test_empty_directions(df):
155168
"""Return dataframe unchanged if no column-direction pairs are supplied."""
156-
assert_frame_equal(df.fill_direction({}), df)
169+
assert_frame_equal(df.fill_direction(), df)

0 commit comments

Comments
 (0)