37
37
from scipy .stats import mode
38
38
39
39
from .errors import JanitorError
40
+ from enum import Enum
41
+ from operator import methodcaller
40
42
from .utils import (
41
43
_clean_accounting_column ,
42
44
_computations_as_categorical ,
@@ -5206,72 +5208,63 @@ def process_text(
5206
5208
5207
5209
5208
5210
@pf .register_dataframe_method
5209
- def fill_direction (
5210
- df : pd .DataFrame ,
5211
- directions : Dict [Hashable , str ] = None ,
5212
- limit : Optional [int ] = None ,
5213
- ) -> pd .DataFrame :
5211
+ def fill_direction (df : pd .DataFrame , ** kwargs ) -> pd .DataFrame :
5214
5212
"""
5215
5213
Provide a method-chainable function for filling missing values
5216
5214
in selected columns.
5217
5215
5218
- Missing values are filled using the next or previous entry.
5219
- The columns are paired with the directions in a dictionary.
5220
- It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill` `.
5216
+ It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``,
5217
+ and pairs the column name with one of `up`, `down`, `updown`,
5218
+ and `downup `.
5221
5219
5222
5220
.. code-block:: python
5223
5221
5224
5222
import pandas as pd
5225
- import numpy as np
5226
5223
import janitor as jn
5227
5224
5228
- df = pd.DataFrame({"text": ["ragnar", np.nan, "sammywemmy",
5229
- np.nan, "ginger"],
5230
- "code" : [np.nan, 2, 3, np.nan, 5]})
5231
-
5232
5225
df
5233
5226
5234
- text code
5235
- 0 ragnar NaN
5236
- 1 NaN 2.0
5237
- 2 sammywemmy 3.0
5238
- 3 NaN NaN
5239
- 4 ginger 5.0
5227
+ text code
5228
+ 0 ragnar NaN
5229
+ 1 NaN 2.0
5230
+ 2 sammywemmy 3.0
5231
+ 3 NaN NaN
5232
+ 4 ginger 5.0
5240
5233
5241
5234
5242
5235
5243
5236
Fill on a single column::
5244
5237
5245
- df.fill_direction({"text" : "up"} )
5238
+ df.fill_direction(code = 'up' )
5246
5239
5247
- text code
5248
- 0 ragnar NaN
5249
- 1 sammywemmy 2.0
5250
- 2 sammywemmy 3.0
5251
- 3 ginger NaN
5252
- 4 ginger 5.0
5240
+ text code
5241
+ 0 ragnar 2.0
5242
+ 1 NaN 2.0
5243
+ 2 sammywemmy 3.0
5244
+ 3 NaN 5.0
5245
+ 4 ginger 5.0
5253
5246
5254
5247
Fill on multiple columns::
5255
5248
5256
- df.fill_direction({" text" : " down", " code" : " down"} )
5249
+ df.fill_direction(text = ' down', code = ' down' )
5257
5250
5258
- text code
5259
- 0 ragnar NaN
5260
- 1 ragnar 2.0
5261
- 2 sammywemmy 3.0
5262
- 3 sammywemmy 3.0
5263
- 4 ginger 5.0
5251
+ text code
5252
+ 0 ragnar NaN
5253
+ 1 ragnar 2.0
5254
+ 2 sammywemmy 3.0
5255
+ 3 sammywemmy 3.0
5256
+ 4 ginger 5.0
5264
5257
5265
5258
Fill multiple columns in different directions::
5266
5259
5267
- df.fill_direction({" text" : "up", " code" : " down"} )
5260
+ df.fill_direction(text = 'up', code = ' down' )
5268
5261
5269
- text code
5270
- 0 ragnar NaN
5271
- 1 sammywemmy 2.0
5272
- 2 sammywemmy 3.0
5273
- 3 ginger 3.0
5274
- 4 ginger 5.0
5262
+ text code
5263
+ 0 ragnar NaN
5264
+ 1 sammywemmy 2.0
5265
+ 2 sammywemmy 3.0
5266
+ 3 ginger 3.0
5267
+ 4 ginger 5.0
5275
5268
5276
5269
Functional usage syntax:
5277
5270
@@ -5282,12 +5275,10 @@ def fill_direction(
5282
5275
5283
5276
df = pd.DataFrame(...)
5284
5277
df = jn.fill_direction(
5285
- df = df,
5286
- directions = {column_1 : direction_1,
5287
- column_2 : direction_2,
5288
- ...},
5289
- limit = None # limit must be None or greater than 0
5290
- )
5278
+ df = df,
5279
+ column_1 = direction_1,
5280
+ column_2 = direction_2,
5281
+ )
5291
5282
5292
5283
Method-chaining usage syntax:
5293
5284
@@ -5296,68 +5287,75 @@ def fill_direction(
5296
5287
import pandas as pd
5297
5288
import janitor as jn
5298
5289
5299
- df = (
5300
- pd.DataFrame(...)
5301
- .fill_direction(
5302
- directions = {column_1 : direction_1,
5303
- column_2 : direction_2,
5304
- ...},
5305
- limit = None # limit must be None or greater than 0
5306
- )
5307
- )
5290
+ df = pd.DataFrame(...)
5291
+ .fill_direction(
5292
+ column_1 = direction_1,
5293
+ column_2 = direction_2,
5294
+ )
5295
+
5308
5296
5309
5297
:param df: A pandas dataframe.
5310
- :param directions : Key - value pairs of columns and directions. Directions
5311
- can be either `down` (default) , `up`, `updown` (fill up then down) and
5298
+ :param kwargs : Key - value pairs of columns and directions. Directions
5299
+ can be either `down`, `up`, `updown` (fill up then down) and
5312
5300
`downup` (fill down then up).
5313
- :param limit: number of consecutive null values to forward/backward fill.
5314
- Value must `None` or greater than 0.
5315
5301
:returns: A pandas dataframe with modified column(s).
5316
5302
:raises ValueError: if column supplied is not in the dataframe.
5317
5303
:raises ValueError: if direction supplied is not one of `down`, `up`,
5318
5304
`updown`, or `downup`.
5319
5305
5320
5306
.. # noqa: DAR402
5321
5307
"""
5322
- df = df .copy ()
5323
- if not directions :
5324
- return df
5325
5308
5326
- check ("directions" , directions , [dict ])
5327
-
5328
- if limit is not None :
5329
- check ("limit" , limit , [int ])
5330
- # pandas raises error if limit is not greater than zero
5331
- # so no need for a check on pyjanitor's end
5332
-
5333
- check_column (df , directions )
5309
+ if not kwargs :
5310
+ return df
5334
5311
5335
- for _ , direction in directions .items ():
5336
- if direction not in {"up" , "down" , "updown" , "downup" }:
5312
+ fill_types = {fill .name for fill in FILLTYPE }
5313
+ for column_name , fill_type in kwargs .items ():
5314
+ check ("column_name" , column_name , [str ])
5315
+ check ("fill_details" , fill_type , [str ])
5316
+ if fill_type .upper () not in fill_types :
5337
5317
raise ValueError (
5338
5318
"""
5339
- The direction should be a string and should be one of
5340
- `up`, ` down`, ` updown` , or ` downup` .
5319
+ fill_type should be one of
5320
+ up, down, updown, or downup.
5341
5321
"""
5342
5322
)
5343
5323
5344
- # TODO: option to specify limit per column; current implementation
5345
- # is one `limit` for all the columns. Might need refactoring, or an
5346
- # API change.
5347
- for column , direction in directions .items ():
5348
- if direction == "up" :
5349
- df .loc [:, column ] = df .loc [:, column ].bfill (limit = limit )
5350
- elif direction == "down" :
5351
- df .loc [:, column ] = df .loc [:, column ].ffill (limit = limit )
5352
- elif direction == "updown" :
5353
- df .loc [:, column ] = (
5354
- df .loc [:, column ].bfill (limit = limit ).ffill (limit = limit )
5355
- )
5356
- else : # downup
5357
- df .loc [:, column ] = (
5358
- df .loc [:, column ].ffill (limit = limit ).bfill (limit = limit )
5359
- )
5360
- return df
5324
+ check_column (df , kwargs )
5325
+
5326
+ new_values = {}
5327
+ for column_name , fill_type in kwargs .items ():
5328
+ direction = FILLTYPE [f"{ fill_type .upper ()} " ].value
5329
+ if len (direction ) == 1 :
5330
+ direction = methodcaller (direction [0 ])
5331
+ output = direction (df [column_name ])
5332
+ else :
5333
+ direction = [methodcaller (entry ) for entry in direction ]
5334
+ output = _chain_func (df [column_name ], * direction )
5335
+ new_values [column_name ] = output
5336
+
5337
+ return df .assign (** new_values )
5338
+
5339
+
5340
class FILLTYPE(Enum):
    """List of fill types for fill_direction.

    Each member's value is a tuple of ``pd.Series`` method names.
    ``fill_direction`` looks a member up by the upper-cased direction
    string and calls the named methods, in order, on the target column.
    """

    # Single-step fills: one-element tuples so every value is a tuple.
    UP = ("bfill",)
    DOWN = ("ffill",)
    # Two-step fills: the first method runs first, the second fills
    # whatever the first left missing.
    UPDOWN = "bfill", "ffill"
    DOWNUP = "ffill", "bfill"
5347
+
5348
+
5349
def _chain_func(column: pd.Series, *funcs):
    """
    Apply series of functions consecutively
    to a Series.
    https://blog.finxter.com/how-to-chain-multiple-function-calls-in-python/

    :param column: The Series to start from; it is copied first, so the
        callables never receive the caller's own object.
    :param funcs: Callables applied left to right; each one receives the
        result of the previous call.
    :returns: The result of the last callable, or a copy of ``column``
        when no callables are supplied.
    """
    new_value = column.copy()
    for func in funcs:
        # Feed each result into the next callable in the chain.
        new_value = func(new_value)
    return new_value
5361
5359
5362
5360
5363
5361
@pf .register_dataframe_method
0 commit comments