pyjanitor-devs · nvamsikrishna05 · Aug 19, 2021 · Apr 23, 2021 · May 24, 2021 · Jun 5, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@
 -   [DOC] Delete Read the Docs project and remove all readthedocs.io references from the repo. Issue #863. @loganthomas
 -   [DOC] Updated various documentation sources to reflect pyjanitor-dev ownership. @loganthomas
 -   [INF] Fix `isort` automatic checks. Issue #845. @loganthomas
+-   [ENH] Remove `limit` from `fill_direction`. `fill_direction` now takes column-direction pairs as keyword arguments. @samukweku
 
 ## [v0.21.0] - 2021-07-16
 

diff --git a/examples/notebooks/dirty_data.xlsx b/examples/notebooks/dirty_data.xlsx
diff --git a/examples/notebooks/inflating_converting_currency.ipynb b/examples/notebooks/inflating_converting_currency.ipynb
@@ -271,4 +271,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/janitor/functions.py b/janitor/functions.py
@@ -37,6 +37,8 @@
 from scipy.stats import mode
 
 from .errors import JanitorError
+from enum import Enum
+from operator import methodcaller
 from .utils import (
     _clean_accounting_column,
     _computations_as_categorical,
@@ -324,7 +326,6 @@ def clean_names(
         df = df.rename(columns=_strip_accents)
 
     df = df.rename(columns=lambda x: re.sub("_+", "_", x))  # noqa: PD005
-
     df = _strip_underscores(df, strip_underscores)
 
     df = df.rename(columns=lambda x: x[:truncate_limit])
@@ -337,7 +338,6 @@ def clean_names(
 
 def _change_case(col: str, case_type: str) -> str:
     """Change case of a column name."""
-
     case_types = ["preserve", "upper", "lower", "snake"]
     if case_type.lower() not in case_types:
         raise JanitorError(f"case_type must be one of: {case_types}")
@@ -355,7 +355,6 @@ def _change_case(col: str, case_type: str) -> str:
 
 def _remove_special(col_name: Hashable) -> str:
     """Remove special characters from column name."""
-
     return "".join(
         item for item in str(col_name) if item.isalnum() or "_" in item
     )
@@ -380,7 +379,6 @@ def _camel2snake(col_name: str) -> str:
 
 def _normalize_1(col_name: Hashable) -> str:
     """Perform normalization of column name."""
-
     result = str(col_name)
     for search, replace in FIXES:
         result = re.sub(search, replace, result)  # noqa: PD005
@@ -2523,7 +2521,7 @@ def row_to_names(
         df = df.drop(df.index[range(row_number)])
 
     if reset_index:
-        df.reset_index(drop=["index"], inplace=True)
+        df = df.reset_index(drop=["index"])
 
     return df
 
@@ -5210,72 +5208,63 @@ def process_text(
 
 
 @pf.register_dataframe_method
-def fill_direction(
-    df: pd.DataFrame,
-    directions: Dict[Hashable, str] = None,
-    limit: Optional[int] = None,
-) -> pd.DataFrame:
+def fill_direction(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
     """
     Provide a method-chainable function for filling missing values
     in selected columns.
 
-    Missing values are filled using the next or previous entry.
-    The columns are paired with the directions in a dictionary.
-    It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``.
+    It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``,
+    and pairs each column name with one of `up`, `down`, `updown`,
+    or `downup`.
 
     .. code-block:: python
 
         import pandas as pd
-        import numpy as np
         import janitor as jn
 
-        df = pd.DataFrame({"text": ["ragnar", np.nan, "sammywemmy",
-                                    np.nan, "ginger"],
-                           "code" : [np.nan, 2, 3, np.nan, 5]})
-
         df
 
-           text          code
-        0 ragnar         NaN
-        1 NaN            2.0
-        2 sammywemmy     3.0
-        3 NaN            NaN
-        4 ginger         5.0
+                 text  code
+        0      ragnar   NaN
+        1         NaN   2.0
+        2  sammywemmy   3.0
+        3         NaN   NaN
+        4      ginger   5.0
 
 
 
     Fill on a single column::
 
-        df.fill_direction({"text" : "up"})
+        df.fill_direction(code = 'up')
 
-           text          code
-        0 ragnar         NaN
-        1 sammywemmy     2.0
-        2 sammywemmy     3.0
-        3 ginger         NaN
-        4 ginger         5.0
+                 text  code
+        0      ragnar   2.0
+        1         NaN   2.0
+        2  sammywemmy   3.0
+        3         NaN   5.0
+        4      ginger   5.0
 
     Fill on multiple columns::
 
-        df.fill_direction({"text" : "down", "code" : "down"})
+        df.fill_direction(text = 'down', code = 'down')
 
-           text          code
-        0 ragnar         NaN
-        1 ragnar         2.0
-        2 sammywemmy     3.0
-        3 sammywemmy     3.0
-        4 ginger         5.0
+                 text  code
+        0      ragnar   NaN
+        1      ragnar   2.0
+        2  sammywemmy   3.0
+        3  sammywemmy   3.0
+        4      ginger   5.0
 
     Fill multiple columns in different directions::
 
-        df.fill_direction({"text" : "up", "code" : "down"})
+        df.fill_direction(text = 'up', code = 'down')
 
-           text          code
-        0 ragnar         NaN
-        1 sammywemmy     2.0
-        2 sammywemmy     3.0
-        3 ginger         3.0
-        4 ginger         5.0
+                 text  code
+        0      ragnar   NaN
+        1  sammywemmy   2.0
+        2  sammywemmy   3.0
+        3      ginger   3.0
+        4      ginger   5.0
 
     Functional usage syntax:
 
@@ -5286,12 +5275,10 @@ def fill_direction(
 
         df = pd.DataFrame(...)
         df = jn.fill_direction(
-            df = df,
-            directions = {column_1 : direction_1,
-                          column_2 : direction_2,
-                          ...},
-            limit = None # limit must be None or greater than 0
-            )
+                    df = df,
+                    column_1 = direction_1,
+                    column_2 = direction_2,
+                )
 
     Method-chaining usage syntax:
 
@@ -5300,68 +5287,75 @@ def fill_direction(
         import pandas as pd
         import janitor as jn
 
-        df = (
-            pd.DataFrame(...)
-            .fill_direction(
-            directions = {column_1 : direction_1,
-                          column_2 : direction_2,
-                          ...},
-            limit = None # limit must be None or greater than 0
-            )
-        )
+        df = (
+            pd.DataFrame(...).fill_direction(
+                column_1 = direction_1,
+                column_2 = direction_2,
+            )
+        )
 
     :param df: A pandas dataframe.
-    :param directions: Key - value pairs of columns and directions. Directions
-        can be either `down` (default), `up`, `updown` (fill up then down) and
+    :param kwargs: Key - value pairs of columns and directions. Directions
+        can be one of `down`, `up`, `updown` (fill up then down), or
         `downup` (fill down then up).
-    :param limit: number of consecutive null values to forward/backward fill.
-        Value must `None` or greater than 0.
     :returns: A pandas dataframe with modified column(s).
     :raises ValueError: if column supplied is not in the dataframe.
     :raises ValueError: if direction supplied is not one of `down`, `up`,
         `updown`, or `downup`.
 
     .. # noqa: DAR402
     """
-    df = df.copy()
-    if not directions:
-        return df
-
-    check("directions", directions, [dict])
-
-    if limit is not None:
-        check("limit", limit, [int])
-        # pandas raises error if limit is not greater than zero
-        # so no need for a check on pyjanitor's end
 
-    check_column(df, directions)
+    if not kwargs:
+        return df
 
-    for _, direction in directions.items():
-        if direction not in {"up", "down", "updown", "downup"}:
+    fill_types = {fill.name for fill in FILLTYPE}
+    for column_name, fill_type in kwargs.items():
+        check("column_name", column_name, [str])
+        check("fill_details", fill_type, [str])
+        if fill_type.upper() not in fill_types:
             raise ValueError(
                 """
-                The direction should be a string and should be one of
-                `up`, `down`, `updown`, or `downup`.
+                fill_type should be one of
+                up, down, updown, or downup.
                 """
             )
 
-    # TODO: option to specify limit per column; current implementation
-    # is one `limit` for all the columns. Might need refactoring, or an
-    # API change.
-    for column, direction in directions.items():
-        if direction == "up":
-            df.loc[:, column] = df.loc[:, column].bfill(limit=limit)
-        elif direction == "down":
-            df.loc[:, column] = df.loc[:, column].ffill(limit=limit)
-        elif direction == "updown":
-            df.loc[:, column] = (
-                df.loc[:, column].bfill(limit=limit).ffill(limit=limit)
-            )
-        else:  # downup
-            df.loc[:, column] = (
-                df.loc[:, column].ffill(limit=limit).bfill(limit=limit)
-            )
-    return df
+    check_column(df, kwargs)
+
+    new_values = {}
+    for column_name, fill_type in kwargs.items():
+        direction = FILLTYPE[f"{fill_type.upper()}"].value
+        if len(direction) == 1:
+            direction = methodcaller(direction[0])
+            output = direction(df[column_name])
+        else:
+            direction = [methodcaller(entry) for entry in direction]
+            output = _chain_func(df[column_name], *direction)
+        new_values[column_name] = output
+
+    return df.assign(**new_values)
+
+
+class FILLTYPE(Enum):
+    """List of fill types for fill_direction."""
+
+    UP = ("bfill",)
+    DOWN = ("ffill",)
+    UPDOWN = "bfill", "ffill"
+    DOWNUP = "ffill", "bfill"
+
+
+def _chain_func(column: pd.Series, *funcs):
+    """
+    Apply a series of functions consecutively
+    to a Series.
+    https://blog.finxter.com/how-to-chain-multiple-function-calls-in-python/
+    """
+    new_value = column.copy()
+    for func in funcs:
+        new_value = func(new_value)
+    return new_value
 
 
 @pf.register_dataframe_method
@@ -5617,7 +5611,6 @@ def complete(
     Let's get all the missing years per state::
 
         df.complete(
-
             columns = [{'year': new_year_values}],
             by='state'
         )

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+# always check coverage of janitor.
+addopts = --cov=janitor --cov-report term-missing --cov-report xml --durations=0
+markers =
+    functions: test for general functions
+    biology: tests for biology
+    chemistry: tests for chemistry
+    finance: tests for finance
+    utils: utility tests
+    engineering: tests for engineering
+    ml: tests for machine learning
+    spark_functions: tests for pyspark functions
+    xarray: tests for xarray functions
+    timeseries: tests for timeseries
diff --git a/tests/functions/test_clean_names.py b/tests/functions/test_clean_names.py
@@ -193,7 +193,7 @@ def test_charac():
 
     df = df.clean_names(strip_underscores=True, case_type="lower")
 
-    assert "current_accountbalance_in_%_of_gdp" in df.columns.values
+    assert "current_accountbalance_in_%_of_gdp" in df.columns
 
 
 @pytest.mark.functions
@@ -205,4 +205,4 @@ def test_space():
 
     df = df.clean_names(strip_underscores=True, case_type="lower")
 
-    assert ("in %" in df.columns.values) is False
+    assert ("in %" in df.columns) is False
diff --git a/tests/functions/test_convert_excel_date.py b/tests/functions/test_convert_excel_date.py
@@ -29,4 +29,4 @@ def test_convert_excel_date_with_string_data():
     ).clean_names()
 
     with pytest.raises(ValueError):
-        df.convert_excel_date("hire_date_str")
+        df.convert_excel_date("certification")
-Original file line number
+Diff line change
@@ Expand Up / @@ -271,4 +271,4 @@ @@
      },
      "nbformat": 4,
      "nbformat_minor": 4
-    }
+    }