Skip to content

[ENH] variable args for complete #857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
- [DOC] Delete Read the Docs project and remove all readthedocs.io references from the repo. Issue #863. @loganthomas
- [DOC] Updated various documentation sources to reflect pyjanitor-dev ownership. @loganthomas
- [INF] Fix `isort` automatic checks. Issue #845. @loganthomas
- [ENH] `complete` function now uses variable args (*args) - @samukweku
- [ENH] Set `expand_column`'s `sep` default to `"|"`, same as `pandas.Series.str.get_dummies`. Issue #876. @Zeroto521
- [ENH] Deprecate `limit` from fill_direction. fill_direction now uses kwargs. @samukweku

Expand Down
49 changes: 22 additions & 27 deletions janitor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ def coalesce(
if target_column_name is None:
target_column_name = column_names[0]
# bfill/ffill combo is faster than combine_first
outcome = df.filter(column_names).bfill(1).ffill(1).iloc[:, 0]
outcome = df.filter(column_names).bfill(axis=1).ffill(axis=1).iloc[:, 0]
if outcome.hasnans and (default_value is not None):
outcome = outcome.fillna(default_value)
return df.assign(**{target_column_name: outcome})
Expand Down Expand Up @@ -5316,7 +5316,7 @@ def fill_direction(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
fill_types = {fill.name for fill in FILLTYPE}
for column_name, fill_type in kwargs.items():
check("column_name", column_name, [str])
check("fill_details", fill_type, [str])
check("fill_type", fill_type, [str])
if fill_type.upper() not in fill_types:
raise ValueError(
"""
Expand Down Expand Up @@ -5486,7 +5486,7 @@ def groupby_topk(
@pf.register_dataframe_method
def complete(
df: pd.DataFrame,
columns: List[Union[List, Tuple, Dict, str]] = None,
*columns,
by: Optional[Union[list, str]] = None,
) -> pd.DataFrame:
"""
Expand All @@ -5513,11 +5513,10 @@ def complete(
1 2 2 b 2 5
2 1 2 b 3 6

To find all the unique combinations of `group`, `item_id`, and `item_name`,
including combinations not present in the data, each variable should be
passed in a list to the `columns` parameter::
Find all the unique combinations of `group`, `item_id`, and `item_name`,
including combinations not present in the data::

df.complete(columns = ['group', 'item_id', 'item_name'])
df.complete('group', 'item_id', 'item_name')

group item_id item_name value1 value2
0 1 1 a 1.0 4.0
Expand All @@ -5530,10 +5529,10 @@ def complete(
7 2 2 b 2.0 5.0

To expose just the missing values based only on the existing data,
`item_id` and `item_name` can be wrapped in a tuple, while `group`
is passed in as a separate variable::
`item_id` and `item_name` column names can be wrapped in a list/tuple,
while `group` is passed in as a separate variable::

df.complete(columns = ["group", ("item_id", "item_name")])
df.complete("group", ("item_id", "item_name"))
group item_id item_name value1 value2
0 1 1 a 1.0 4.0
1 1 2 b 3.0 6.0
Expand All @@ -5556,7 +5555,7 @@ def complete(
Note that Year 2000 and Agarum pairing is missing. Let's make it
explicit::

df.complete(columns = ['Year', 'Taxon'])
df.complete('Year', 'Taxon')

Year Taxon Abundance
0 1999 Agarum 1.0
Expand All @@ -5568,7 +5567,7 @@ def complete(

The null value can be replaced with the Pandas `fillna` argument::

df.complete(columns = ['Year', 'Taxon']).fillna(0)
df.complete('Year', 'Taxon').fillna(0)

Year Taxon Abundance
0 1999 Agarum 1.0
Expand All @@ -5584,7 +5583,7 @@ def complete(

new_year_values = lambda year: range(year.min(), year.max() + 1)

df.complete(columns = [{"Year": new_year_values}, "Taxon"])
df.complete({"Year": new_year_values}, "Taxon")

Year Taxon Abundance
0 1999 Agarum 1.0
Expand Down Expand Up @@ -5615,7 +5614,7 @@ def complete(
Let's get all the missing years per state::

df.complete(
columns = [{'year': new_year_values}],
{'year': new_year_values},
by='state'
)

Expand Down Expand Up @@ -5650,11 +5649,9 @@ def complete(

df = jn.complete(
df = df,
columns= [
column_label,
(column1, column2, ...),
{column1: new_values, ...}
],
column_label,
(column1, column2, ...),
{column1: new_values, ...},
by = label/list_of_labels
)

Expand All @@ -5664,26 +5661,24 @@ def complete(

df = (
pd.DataFrame(...)
.complete(columns=[
.complete(
column_label,
(column1, column2, ...),
{column1: new_values, ...},
],
by = label/list_of_labels
)
by = label/list_of_labels
)

:param df: A pandas dataframe.
:param columns: This is a list containing the columns to be
:param *columns: This is a sequence containing the columns to be
completed. It could be column labels (string type),
a list/tuple of column labels, or a dictionary that pairs
column labels with new values.
:param by: label or list of labels to group by.
The explicit missing values are returned per group.
:returns: A pandas dataframe with modified column(s).
:raises TypeError: if `columns` is not a list.
:raises ValueError: if entry in `columns` is not a
:raises ValueError: if entry in `*columns` is not a
str/dict/list/tuple.
:raises ValueError: if entry in `columns` is a dict/list/tuple
:raises ValueError: if entry in `*columns` is a dict/list/tuple
and is empty.

.. # noqa: DAR402
Expand Down
6 changes: 3 additions & 3 deletions janitor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,6 @@ def _data_checks_complete(
"""
)

check("columns", columns, [list])

columns = [
list(grouping) if isinstance(grouping, tuple) else grouping
for grouping in columns
Expand Down Expand Up @@ -807,6 +805,8 @@ def _base_complete(
unique_index = df_index.is_unique
columns_to_stack = None

# stack...unstack implemented here if conditions are met
# usually faster than reindex
if all_strings and (not any_nulls) and (len(columns) > 1) and unique_index:
if df_empty:
df["dummy"] = 1
Expand All @@ -826,7 +826,7 @@ def _base_complete(
indexer = df_index.union(indexer, sort=None)
df = df.reindex(indexer)

else:
else: # reindex not possible on duplicate indices
df = df.join(pd.DataFrame([], index=indexer), how="outer")

return df
Expand Down
69 changes: 30 additions & 39 deletions tests/functions/test_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,16 @@ def test_MultiIndex_column(df1):
df = df1
df.columns = [["A", "B", "C"], list(df.columns)]
with pytest.raises(ValueError):
df1.complete(["Year", "Taxon"])
df1.complete("Year", "Taxon")


def test_column_duplicated(df1):
"""Raise ValueError if column is duplicated in `columns`"""
with pytest.raises(ValueError):
df1.complete(
columns=[
"Year",
"Taxon",
            {"Year": lambda x: range(x.min(), x.max() + 1)},
]
"Year",
"Taxon",
{"Year": lambda x: range(x.min().x.max() + 1)},
)


Expand All @@ -57,14 +55,14 @@ def test_type_columns(df1):
def test_fill_value_is_a_dict(df1):
"""Raise error if fill_value is not a dictionary"""
with pytest.raises(TypeError):
df1.complete(columns=["Year", "Taxon"])
df1.complete("Year", "Taxon")


@pytest.mark.xfail(reason="fill_value dropped. fillna better.")
def test_wrong_column_fill_value(df1):
"""Raise ValueError if column in `fill_value` does not exist."""
with pytest.raises(ValueError):
df1.complete(columns=["Taxon", "Year"])
df1.complete("Taxon", "Year")


def test_wrong_data_type_dict(df1):
Expand All @@ -73,7 +71,7 @@ def test_wrong_data_type_dict(df1):
is not a 1-dimensional object.
"""
with pytest.raises(ValueError):
df1.complete(columns=[{"Year": pd.DataFrame([2005, 2006, 2007])}])
df1.complete({"Year": pd.DataFrame([2005, 2006, 2007])})


def test_not_list_like_type_dict(df1):
Expand All @@ -82,7 +80,7 @@ def test_not_list_like_type_dict(df1):
is not a list-like object.
"""
with pytest.raises(ValueError):
df1.complete(columns=[{"Year": "2001, 2002, 2003"}])
df1.complete({"Year": "2001, 2002, 2003"})


def test_MultiIndex_type_dict(df1):
Expand All @@ -92,9 +90,7 @@ def test_MultiIndex_type_dict(df1):
"""
with pytest.raises(ValueError):
df1.complete(
columns=[
{"Year": pd.MultiIndex.from_tuples([(1, 2001), (2, 2002)])}
]
{"Year": pd.MultiIndex.from_tuples([(1, 2001), (2, 2002)])}
)


Expand All @@ -104,7 +100,7 @@ def test_empty_type_dict(df1):
is empty.
"""
with pytest.raises(ValueError):
df1.complete(columns=[{"Year": pd.Index([])}])
df1.complete({"Year": pd.Index([])})


frame = pd.DataFrame(
Expand Down Expand Up @@ -138,14 +134,14 @@ def test_empty_type_dict(df1):
def test_wrong_columns(frame, wrong_columns):
"""Test that ValueError is raised if wrong column is supplied."""
with pytest.raises(ValueError):
frame.complete(columns=wrong_columns)
frame.complete(*wrong_columns)


@pytest.mark.parametrize("frame,empty_sub_cols", empty_sub_columns)
def test_empty_subcols(frame, empty_sub_cols):
"""Raise ValueError for an empty group in columns"""
with pytest.raises(ValueError):
frame.complete(columns=empty_sub_cols)
frame.complete(*empty_sub_cols)


@pytest.mark.xfail(reason="fill_value dropped. fillna is better.")
Expand All @@ -166,7 +162,7 @@ def test_fill_value(df1):
}
)

result = df1.complete(columns=["Year", "Taxon"]).fillna({"Abundance": 0})
result = df1.complete("Year", "Taxon").fillna({"Abundance": 0})
assert_frame_equal(result, output1)


Expand Down Expand Up @@ -214,7 +210,7 @@ def test_fill_value_all_years(df1, df1_output):
"""

result = df1.complete(
columns=[{"Year": lambda x: range(x.min(), x.max() + 1)}, "Taxon"]
{"Year": lambda x: range(x.min(), x.max() + 1)}, "Taxon"
).fillna(0)
assert_frame_equal(result, df1_output)

Expand All @@ -226,10 +222,8 @@ def test_dict_series(df1, df1_output):
"""

result = df1.complete(
columns=[
{"Year": lambda x: pd.Series(range(x.min(), x.max() + 1))},
"Taxon",
]
{"Year": lambda x: pd.Series(range(x.min(), x.max() + 1))},
"Taxon",
).fillna(0)
assert_frame_equal(result, df1_output)

Expand All @@ -241,14 +235,12 @@ def test_dict_series_duplicates(df1, df1_output):
"""

result = df1.complete(
columns=[
{
"Year": pd.Series(
[1999, 2000, 2000, 2001, 2002, 2002, 2002, 2003, 2004]
)
},
"Taxon",
]
{
"Year": pd.Series(
[1999, 2000, 2000, 2001, 2002, 2002, 2002, 2003, 2004]
)
},
"Taxon",
).fillna(0)
assert_frame_equal(result, df1_output)

Expand All @@ -261,7 +253,7 @@ def test_dict_values_outside_range(df1):
in the dictionary's values.
"""
result = df1.complete(
columns=[("Taxon", "Abundance"), {"Year": np.arange(2005, 2007)}]
("Taxon", "Abundance"), {"Year": np.arange(2005, 2007)}
)
expected = pd.DataFrame(
[
Expand Down Expand Up @@ -550,7 +542,7 @@ def test_dict_values_outside_range(df1):
@pytest.mark.parametrize("df,columns,output", complete_parameters)
def test_complete(df, columns, output):
"Test the complete function, with and without groupings."
assert_frame_equal(df.complete(columns), output)
assert_frame_equal(df.complete(*columns), output)


# https://stackoverflow.com/questions/32874239/
Expand Down Expand Up @@ -591,7 +583,7 @@ def test_grouping_first_columns():
"choice": [5, 6, 7, 5, 6, 7, 5, 6, 7],
}
)
result = df2.complete(columns=[("id", "c", "d"), "choice"])
result = df2.complete(("id", "c", "d"), "choice")
assert_frame_equal(result, output2)


Expand Down Expand Up @@ -619,7 +611,8 @@ def test_complete_multiple_groupings():
)

result = df3.complete(
columns=[("meta", "domain1"), ("project_id", "question_count")],
("meta", "domain1"),
("project_id", "question_count"),
).fillna({"tag_count": 0})
assert_frame_equal(result, output3)

Expand Down Expand Up @@ -669,10 +662,8 @@ def test_dict_tuple(df1, output_dict_tuple):
"""

result = df1.complete(
columns=[
{"Year": lambda x: range(x.min(), x.max() + 1)},
("Taxon", "Abundance"),
]
{"Year": lambda x: range(x.min(), x.max() + 1)},
("Taxon", "Abundance"),
)

assert_frame_equal(result, output_dict_tuple)
Expand All @@ -689,7 +680,7 @@ def test_complete_groupby():
)

result = df.complete(
columns=[{"year": lambda x: range(x.min(), x.max() + 1)}],
{"year": lambda x: range(x.min(), x.max() + 1)},
by="state",
)

Expand Down