diff --git a/AUTHORS.rst b/AUTHORS.rst index c56067b2a..ed1a7b785 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -101,3 +101,4 @@ Contributors - `@MollyCroke `_ | `contributions `_ - `@ericclessantostv `_ | `contributions `_ - `@fireddd `_ | `contributions `_ +- `@Zeroto521 `_ | `contributions `_ diff --git a/CHANGELOG.md b/CHANGELOG.md index 89a60c5cf..da28862c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] + - [DOC] Fix references and broken links in AUTHORS.rst. @loganthomas - [DOC] Updated Broken links in the README and contributing docs. @nvamsikrishna05 - [INF] Update pre-commit hooks and remove mutable references. Issue #844. @loganthomas @@ -14,6 +15,7 @@ - [DOC] Delete Read the Docs project and remove all readthedocs.io references from the repo. Issue #863. @loganthomas - [DOC] Updated various documentation sources to reflect pyjanitor-dev ownership. @loganthomas - [INF] Fix `isort` automatic checks. Issue #845. @loganthomas +- [ENH] Set `expand_column`'s `sep` default to `"|"`, same as `pandas.Series.str.get_dummies`. Issue #876. @Zeroto521 - [ENH] Deprecate `limit` from fill_direction. fill_direction now uses kwargs. @samukweku ## [v0.21.0] - 2021-07-16 diff --git a/janitor/functions.py b/janitor/functions.py index 6f2812037..6cbe42059 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -1292,7 +1292,10 @@ def _fill_empty(df, column_names, value=None): # noqa: F811 @pf.register_dataframe_method @deprecated_alias(column="column_name") def expand_column( - df: pd.DataFrame, column_name: Hashable, sep: str, concat: bool = True + df: pd.DataFrame, + column_name: Hashable, + sep: str = "|", + concat: bool = True, ) -> pd.DataFrame: """Expand a categorical column with multiple labels into dummy-coded columns. @@ -1319,7 +1322,8 @@ def expand_column( :param df: A pandas DataFrame. :param column_name: Which column to expand. - :param sep: The delimiter. Example delimiters include `|`, `, `, `,` etc. 
+ :param sep: The delimiter, same as + :py:meth:`~pandas.Series.str.get_dummies`'s `sep`; defaults to `|`. :param concat: Whether to return the expanded column concatenated to the original dataframe (`concat=True`), or to return it standalone (`concat=False`). diff --git a/tests/functions/test_expand_column.py b/tests/functions/test_expand_column.py index 5f507a678..3e218762c 100644 --- a/tests/functions/test_expand_column.py +++ b/tests/functions/test_expand_column.py @@ -25,3 +25,17 @@ def test_expand_and_concat(): column_name="col1", sep=", ", concat=True ) assert df.shape[1] == 8 + + +@pytest.mark.functions +def test_sep_default_parameter(): + """Test that the default parameter is a pipe character `|`.""" + df = pd.DataFrame( + { + "col1": ["A|B", "B|C|D", "E|F", "A|E|F"], + "col2": [1, 2, 3, 4], + } + ) + result = df.expand_column("col1") + + assert result.shape[1] == 8