From b0b3d3244f3ecd6561fdfbc894210963c390288f Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Fri, 19 Jul 2019 23:20:22 -0700 Subject: [PATCH 01/11] add and pass test for `perserve_position` kwarg --- janitor/functions.py | 35 +++++++++++++++----- tests/functions/test_deconcatenate_column.py | 29 +++++++++++++--- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/janitor/functions.py b/janitor/functions.py index 769ea3164..ced38a0c1 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -910,6 +910,7 @@ def deconcatenate_column( column_name, new_column_names: Union[str, Iterable[str], Any], sep: str, + preserve_position: bool = False, ) -> pd.DataFrame: """ De-concatenates a single column into multiple columns. @@ -919,40 +920,58 @@ def deconcatenate_column( Used to quickly split columns out of a single column. This method does not mutate the original DataFrame. + + When `preserve_position=True`, `new_column_names` replaces original + `column_name` and preserves the column order (`column_name` is dropped); + otherwise, `new_column_names` is appended to the right of the dataframe Functional usage example: .. code-block:: python - df = deconcatenate_columns(df, - column_name='id', - new_column_names=['col1', 'col2'], - sep='-') + df = deconcatenate_column( + df, column_name='id', new_column_names=['col1', 'col2'], + sep='-', preserve_position=True + ) Method chaining example: .. code-block:: python df = (pd.DataFrame(...). - deconcatenate_columns(column_name='id', - new_column_names=['col1', 'col2'], - sep='-')) + deconcatenate_column( + column_name='id', new_column_names=['col1', 'col2'], + sep='-', preserve_position=True + )) + # When `preserve_position=True` + # df.columns change from [... id ...] into [... col1, col2, ...] + # When `preserve_position=False` (default) + # df.columns change from [... id ...] into [... id ... col1, col2] :param df: A pandas DataFrame. :param column_name: The column to split. :param new_column_names: A list of new column names post-splitting. :param sep: The separator delimiting the column's data. + :param preserve_position: Boolean for whether or not to preserve original + position of the column upon de-concatenation, default to False :returns: A pandas DataFrame with a deconcatenated column. """ assert ( column_name in df.columns ), f"column name {column_name} not present in dataframe" # noqa: E501 + cols = list(df.columns) + index_original = cols.index(column_name) deconcat = df[column_name].str.split(sep, expand=True) assert ( len(new_column_names) == deconcat.shape[1] ), "number of new column names not correct." deconcat.columns = new_column_names - return df.join(deconcat) + df = pd.concat([df, deconcat], axis=1) + if preserve_position: + for i, col_new in enumerate(new_column_names): + cols.insert(index_original + i, col_new) + df = df[cols].drop(columns=column_name) + return df @pf.register_dataframe_method diff --git a/tests/functions/test_deconcatenate_column.py b/tests/functions/test_deconcatenate_column.py index 326d28ae7..364f4126d 100644 --- a/tests/functions/test_deconcatenate_column.py +++ b/tests/functions/test_deconcatenate_column.py @@ -3,13 +3,32 @@ @pytest.mark.functions def test_deconcatenate_column(dataframe): - df = dataframe.concatenate_columns( + df_orig = dataframe.concatenate_columns( column_names=["a", "decorated-elephant"], sep="-", new_column_name="index", ) - df = df.deconcatenate_column( - column_name="index", new_column_names=["A", "B"], sep="-" + index_original = list(df_orig.columns).index("index") + index_next = index_original + 1 + df = df_orig.deconcatenate_column( + column_name="index", + new_column_names=["col1", "col2"], + sep="-", + preserve_position=False, + ) + assert "col1" in df.columns + assert "col2" in df.columns + # Test for `preserve_position` kwarg + df = df_orig.deconcatenate_column( + column_name="index", + new_column_names=["col1", "col2"], + sep="-", + preserve_position=True, + ) + assert "index" not in df.columns, "column_name not dropped" + assert list(df.columns).index("col1") == index_original, ( + "Position not preserved" + ) + assert list(df.columns).index("col2") == index_next, ( + "Position not preserved" ) - assert "A" in df.columns - assert "B" in df.columns From 57dd52a56e2d92ac899ed6f06ec39343ae5bc580 Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Fri, 19 Jul 2019 23:28:29 -0700 Subject: [PATCH 02/11] fix formatting issue of media franchise example notebook --- examples/notebooks/medium_franchise.ipynb | 78 +++++++++++------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/examples/notebooks/medium_franchise.ipynb b/examples/notebooks/medium_franchise.ipynb index 1d5a67eec..a6ad415fa 100644 --- a/examples/notebooks/medium_franchise.ipynb +++ b/examples/notebooks/medium_franchise.ipynb @@ -7,7 +7,7 @@ "# Tidy Up Web-Scraped Media Franchise Data\n", "\n", "## Background\n", - "This example combines functionalities of [pyjanitor](https://anaconda.org/conda-forge/pyjanitor) and [pandas-flavor](https://anaconda.org/conda-forge/pandas-flavor) to showcase an explicit--and thus reproducible--workflow enabled by dataframe __*method chaining*__.\n", + "This example combines functionalities of [pyjanitor](https://anaconda.org/conda-forge/pyjanitor) and [pandas-flavor](https://anaconda.org/conda-forge/pandas-flavor) to showcase an explicit--and thus reproducible--workflow enabled by dataframe __method chaining__.\n", "\n", "The data cleaning workflow largely follows the [R example](https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-07-02/revenue.R) from [the tidytuesday project](https://github.com/rfordatascience/tidytuesday). The raw data is scraped from [Wikipedia page](https://en.wikipedia.org/wiki/List_of_highest-grossing_media_franchises) titled \"*List of highest-grossing media franchises*\". The workflow is presented both in multi-step (section1) and in one-shot (section 2) fashions.\n", "\n", @@ -44,7 +44,7 @@ "\n", "## Python implementation\n", "\n", - "### Prepration" + "### Preparation" ] }, { @@ -52,8 +52,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:11.833664Z", - "start_time": "2019-07-19T04:34:11.093511Z" + "end_time": "2019-07-20T06:26:58.073741Z", + "start_time": "2019-07-20T06:26:57.348190Z" } }, "outputs": [], @@ -70,8 +70,8 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:11.838545Z", - "start_time": "2019-07-19T04:34:11.835963Z" + "end_time": "2019-07-20T06:26:58.078657Z", + "start_time": "2019-07-20T06:26:58.076034Z" } }, "outputs": [], @@ -105,8 +105,8 @@ "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.346107Z", - "start_time": "2019-07-19T04:34:11.840801Z" + "end_time": "2019-07-20T06:26:58.476918Z", + "start_time": "2019-07-20T06:26:58.081346Z" } }, "outputs": [ @@ -214,11 +214,11 @@ "source": [ "#### Rename columns\n", "R snippet:\n", - ">```R\n", - ">clean_money <- df %>% \n", - "> set_names(nm = c(\"franchise\", \"year_created\", \"total_revenue\", \"revenue_items\",\n", - "> \"original_media\", \"creators\", \"owners\"))\n", - ">```" + "```R\n", + "clean_money <- df %>% \n", + " set_names(nm = c(\"franchise\", \"year_created\", \"total_revenue\", \"revenue_items\",\n", + " \"original_media\", \"creators\", \"owners\"))\n", + "```" ] }, { @@ -226,8 +226,8 @@ "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.354447Z", - "start_time": "2019-07-19T04:34:12.348171Z" + "end_time": "2019-07-20T06:26:58.487280Z", + "start_time": "2019-07-20T06:26:58.479897Z" } }, "outputs": [], @@ -314,8 +314,8 @@ "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.369814Z", - "start_time": "2019-07-19T04:34:12.357413Z" + "end_time": "2019-07-20T06:26:58.500590Z", + "start_time": "2019-07-20T06:26:58.488818Z" } }, "outputs": [ @@ -443,8 +443,8 @@ "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.386894Z", - "start_time": "2019-07-19T04:34:12.371620Z" + "end_time": "2019-07-20T06:26:58.517156Z", + "start_time": "2019-07-20T06:26:58.502408Z" } }, "outputs": [ @@ -574,8 +574,8 @@ "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.397654Z", - "start_time": "2019-07-19T04:34:12.389122Z" + "end_time": "2019-07-20T06:26:58.528543Z", + "start_time": "2019-07-20T06:26:58.519225Z" } }, "outputs": [], @@ -656,8 +656,8 @@ "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.467859Z", - "start_time": "2019-07-19T04:34:12.399969Z" + "end_time": "2019-07-20T06:26:58.565078Z", + "start_time": "2019-07-20T06:26:58.531203Z" } }, "outputs": [ @@ -793,8 +793,8 @@ "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.474179Z", - "start_time": "2019-07-19T04:34:12.470242Z" + "end_time": "2019-07-20T06:26:58.573802Z", + "start_time": "2019-07-20T06:26:58.567195Z" } }, "outputs": [], @@ -833,8 +833,8 @@ "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.509476Z", - "start_time": "2019-07-19T04:34:12.475892Z" + "end_time": "2019-07-20T06:26:58.606774Z", + "start_time": "2019-07-20T06:26:58.576010Z" } }, "outputs": [ @@ -975,8 +975,8 @@ "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.516987Z", - "start_time": "2019-07-19T04:34:12.511117Z" + "end_time": "2019-07-20T06:26:58.614835Z", + "start_time": "2019-07-20T06:26:58.608821Z" } }, "outputs": [], @@ -1055,8 +1055,8 @@ "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.536140Z", - "start_time": "2019-07-19T04:34:12.518663Z" + "end_time": "2019-07-20T06:26:58.636212Z", + "start_time": "2019-07-20T06:26:58.616698Z" } }, "outputs": [ @@ -1180,8 +1180,8 @@ "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:12.983936Z", - "start_time": "2019-07-19T04:34:12.537990Z" + "end_time": "2019-07-20T06:26:59.015550Z", + "start_time": "2019-07-20T06:26:58.638120Z" } }, "outputs": [], @@ -1249,8 +1249,8 @@ "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:13.003367Z", - "start_time": "2019-07-19T04:34:12.986028Z" + "end_time": "2019-07-20T06:26:59.038340Z", + "start_time": "2019-07-20T06:26:59.017594Z" } }, "outputs": [ @@ -1341,8 +1341,8 @@ "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:13.016011Z", - "start_time": "2019-07-19T04:34:13.005577Z" + "end_time": "2019-07-20T06:26:59.050321Z", + "start_time": "2019-07-20T06:26:59.041266Z" } }, "outputs": [ @@ -1436,8 +1436,8 @@ "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2019-07-19T04:34:13.037967Z", - "start_time": "2019-07-19T04:34:13.017671Z" + "end_time": "2019-07-20T06:26:59.071102Z", + "start_time": "2019-07-20T06:26:59.051957Z" } }, "outputs": [ From 95fe6c87ab3cf137fedd6aacb23141b1087ab12a Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Fri, 19 Jul 2019 23:39:16 -0700 Subject: [PATCH 03/11] pip8 style changes upon make check --- janitor/functions.py | 6 +++--- tests/functions/test_deconcatenate_column.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/janitor/functions.py b/janitor/functions.py index ced38a0c1..c26e9ccaa 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -920,9 +920,9 @@ def deconcatenate_column( Used to quickly split columns out of a single column. This method does not mutate the original DataFrame. - - When `preserve_position=True`, `new_column_names` replaces original - `column_name` and preserves the column order (`column_name` is dropped); + + When `preserve_position=True`, `new_column_names` replaces original + `column_name` and preserves the column order (`column_name` is dropped); otherwise, `new_column_names` is appended to the right of the dataframe Functional usage example: diff --git a/tests/functions/test_deconcatenate_column.py b/tests/functions/test_deconcatenate_column.py index 364f4126d..d5f2a13e8 100644 --- a/tests/functions/test_deconcatenate_column.py +++ b/tests/functions/test_deconcatenate_column.py @@ -26,9 +26,9 @@ def test_deconcatenate_column(dataframe): preserve_position=True, ) assert "index" not in df.columns, "column_name not dropped" - assert list(df.columns).index("col1") == index_original, ( - "Position not preserved" - ) - assert list(df.columns).index("col2") == index_next, ( - "Position not preserved" - ) + assert ( + list(df.columns).index("col1") == index_original + ), "Position not preserved" + assert ( + list(df.columns).index("col2") == index_next + ), "Position not preserved" From 0d10ae74b1956a941d7abec10dbc67d7e40cddc6 Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 14:49:46 -0700 Subject: [PATCH 04/11] adjust docstring for `preserve_position` kwarg --- janitor/functions.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/janitor/functions.py b/janitor/functions.py index c26e9ccaa..0521f2d3b 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -918,13 +918,23 @@ def deconcatenate_column( This is the inverse of the `concatenate_columns` function. Used to quickly split columns out of a single column. + + The keyword argument `preserve_position` takes `True` or `False` boolean + that controls whether the `new_column_names` will take the original + position of the to-be-deconcatenated `column_name`: + + - When `preserve_position=False` (default), `df.columns` change from + `[..., column_name, ...]` to `[..., column_name, ..., new_column_names]`. + In other words, the deconcatenated new columns are appended to the right + of the original dataframe and the original `column_name` is NOT dropped. + - When `preserve_position=True`, `df.column` change from + `[..., column_name, ...]` to `[..., new_column_names, ...]`. + In other words, the deconcatenated new column will REPLACE the original + `column_name` at its original position, and `column_name` itself + is dropped. This method does not mutate the original DataFrame. - When `preserve_position=True`, `new_column_names` replaces original - `column_name` and preserves the column order (`column_name` is dropped); - otherwise, `new_column_names` is appended to the right of the dataframe - Functional usage example: .. code-block:: python @@ -943,10 +953,6 @@ def deconcatenate_column( column_name='id', new_column_names=['col1', 'col2'], sep='-', preserve_position=True )) - # When `preserve_position=True` - # df.columns change from [... id ...] into [... col1, col2, ...] - # When `preserve_position=False` (default) - # df.columns change from [... id ...] into [... id ... col1, col2] :param df: A pandas DataFrame. :param column_name: The column to split. From b8a5c2fd0a6066cb654e6c79019946306f5ff8c4 Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:23:03 -0700 Subject: [PATCH 05/11] improve locations of branch logic --- janitor/functions.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/janitor/functions.py b/janitor/functions.py index 0521f2d3b..520f3f978 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -965,18 +965,25 @@ def deconcatenate_column( assert ( column_name in df.columns ), f"column name {column_name} not present in dataframe" # noqa: E501 - cols = list(df.columns) - index_original = cols.index(column_name) deconcat = df[column_name].str.split(sep, expand=True) + if preserve_position: + # Keep a copy of the original dataframe + df_original = df.copy() assert ( len(new_column_names) == deconcat.shape[1] ), "number of new column names not correct." deconcat.columns = new_column_names df = pd.concat([df, deconcat], axis=1) if preserve_position: + cols = list(df_original.columns) + index_original = cols.index(column_name) for i, col_new in enumerate(new_column_names): cols.insert(index_original + i, col_new) df = df[cols].drop(columns=column_name) + assert ( + len(df.columns) == len(df_original.columns) + + len(new_column_names) - 1 + ), "number of columns after deconcatenation is incorrect" return df From 2858d163d791caa31c56d870dbbcafd9e0f71641 Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:24:02 -0700 Subject: [PATCH 06/11] revert to original test for `test_deconcatenate_column` --- tests/functions/test_deconcatenate_column.py | 23 +++----------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/tests/functions/test_deconcatenate_column.py b/tests/functions/test_deconcatenate_column.py index d5f2a13e8..b9f6bc101 100644 --- a/tests/functions/test_deconcatenate_column.py +++ b/tests/functions/test_deconcatenate_column.py @@ -8,27 +8,10 @@ def test_deconcatenate_column(dataframe): sep="-", new_column_name="index", ) - index_original = list(df_orig.columns).index("index") - index_next = index_original + 1 df = df_orig.deconcatenate_column( column_name="index", - new_column_names=["col1", "col2"], + new_column_names=["A", "B"], sep="-", - preserve_position=False, ) - assert "col1" in df.columns - assert "col2" in df.columns - # Test for `preserve_position` kwarg - df = df_orig.deconcatenate_column( - column_name="index", - new_column_names=["col1", "col2"], - sep="-", - preserve_position=True, - ) - assert "index" not in df.columns, "column_name not dropped" - assert ( - list(df.columns).index("col1") == index_original - ), "Position not preserved" - assert ( - list(df.columns).index("col2") == index_next - ), "Position not preserved" + assert "A" in df.columns + assert "B" in df.columns From ec5128e22d82dddffe81fdf338a29871d0e9784c Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:25:08 -0700 Subject: [PATCH 07/11] Set up separate test for `preserve_position=True` in `deconcatenate_column` --- ..._deconcatenate_column_preserve_position.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/functions/test_deconcatenate_column_preserve_position.py diff --git a/tests/functions/test_deconcatenate_column_preserve_position.py b/tests/functions/test_deconcatenate_column_preserve_position.py new file mode 100644 index 000000000..389901a9b --- /dev/null +++ b/tests/functions/test_deconcatenate_column_preserve_position.py @@ -0,0 +1,29 @@ +import pytest + + +@pytest.mark.functions +def test_deconcatenate_column_preserve_position(dataframe): + df_original = dataframe.concatenate_columns( + column_names=["a", "decorated-elephant"], + sep="-", + new_column_name="index", + ) + index_original = list(df_original.columns).index("index") + df = df_original.deconcatenate_column( + column_name="index", + new_column_names=["col1", "col2"], + sep="-", + preserve_position=True, + ) + assert "index" not in df.columns, "column_name not dropped" + assert "col1" in df.columns, "new column not present" + assert "col2" in df.columns, "new column not present" + assert ( + len(df_original.columns) + 1 == len(df.columns) + ), 'Number of columns inconsistent' + assert ( + list(df.columns).index("col1") == index_original + ), "Position not preserved" + assert ( + list(df.columns).index("col2") == index_original + 1 + ), "Position not preserved" From c76dc69fa9b269f8317ff7637295a8dc332201d5 Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:27:42 -0700 Subject: [PATCH 08/11] change url to shandou contributions --- AUTHORS.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index f71b79928..7a1404dff 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -55,7 +55,7 @@ Contributors - `@loganthomas `_ | `contributions `_ - `@kulini `_ | `contributions `_ - `@dwgoltra `_ | `contributions `_ -- `@shandou `_ | `contributions `_ +- `@shandou `_ | `contributions `_ - `@samwalkow `_ | `contributions `_ - `@portc13 `_ | `contributions `_ - `@DSNortsev `_ | `contributions `_ From 22975adab5cdbba954ba85e2efd15f31b60d97fd Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:32:34 -0700 Subject: [PATCH 09/11] pip8 reformat upon `make check` --- janitor/functions.py | 10 +++++----- tests/functions/test_deconcatenate_column.py | 4 +--- .../test_deconcatenate_column_preserve_position.py | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/janitor/functions.py b/janitor/functions.py index 520f3f978..d81c298fe 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -918,15 +918,15 @@ def deconcatenate_column( This is the inverse of the `concatenate_columns` function. Used to quickly split columns out of a single column. - + The keyword argument `preserve_position` takes `True` or `False` boolean that controls whether the `new_column_names` will take the original position of the to-be-deconcatenated `column_name`: - + - When `preserve_position=False` (default), `df.columns` change from `[..., column_name, ...]` to `[..., column_name, ..., new_column_names]`. In other words, the deconcatenated new columns are appended to the right - of the original dataframe and the original `column_name` is NOT dropped. + of the original dataframe and the original `column_name` is NOT dropped. - When `preserve_position=True`, `df.column` change from `[..., column_name, ...]` to `[..., new_column_names, ...]`. In other words, the deconcatenated new column will REPLACE the original @@ -981,8 +981,8 @@ def deconcatenate_column( cols.insert(index_original + i, col_new) df = df[cols].drop(columns=column_name) assert ( - len(df.columns) == len(df_original.columns) + - len(new_column_names) - 1 + len(df.columns) + == len(df_original.columns) + len(new_column_names) - 1 ), "number of columns after deconcatenation is incorrect" return df diff --git a/tests/functions/test_deconcatenate_column.py b/tests/functions/test_deconcatenate_column.py index b9f6bc101..9c90e59de 100644 --- a/tests/functions/test_deconcatenate_column.py +++ b/tests/functions/test_deconcatenate_column.py @@ -9,9 +9,7 @@ def test_deconcatenate_column(dataframe): new_column_name="index", ) df = df_orig.deconcatenate_column( - column_name="index", - new_column_names=["A", "B"], - sep="-", + column_name="index", new_column_names=["A", "B"], sep="-" ) assert "A" in df.columns assert "B" in df.columns diff --git a/tests/functions/test_deconcatenate_column_preserve_position.py b/tests/functions/test_deconcatenate_column_preserve_position.py index 389901a9b..35b720907 100644 --- a/tests/functions/test_deconcatenate_column_preserve_position.py +++ b/tests/functions/test_deconcatenate_column_preserve_position.py @@ -18,9 +18,9 @@ def test_deconcatenate_column_preserve_position(dataframe): assert "index" not in df.columns, "column_name not dropped" assert "col1" in df.columns, "new column not present" assert "col2" in df.columns, "new column not present" - assert ( - len(df_original.columns) + 1 == len(df.columns) - ), 'Number of columns inconsistent' + assert len(df_original.columns) + 1 == len( + df.columns + ), "Number of columns inconsistent" assert ( list(df.columns).index("col1") == index_original ), "Position not preserved" From 5ef0f6b9397a0b5e73b3b380efc022f0c445317b Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:36:46 -0700 Subject: [PATCH 10/11] change type annotation used for `new_column_names` --- janitor/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/functions.py b/janitor/functions.py index d81c298fe..0f2e5c6e8 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -908,7 +908,7 @@ def concatenate_columns( def deconcatenate_column( df: pd.DataFrame, column_name, - new_column_names: Union[str, Iterable[str], Any], + new_column_names: Union[List(str), Tuple(str)], sep: str, preserve_position: bool = False, ) -> pd.DataFrame: From 5c51cafa6eb02f955fed33a0caa19a6bf281fe2b Mon Sep 17 00:00:00 2001 From: Shan Dou Date: Sat, 20 Jul 2019 15:39:36 -0700 Subject: [PATCH 11/11] correct bracket used for List[str] and Tuple[str] type hinting --- janitor/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/functions.py b/janitor/functions.py index 0f2e5c6e8..8db6e2fc0 100644 --- a/janitor/functions.py +++ b/janitor/functions.py @@ -908,7 +908,7 @@ def concatenate_columns( def deconcatenate_column( df: pd.DataFrame, column_name, - new_column_names: Union[List(str), Tuple(str)], + new_column_names: Union[List[str], Tuple[str]], sep: str, preserve_position: bool = False, ) -> pd.DataFrame: