[ENH] add preserve_position kwarg to deconcatenate_column #478 #484

Merged
merged 11 commits into from
Jul 21, 2019
2 changes: 1 addition & 1 deletion AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Contributors
- `@loganthomas <https://github.com/loganthomas>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3Aloganthomas>`_
- `@kulini <https://github.com/kulini>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+kulini>`_
- `@dwgoltra <https://github.com/dwgoltra>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3Adwgoltra>`_
- `@shandou <https://github.com/shandou>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3Ashandou>`_
- `@shandou <https://github.com/shandou>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Apr+author%3Ashandou>`_
- `@samwalkow <https://github.com/samwalkow>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3Asamwalkow>`_
- `@portc13 <https://github.com/portc13>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3Aportc13>`_
- `@DSNortsev <https://github.com/DSNortsev>`_ | `contributions <https://github.com/ericmjl/pyjanitor/pulls?utf8=%E2%9C%93&q=is%3Aclosed+mentions%3ADSNortsev>`_
Expand Down
78 changes: 39 additions & 39 deletions examples/notebooks/medium_franchise.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"# Tidy Up Web-Scraped Media Franchise Data\n",
"\n",
"## Background\n",
"This example combines functionalities of [pyjanitor](https://anaconda.org/conda-forge/pyjanitor) and [pandas-flavor](https://anaconda.org/conda-forge/pandas-flavor) to showcase an explicit--and thus reproducible--workflow enabled by dataframe __*method chaining*__.\n",
"This example combines functionalities of [pyjanitor](https://anaconda.org/conda-forge/pyjanitor) and [pandas-flavor](https://anaconda.org/conda-forge/pandas-flavor) to showcase an explicit--and thus reproducible--workflow enabled by dataframe __method chaining__.\n",
"\n",
"The data cleaning workflow largely follows the [R example](https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-07-02/revenue.R) from [the tidytuesday project](https://github.com/rfordatascience/tidytuesday). The raw data is scraped from [Wikipedia page](https://en.wikipedia.org/wiki/List_of_highest-grossing_media_franchises) titled \"*List of highest-grossing media franchises*\". The workflow is presented both in multi-step (section1) and in one-shot (section 2) fashions.\n",
"\n",
Expand Down Expand Up @@ -44,16 +44,16 @@
"\n",
"## Python implementation\n",
"\n",
"### Prepration"
"### Preparation"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:11.833664Z",
"start_time": "2019-07-19T04:34:11.093511Z"
"end_time": "2019-07-20T06:26:58.073741Z",
"start_time": "2019-07-20T06:26:57.348190Z"
}
},
"outputs": [],
Expand All @@ -70,8 +70,8 @@
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:11.838545Z",
"start_time": "2019-07-19T04:34:11.835963Z"
"end_time": "2019-07-20T06:26:58.078657Z",
"start_time": "2019-07-20T06:26:58.076034Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -105,8 +105,8 @@
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.346107Z",
"start_time": "2019-07-19T04:34:11.840801Z"
"end_time": "2019-07-20T06:26:58.476918Z",
"start_time": "2019-07-20T06:26:58.081346Z"
}
},
"outputs": [
Expand Down Expand Up @@ -214,20 +214,20 @@
"source": [
"#### Rename columns\n",
"R snippet:\n",
">```R\n",
">clean_money <- df %>% \n",
"> set_names(nm = c(\"franchise\", \"year_created\", \"total_revenue\", \"revenue_items\",\n",
"> \"original_media\", \"creators\", \"owners\"))\n",
">```"
"```R\n",
"clean_money <- df %>% \n",
" set_names(nm = c(\"franchise\", \"year_created\", \"total_revenue\", \"revenue_items\",\n",
" \"original_media\", \"creators\", \"owners\"))\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.354447Z",
"start_time": "2019-07-19T04:34:12.348171Z"
"end_time": "2019-07-20T06:26:58.487280Z",
"start_time": "2019-07-20T06:26:58.479897Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -314,8 +314,8 @@
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.369814Z",
"start_time": "2019-07-19T04:34:12.357413Z"
"end_time": "2019-07-20T06:26:58.500590Z",
"start_time": "2019-07-20T06:26:58.488818Z"
}
},
"outputs": [
Expand Down Expand Up @@ -443,8 +443,8 @@
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.386894Z",
"start_time": "2019-07-19T04:34:12.371620Z"
"end_time": "2019-07-20T06:26:58.517156Z",
"start_time": "2019-07-20T06:26:58.502408Z"
}
},
"outputs": [
Expand Down Expand Up @@ -574,8 +574,8 @@
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.397654Z",
"start_time": "2019-07-19T04:34:12.389122Z"
"end_time": "2019-07-20T06:26:58.528543Z",
"start_time": "2019-07-20T06:26:58.519225Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -656,8 +656,8 @@
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.467859Z",
"start_time": "2019-07-19T04:34:12.399969Z"
"end_time": "2019-07-20T06:26:58.565078Z",
"start_time": "2019-07-20T06:26:58.531203Z"
}
},
"outputs": [
Expand Down Expand Up @@ -793,8 +793,8 @@
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.474179Z",
"start_time": "2019-07-19T04:34:12.470242Z"
"end_time": "2019-07-20T06:26:58.573802Z",
"start_time": "2019-07-20T06:26:58.567195Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -833,8 +833,8 @@
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.509476Z",
"start_time": "2019-07-19T04:34:12.475892Z"
"end_time": "2019-07-20T06:26:58.606774Z",
"start_time": "2019-07-20T06:26:58.576010Z"
}
},
"outputs": [
Expand Down Expand Up @@ -975,8 +975,8 @@
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.516987Z",
"start_time": "2019-07-19T04:34:12.511117Z"
"end_time": "2019-07-20T06:26:58.614835Z",
"start_time": "2019-07-20T06:26:58.608821Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -1055,8 +1055,8 @@
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.536140Z",
"start_time": "2019-07-19T04:34:12.518663Z"
"end_time": "2019-07-20T06:26:58.636212Z",
"start_time": "2019-07-20T06:26:58.616698Z"
}
},
"outputs": [
Expand Down Expand Up @@ -1180,8 +1180,8 @@
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:12.983936Z",
"start_time": "2019-07-19T04:34:12.537990Z"
"end_time": "2019-07-20T06:26:59.015550Z",
"start_time": "2019-07-20T06:26:58.638120Z"
}
},
"outputs": [],
Expand Down Expand Up @@ -1249,8 +1249,8 @@
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:13.003367Z",
"start_time": "2019-07-19T04:34:12.986028Z"
"end_time": "2019-07-20T06:26:59.038340Z",
"start_time": "2019-07-20T06:26:59.017594Z"
}
},
"outputs": [
Expand Down Expand Up @@ -1341,8 +1341,8 @@
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:13.016011Z",
"start_time": "2019-07-19T04:34:13.005577Z"
"end_time": "2019-07-20T06:26:59.050321Z",
"start_time": "2019-07-20T06:26:59.041266Z"
}
},
"outputs": [
Expand Down Expand Up @@ -1436,8 +1436,8 @@
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T04:34:13.037967Z",
"start_time": "2019-07-19T04:34:13.017671Z"
"end_time": "2019-07-20T06:26:59.071102Z",
"start_time": "2019-07-20T06:26:59.051957Z"
}
},
"outputs": [
Expand Down
50 changes: 41 additions & 9 deletions janitor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,8 +908,9 @@ def concatenate_columns(
def deconcatenate_column(
df: pd.DataFrame,
column_name,
new_column_names: Union[str, Iterable[str], Any],
new_column_names: Union[List[str], Tuple[str]],
sep: str,
preserve_position: bool = False,
) -> pd.DataFrame:
"""
De-concatenates a single column into multiple columns.
Expand All @@ -918,41 +919,72 @@ def deconcatenate_column(

Used to quickly split columns out of a single column.

    The boolean keyword argument `preserve_position` controls whether the
    `new_column_names` take the original position of the
    to-be-deconcatenated `column_name`:

    - When `preserve_position=False` (default), `df.columns` changes from
      `[..., column_name, ...]` to `[..., column_name, ..., new_column_names]`.
      In other words, the deconcatenated new columns are appended to the right
      of the original dataframe, and the original `column_name` is NOT dropped.
    - When `preserve_position=True`, `df.columns` changes from
      `[..., column_name, ...]` to `[..., new_column_names, ...]`.
      In other words, the deconcatenated new columns REPLACE the original
      `column_name` at its original position, and `column_name` itself
      is dropped.

This method does not mutate the original DataFrame.

Functional usage example:

.. code-block:: python

df = deconcatenate_columns(df,
column_name='id',
new_column_names=['col1', 'col2'],
sep='-')
df = deconcatenate_column(
df, column_name='id', new_column_names=['col1', 'col2'],
sep='-', preserve_position=True
)

Method chaining example:

.. code-block:: python

df = (pd.DataFrame(...).
deconcatenate_columns(column_name='id',
new_column_names=['col1', 'col2'],
sep='-'))
deconcatenate_column(
column_name='id', new_column_names=['col1', 'col2'],
sep='-', preserve_position=True
))

:param df: A pandas DataFrame.
:param column_name: The column to split.
:param new_column_names: A list of new column names post-splitting.
:param sep: The separator delimiting the column's data.
    :param preserve_position: Boolean for whether or not to preserve the
        original position of the column upon de-concatenation; defaults to
        False.
:returns: A pandas DataFrame with a deconcatenated column.
"""
assert (
column_name in df.columns
), f"column name {column_name} not present in dataframe" # noqa: E501
deconcat = df[column_name].str.split(sep, expand=True)
if preserve_position:
# Keep a copy of the original dataframe
df_original = df.copy()
assert (
len(new_column_names) == deconcat.shape[1]
), "number of new column names not correct."
deconcat.columns = new_column_names
return df.join(deconcat)
df = pd.concat([df, deconcat], axis=1)
if preserve_position:
cols = list(df_original.columns)
index_original = cols.index(column_name)
for i, col_new in enumerate(new_column_names):
cols.insert(index_original + i, col_new)
df = df[cols].drop(columns=column_name)
assert (
len(df.columns)
== len(df_original.columns) + len(new_column_names) - 1
), "number of columns after deconcatenation is incorrect"
return df


@pf.register_dataframe_method
Expand Down
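For readers outside the diff context, the `preserve_position=True` logic added above can be sketched in plain pandas, without pyjanitor. The function and column names below are illustrative, not part of the library's API:

```python
import pandas as pd


def deconcatenate_preserving_position(df, column_name, new_column_names, sep):
    # Split the target column into multiple columns on the separator.
    deconcat = df[column_name].str.split(sep, expand=True)
    deconcat.columns = new_column_names
    # Build the desired column order: insert the new names where the
    # original column sat.
    cols = list(df.columns)
    index_original = cols.index(column_name)
    for i, col_new in enumerate(new_column_names):
        cols.insert(index_original + i, col_new)
    # Append the split columns, reorder, then drop the original column.
    out = pd.concat([df, deconcat], axis=1)
    return out[cols].drop(columns=column_name)


df = pd.DataFrame({"x": [1, 2], "id": ["a-1", "b-2"], "y": [3, 4]})
result = deconcatenate_preserving_position(df, "id", ["col1", "col2"], "-")
# result.columns -> ['x', 'col1', 'col2', 'y']
```

Note the design choice mirrored from the PR: the new columns are first appended on the right (`pd.concat`), and the reorder-and-drop step only runs when position must be preserved, so the default append path stays untouched.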
4 changes: 2 additions & 2 deletions tests/functions/test_deconcatenate_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

@pytest.mark.functions
def test_deconcatenate_column(dataframe):
df = dataframe.concatenate_columns(
df_orig = dataframe.concatenate_columns(
column_names=["a", "decorated-elephant"],
sep="-",
new_column_name="index",
)
df = df.deconcatenate_column(
df = df_orig.deconcatenate_column(
column_name="index", new_column_names=["A", "B"], sep="-"
)
assert "A" in df.columns
Expand Down
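The default (append) behavior that this test exercises can be sketched in plain pandas as well; the column values here are made up for illustration:

```python
import pandas as pd

df = pd.DataFrame({"index": ["1-a", "2-b"]})
# Split on the separator into as many columns as there are parts.
deconcat = df["index"].str.split("-", expand=True)
deconcat.columns = ["A", "B"]
# Default behavior: new columns are appended and "index" is kept.
df = df.join(deconcat)
# df.columns -> ['index', 'A', 'B']
```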
29 changes: 29 additions & 0 deletions tests/functions/test_deconcatenate_column_preserve_position.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
Member
I’m so sorry, I should have made this clearer in the docs. This test should be in test_deconcatenate_column.py, because all tests for a particular function go in a single .py file. I don’t want to hold up this PR, so I’ll let it go, and PR back the correct changes into dev, so don’t worry about it 😄.

Collaborator Author
Oops, sorry about that! (Now I know what to do if similar situations happen in the future.) Thanks a lot!
Also, the entire CI process feels like magic~ I'll check whether your SciPy talk covers how this was set up and how the stack behind it was chosen.



@pytest.mark.functions
def test_deconcatenate_column_preserve_position(dataframe):
df_original = dataframe.concatenate_columns(
column_names=["a", "decorated-elephant"],
sep="-",
new_column_name="index",
)
index_original = list(df_original.columns).index("index")
df = df_original.deconcatenate_column(
column_name="index",
new_column_names=["col1", "col2"],
sep="-",
preserve_position=True,
)
assert "index" not in df.columns, "column_name not dropped"
assert "col1" in df.columns, "new column not present"
assert "col2" in df.columns, "new column not present"
assert len(df_original.columns) + 1 == len(
df.columns
), "Number of columns inconsistent"
assert (
list(df.columns).index("col1") == index_original
), "Position not preserved"
assert (
list(df.columns).index("col2") == index_original + 1
), "Position not preserved"