Skip to content

Commit b87257e

Browse files
authored
Merge pull request #11 from JoshuaC3/clean_names-multiindex
Clean names multiindex
2 parents 12ec1a1 + b401d8b commit b87257e

File tree

2 files changed

+69
-20
lines changed

2 files changed

+69
-20
lines changed

janitor/functions.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,26 +31,22 @@ def clean_names(df):
3131
:param df: The pandas DataFrame object.
3232
:returns: A pandas DataFrame.
3333
"""
34-
columns = [(c.lower()
35-
.replace(' ', '_')
36-
.replace('/', '_')
37-
.replace(':', '_')
38-
.replace("'", '')
39-
.replace('’', '')
40-
.replace(',', '_')
41-
.replace('?', '_')
42-
.replace('-', '_')
43-
.replace('(', '_')
44-
.replace(')', '_')
45-
.replace('.', '_')
46-
) for c in df.columns]
47-
48-
newcolumns = []
49-
for col in columns:
50-
# NOTE: Replace repeating underscores with single ones
51-
newcol = re.sub('[_]{2,}', '_', col)
52-
newcolumns.append(newcol)
53-
df.columns = newcolumns
34+
df = df.rename(
35+
columns=lambda x: x.lower()
36+
.replace(' ', '_')
37+
.replace('/', '_')
38+
.replace(':', '_')
39+
.replace("'", '')
40+
.replace('’', '')
41+
.replace(',', '_')
42+
.replace('?', '_')
43+
.replace('-', '_')
44+
.replace('(', '_')
45+
.replace(')', '_')
46+
.replace('.', '_')
47+
)
48+
49+
df = df.rename(columns=lambda x: re.sub('_+', '_', x))
5450
return df
5551

5652

tests/test_functions.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@ def null_df():
2626
return df
2727

2828

29+
@pytest.fixture
def multiindex_dataframe():
    """Provide a small DataFrame whose columns are keyed by 2-tuples.

    Every column holds the values ``[1, 2, 3]``; the tuple keys mix
    upper case, spaces, repeated underscores and punctuation so that
    name cleaning has something to normalise on both tuple positions.
    """
    column_keys = [
        ('a', 'b'),
        ('Bell__Chart', 'Normal Distribution'),
        ('decorated-elephant', "r.i.p-rhino :'("),
    ]
    # A fresh list per column, exactly as in a hand-written dict literal.
    return pd.DataFrame({key: [1, 2, 3] for key in column_keys})
38+
39+
2940
def test_clean_names_functional(dataframe):
3041
df = clean_names(dataframe)
3142
expected_columns = ['a', 'bell_chart', 'decorated_elephant']
@@ -106,3 +117,45 @@ def test_convert_excel_date():
106117
def test_fill_empty(null_df):
    """After ``fill_empty(columns=['2'], value=3)`` column '2' holds only 3."""
    filled = jn.DataFrame(null_df).fill_empty(columns=['2'], value=3)
    distinct_values = set(filled.loc[:, '2'])
    assert distinct_values == {3}
120+
121+
122+
def test_multiindex_clean_names_functional(multiindex_dataframe):
    """``clean_names`` called as a plain function cleans MultiIndex columns.

    The cleaned column tuples are compared as a set, so column order is
    not part of the assertion.
    """
    df = clean_names(multiindex_dataframe)

    # Build the expected index from explicit tuples rather than
    # ``pd.MultiIndex(levels=..., labels=...)``: the ``labels`` keyword was
    # renamed to ``codes`` in pandas 0.24 and later removed, whereas
    # ``from_tuples`` is available on every pandas version.
    expected_columns = pd.MultiIndex.from_tuples([
        ('a', 'b'),
        ('bell_chart', 'normal_distribution'),
        ('decorated_elephant', 'r_i_p_rhino_'),
    ])
    assert set(df.columns) == set(expected_columns)
134+
135+
136+
def test_multiindex_clean_names_method_chain(multiindex_dataframe):
    """``jn.DataFrame(...).clean_names()`` cleans MultiIndex columns.

    The cleaned column tuples are compared as a set, so column order is
    not part of the assertion.
    """
    df = jn.DataFrame(multiindex_dataframe).clean_names()

    # Build the expected index from explicit tuples rather than
    # ``pd.MultiIndex(levels=..., labels=...)``: the ``labels`` keyword was
    # renamed to ``codes`` in pandas 0.24 and later removed, whereas
    # ``from_tuples`` is available on every pandas version.
    expected_columns = pd.MultiIndex.from_tuples([
        ('a', 'b'),
        ('bell_chart', 'normal_distribution'),
        ('decorated_elephant', 'r_i_p_rhino_'),
    ])
    assert set(df.columns) == set(expected_columns)
148+
149+
150+
def test_multiindex_clean_names_pipe(multiindex_dataframe):
    """``DataFrame.pipe(clean_names)`` cleans MultiIndex columns.

    The cleaned column tuples are compared as a set, so column order is
    not part of the assertion.
    """
    df = multiindex_dataframe.pipe(clean_names)

    # Build the expected index from explicit tuples rather than
    # ``pd.MultiIndex(levels=..., labels=...)``: the ``labels`` keyword was
    # renamed to ``codes`` in pandas 0.24 and later removed, whereas
    # ``from_tuples`` is available on every pandas version.
    expected_columns = pd.MultiIndex.from_tuples([
        ('a', 'b'),
        ('bell_chart', 'normal_distribution'),
        ('decorated_elephant', 'r_i_p_rhino_'),
    ])
    assert set(df.columns) == set(expected_columns)

0 commit comments

Comments
 (0)