Skip to content

Commit e1f5577

Browse files
authored
Merge pull request #672 from dleen/master
Allow clean to keep some metadata keys
2 parents c8fec9d + d05b250 commit e1f5577

File tree

3 files changed

+183
-36
lines changed

3 files changed

+183
-36
lines changed

nbdev/clean.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,29 +52,37 @@ def _clean_cell_output(cell):
5252
o.get('metadata', {}).pop('tags', None)
5353

5454
# %% ../nbs/11_clean.ipynb 8
55-
def _clean_cell(cell, clear_all=False):
55+
def _clean_cell(cell, clear_all=False, allowed_metadata_keys=None):
5656
"Clean `cell` by removing superfluous metadata or everything except the input if `clear_all`"
5757
if 'execution_count' in cell: cell['execution_count'] = None
5858
if 'outputs' in cell:
5959
if clear_all: cell['outputs'] = []
6060
else: _clean_cell_output(cell)
6161
if cell['source'] == ['']: cell['source'] = []
6262
cell['metadata'] = {} if clear_all else {
63-
k:v for k,v in cell['metadata'].items() if k=="hide_input"}
63+
k:v for k,v in cell['metadata'].items() if k in allowed_metadata_keys}
6464

6565
# %% ../nbs/11_clean.ipynb 9
66-
def clean_nb(nb, clear_all=False):
66+
def clean_nb(
67+
nb, # The notebook to clean
68+
clear_all=False, # Remove all cell metadata and cell outputs
69+
allowed_metadata_keys:list=None, # Preserve the list of keys in the main notebook metadata
70+
allowed_cell_metadata_keys:list=None # Preserve the list of keys in cell level metadata
71+
):
6772
"Clean `nb` from superfluous metadata"
68-
for c in nb['cells']: _clean_cell(c, clear_all=clear_all)
69-
nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in
70-
("kernelspec", "jekyll", "jupytext", "doc")}
73+
metadata_keys = {"kernelspec", "jekyll", "jupytext", "doc"}
74+
if allowed_metadata_keys: metadata_keys.update(allowed_metadata_keys)
75+
cell_metadata_keys = {"hide_input"}
76+
if allowed_cell_metadata_keys: cell_metadata_keys.update(allowed_cell_metadata_keys)
77+
for c in nb['cells']: _clean_cell(c, clear_all=clear_all, allowed_metadata_keys=cell_metadata_keys)
78+
nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in metadata_keys}
7179

72-
# %% ../nbs/11_clean.ipynb 12
80+
# %% ../nbs/11_clean.ipynb 19
7381
def _reconfigure(*strms):
7482
for s in strms:
7583
if hasattr(s,'reconfigure'): s.reconfigure(encoding='utf-8')
7684

77-
# %% ../nbs/11_clean.ipynb 13
85+
# %% ../nbs/11_clean.ipynb 20
7886
def process_write(warn_msg, proc_nb, f_in, f_out=None, disp=False):
7987
if not f_out: f_out = sys.stdout if disp else f_in
8088
if isinstance(f_in, (str,Path)): f_in = Path(f_in).open()
@@ -87,7 +95,7 @@ def process_write(warn_msg, proc_nb, f_in, f_out=None, disp=False):
8795
warn(f'{warn_msg}')
8896
warn(e)
8997

90-
# %% ../nbs/11_clean.ipynb 14
98+
# %% ../nbs/11_clean.ipynb 21
9199
@call_parse
92100
def nbdev_clean(
93101
fname:str=None, # A notebook name or glob to clean
@@ -97,14 +105,18 @@ def nbdev_clean(
97105
):
98106
"Clean all notebooks in `fname` to avoid merge conflicts"
99107
# Git hooks will pass the notebooks in stdin
100-
_clean = partial(clean_nb, clear_all=clear_all)
108+
allowed_metadata_keys = config_key("allowed_metadata_keys", default='', missing_ok=True, path=False).split()
109+
allowed_cell_metadata_keys = config_key("allowed_cell_metadata_keys", default='', missing_ok=True, path=False).split()
110+
_clean = partial(clean_nb, clear_all=clear_all,
111+
allowed_metadata_keys=allowed_metadata_keys,
112+
allowed_cell_metadata_keys=allowed_cell_metadata_keys)
101113
_write = partial(process_write, warn_msg='Failed to clean notebook', proc_nb=_clean)
102114
if stdin: return _write(f_in=sys.stdin, f_out=sys.stdout)
103115

104116
if fname is None: fname = config_key("nbs_path", '.', missing_ok=True)
105117
for f in globtastic(fname, file_glob='*.ipynb', skip_folder_re='^[_.]'): _write(f_in=f, disp=disp)
106118

107-
# %% ../nbs/11_clean.ipynb 16
119+
# %% ../nbs/11_clean.ipynb 23
108120
@call_parse
109121
def nbdev_install_hooks():
110122
"Install git hooks to clean and trust notebooks automatically"

nbs/11_clean.ipynb

Lines changed: 98 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,15 @@
121121
"outputs": [],
122122
"source": [
123123
"#|export\n",
124-
"def _clean_cell(cell, clear_all=False):\n",
124+
"def _clean_cell(cell, clear_all=False, allowed_metadata_keys=None):\n",
125125
" \"Clean `cell` by removing superfluous metadata or everything except the input if `clear_all`\"\n",
126126
" if 'execution_count' in cell: cell['execution_count'] = None\n",
127127
" if 'outputs' in cell:\n",
128128
" if clear_all: cell['outputs'] = []\n",
129129
" else: _clean_cell_output(cell)\n",
130130
" if cell['source'] == ['']: cell['source'] = []\n",
131131
" cell['metadata'] = {} if clear_all else {\n",
132-
" k:v for k,v in cell['metadata'].items() if k==\"hide_input\"}"
132+
" k:v for k,v in cell['metadata'].items() if k in allowed_metadata_keys}"
133133
]
134134
},
135135
{
@@ -139,11 +139,26 @@
139139
"outputs": [],
140140
"source": [
141141
"#|export\n",
142-
"def clean_nb(nb, clear_all=False):\n",
142+
"def clean_nb(\n",
143+
" nb, # The notebook to clean\n",
144+
" clear_all=False, # Remove all cell metadata and cell outputs\n",
145+
" allowed_metadata_keys:list=None, # Preserve the list of keys in the main notebook metadata\n",
146+
" allowed_cell_metadata_keys:list=None # Preserve the list of keys in cell level metadata\n",
147+
"):\n",
143148
" \"Clean `nb` from superfluous metadata\"\n",
144-
" for c in nb['cells']: _clean_cell(c, clear_all=clear_all)\n",
145-
" nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in\n",
146-
" (\"kernelspec\", \"jekyll\", \"jupytext\", \"doc\")}"
149+
" metadata_keys = {\"kernelspec\", \"jekyll\", \"jupytext\", \"doc\"}\n",
150+
" if allowed_metadata_keys: metadata_keys.update(allowed_metadata_keys)\n",
151+
" cell_metadata_keys = {\"hide_input\"}\n",
152+
" if allowed_cell_metadata_keys: cell_metadata_keys.update(allowed_cell_metadata_keys)\n",
153+
" for c in nb['cells']: _clean_cell(c, clear_all=clear_all, allowed_metadata_keys=cell_metadata_keys)\n",
154+
" nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in metadata_keys}"
155+
]
156+
},
157+
{
158+
"cell_type": "markdown",
159+
"metadata": {},
160+
"source": [
161+
"The test notebook has metadata in the main metadata section and also contains cell-level metadata in the second cell:"
147162
]
148163
},
149164
{
@@ -152,24 +167,69 @@
152167
"metadata": {},
153168
"outputs": [],
154169
"source": [
155-
"tst = {'cell_type': 'code', 'execution_count': 26,\n",
156-
" 'metadata': {'hide_input': True, 'meta': 23},\n",
157-
" 'outputs': [{'execution_count': 2,\n",
158-
" 'data': {\n",
159-
" 'application/vnd.google.colaboratory.intrinsic+json': {'type': 'string'},\n",
160-
" 'plain/text': ['sample output',]\n",
161-
" }, 'output': 'super'}],\n",
162-
" 'source': 'awesome_code'}\n",
163-
"nb = {'metadata': {'kernelspec': 'some_spec', 'jekyll': 'some_meta', 'meta': 37}, 'cells': [tst]}\n",
170+
"test_nb = read_nb('../tests/metadata.ipynb')\n",
164171
"\n",
165-
"clean_nb(nb)\n",
166-
"test_eq(nb['cells'][0], {'cell_type': 'code', 'execution_count': None,\n",
167-
" 'metadata': {'hide_input': True},\n",
168-
" 'outputs': [{'execution_count': None, \n",
169-
" 'data': { 'plain/text': ['sample output',]},\n",
170-
" 'output': 'super'}],\n",
171-
" 'source': 'awesome_code'})\n",
172-
"test_eq(nb['metadata'], {'kernelspec': 'some_spec', 'jekyll': 'some_meta'})"
172+
"assert set(['meta', 'jekyll', 'my_extra_key', 'my_removed_key']) <= set(test_nb.metadata.keys())\n",
173+
"assert set(['meta', 'hide_input', 'my_extra_cell_key', 'my_removed_cell_key']) == set(test_nb.cells[1].metadata.keys())"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"metadata": {},
179+
"source": [
180+
"After cleaning the notebook, all extra metadata is removed; only a few keys are kept by default:"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"clean_nb(test_nb)\n",
190+
"\n",
191+
"assert set(['jekyll', 'kernelspec']) == set(test_nb.metadata.keys())\n",
192+
"assert set(['hide_input']) == set(test_nb.cells[1].metadata.keys())"
193+
]
194+
},
195+
{
196+
"cell_type": "markdown",
197+
"metadata": {},
198+
"source": [
199+
"We can preserve some additional keys at the notebook or cell levels:"
200+
]
201+
},
202+
{
203+
"cell_type": "code",
204+
"execution_count": null,
205+
"metadata": {},
206+
"outputs": [],
207+
"source": [
208+
"test_nb = read_nb('../tests/metadata.ipynb')\n",
209+
"clean_nb(test_nb, allowed_metadata_keys={'my_extra_key'}, allowed_cell_metadata_keys={'my_extra_cell_key'})\n",
210+
"\n",
211+
"assert set(['jekyll', 'kernelspec', 'my_extra_key']) == set(test_nb.metadata.keys())\n",
212+
"assert set(['hide_input', 'my_extra_cell_key']) == set(test_nb.cells[1].metadata.keys())"
213+
]
214+
},
215+
{
216+
"cell_type": "markdown",
217+
"metadata": {},
218+
"source": [
219+
"Passing the `clear_all=True` keyword removes everything from the cell metadata:"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": null,
225+
"metadata": {},
226+
"outputs": [],
227+
"source": [
228+
"test_nb = read_nb('../tests/metadata.ipynb')\n",
229+
"clean_nb(test_nb, clear_all=True)\n",
230+
"\n",
231+
"assert set(['jekyll', 'kernelspec']) == set(test_nb.metadata.keys())\n",
232+
"test_eq(test_nb.cells[1].metadata, {})"
173233
]
174234
},
175235
{
@@ -227,7 +287,11 @@
227287
"):\n",
228288
" \"Clean all notebooks in `fname` to avoid merge conflicts\"\n",
229289
" # Git hooks will pass the notebooks in stdin\n",
230-
" _clean = partial(clean_nb, clear_all=clear_all)\n",
290+
" allowed_metadata_keys = config_key(\"allowed_metadata_keys\", default='', missing_ok=True, path=False).split()\n",
291+
" allowed_cell_metadata_keys = config_key(\"allowed_cell_metadata_keys\", default='', missing_ok=True, path=False).split()\n",
292+
" _clean = partial(clean_nb, clear_all=clear_all,\n",
293+
" allowed_metadata_keys=allowed_metadata_keys,\n",
294+
" allowed_cell_metadata_keys=allowed_cell_metadata_keys)\n",
231295
" _write = partial(process_write, warn_msg='Failed to clean notebook', proc_nb=_clean)\n",
232296
" if stdin: return _write(f_in=sys.stdin, f_out=sys.stdout)\n",
233297
" \n",
@@ -239,7 +303,16 @@
239303
"cell_type": "markdown",
240304
"metadata": {},
241305
"source": [
242-
"By default (`fname` left to `None`), the all the notebooks in `lib_folder` are cleaned. You can opt in to fully clean the notebook by removing every bit of metadata and the cell outputs by passing `clear_all=True`."
306+
"By default (`fname` left as `None`), all the notebooks in `nbs_path` are cleaned. You can opt in to fully clean the notebook by removing all cell metadata and the cell outputs by passing `clear_all=True`.\n",
307+
"\n",
308+
"If you want to keep some keys in the main notebook metadata you can set `allowed_metadata_keys` in `settings.ini`.\n",
309+
"Similarly, for cell-level metadata use `allowed_cell_metadata_keys`. For example, to preserve both `k1` and `k2` at both the notebook and cell level, add the following to `settings.ini`:\n",
310+
"```\n",
311+
"...\n",
312+
"allowed_metadata_keys = k1 k2\n",
313+
"allowed_cell_metadata_keys = k1 k2\n",
314+
"...\n",
315+
"```"
243316
]
244317
},
245318
{

tests/metadata.ipynb

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## A notebook with metadata"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {
14+
"hide_input": true,
15+
"my_extra_cell_key": "foo",
16+
"my_removed_cell_key": "foo",
17+
"meta": 37
18+
},
19+
"outputs": [
20+
{
21+
"data": {
22+
"text/plain": [
23+
"2"
24+
]
25+
},
26+
"execution_count": 1,
27+
"metadata": {},
28+
"output_type": "execute_result"
29+
}
30+
],
31+
"source": [
32+
"# A cell with metadata\n",
33+
"1+1"
34+
]
35+
}
36+
],
37+
"metadata": {
38+
"kernelspec": {
39+
"display_name": "Python 3 (ipykernel)",
40+
"language": "python",
41+
"name": "python3"
42+
},
43+
"language_info": {
44+
"codemirror_mode": {
45+
"name": "ipython",
46+
"version": 3
47+
},
48+
"file_extension": ".py",
49+
"mimetype": "text/x-python",
50+
"name": "python",
51+
"nbconvert_exporter": "python",
52+
"pygments_lexer": "ipython3",
53+
"version": "3.7.13"
54+
},
55+
"my_extra_key": "foo",
56+
"my_removed_key": "foo",
57+
"jekyll": "some_meta",
58+
"meta": 37
59+
},
60+
"nbformat": 4,
61+
"nbformat_minor": 4
62+
}

0 commit comments

Comments
 (0)