|
121 | 121 | "outputs": [],
|
122 | 122 | "source": [
|
123 | 123 | "#|export\n",
|
124 |
| - "def _clean_cell(cell, clear_all=False):\n", |
| 124 | + "def _clean_cell(cell, clear_all=False, allowed_metadata_keys=None):\n", |
125 | 125 | " \"Clean `cell` by removing superfluous metadata or everything except the input if `clear_all`\"\n",
|
126 | 126 | " if 'execution_count' in cell: cell['execution_count'] = None\n",
|
127 | 127 | " if 'outputs' in cell:\n",
|
128 | 128 | " if clear_all: cell['outputs'] = []\n",
|
129 | 129 | " else: _clean_cell_output(cell)\n",
|
130 | 130 | " if cell['source'] == ['']: cell['source'] = []\n",
|
131 | 131 | " cell['metadata'] = {} if clear_all else {\n",
|
132 |
| - " k:v for k,v in cell['metadata'].items() if k==\"hide_input\"}" |
| 132 | + " k:v for k,v in cell['metadata'].items() if k in allowed_metadata_keys}" |
133 | 133 | ]
|
134 | 134 | },
|
135 | 135 | {
|
|
139 | 139 | "outputs": [],
|
140 | 140 | "source": [
|
141 | 141 | "#|export\n",
|
142 |
def clean_nb(
    nb, # The notebook to clean
    clear_all=False, # Remove all cell metadata and cell outputs
    allowed_metadata_keys:list=None, # Extra keys to preserve in the main notebook metadata
    allowed_cell_metadata_keys:list=None # Extra keys to preserve in cell level metadata
):
    "Strip superfluous metadata from `nb` in place, keeping only allow-listed keys"
    # Built-in allow-lists, widened by whatever the caller asks to keep.
    nb_keep = {"kernelspec", "jekyll", "jupytext", "doc"} | set(allowed_metadata_keys or ())
    cell_keep = {"hide_input"} | set(allowed_cell_metadata_keys or ())
    # Cells are cleaned first, then the notebook-level metadata is filtered.
    for cell in nb['cells']:
        _clean_cell(cell, clear_all=clear_all, allowed_metadata_keys=cell_keep)
    nb['metadata'] = {key: val for key, val in nb['metadata'].items() if key in nb_keep}
| 155 | + ] |
| 156 | + }, |
| 157 | + { |
| 158 | + "cell_type": "markdown", |
| 159 | + "metadata": {}, |
| 160 | + "source": [ |
| 161 | + "The test notebook has metadata in the main metadata section and also contains cell-level metadata in the second cell:" |
147 | 162 | ]
|
148 | 163 | },
|
149 | 164 | {
|
|
152 | 167 | "metadata": {},
|
153 | 168 | "outputs": [],
|
154 | 169 | "source": [
|
155 |
# Load the fixture notebook that carries both notebook-level and cell-level metadata.
test_nb = read_nb('../tests/metadata.ipynb')

# Before cleaning, the notebook metadata includes at least these keys...
assert {'meta', 'jekyll', 'my_extra_key', 'my_removed_key'} <= set(test_nb.metadata.keys())
# ...and the second cell's metadata has exactly these keys.
assert {'meta', 'hide_input', 'my_extra_cell_key', 'my_removed_cell_key'} == set(test_nb.cells[1].metadata.keys())
| 174 | + ] |
| 175 | + }, |
| 176 | + { |
| 177 | + "cell_type": "markdown", |
| 178 | + "metadata": {}, |
| 179 | + "source": [ |
| 180 | + "After cleaning the notebook, all extra metadata is removed; only some keys are kept by default:" |
| 181 | + ] |
| 182 | + }, |
| 183 | + { |
| 184 | + "cell_type": "code", |
| 185 | + "execution_count": null, |
| 186 | + "metadata": {}, |
| 187 | + "outputs": [], |
| 188 | + "source": [ |
# Clean with the default allow-lists only.
clean_nb(test_nb)

# Only the built-in notebook keys present in the fixture survive...
assert {'jekyll', 'kernelspec'} == set(test_nb.metadata.keys())
# ...and only `hide_input` remains on the second cell.
assert {'hide_input'} == set(test_nb.cells[1].metadata.keys())
| 193 | + ] |
| 194 | + }, |
| 195 | + { |
| 196 | + "cell_type": "markdown", |
| 197 | + "metadata": {}, |
| 198 | + "source": [ |
| 199 | + "We can preserve some additional keys at the notebook or cell levels:" |
| 200 | + ] |
| 201 | + }, |
| 202 | + { |
| 203 | + "cell_type": "code", |
| 204 | + "execution_count": null, |
| 205 | + "metadata": {}, |
| 206 | + "outputs": [], |
| 207 | + "source": [ |
# Reload the fixture, then clean while asking to keep one extra key at each level.
test_nb = read_nb('../tests/metadata.ipynb')
clean_nb(
    test_nb,
    allowed_metadata_keys={'my_extra_key'},
    allowed_cell_metadata_keys={'my_extra_cell_key'},
)

assert {'jekyll', 'kernelspec', 'my_extra_key'} == set(test_nb.metadata.keys())
assert {'hide_input', 'my_extra_cell_key'} == set(test_nb.cells[1].metadata.keys())
| 213 | + ] |
| 214 | + }, |
| 215 | + { |
| 216 | + "cell_type": "markdown", |
| 217 | + "metadata": {}, |
| 218 | + "source": [ |
| 219 | + "Passing the `clear_all=True` keyword removes everything from the cell metadata:" |
| 220 | + ] |
| 221 | + }, |
| 222 | + { |
| 223 | + "cell_type": "code", |
| 224 | + "execution_count": null, |
| 225 | + "metadata": {}, |
| 226 | + "outputs": [], |
| 227 | + "source": [ |
# Reload the fixture and clean with `clear_all=True`.
test_nb = read_nb('../tests/metadata.ipynb')
clean_nb(test_nb, clear_all=True)

# Notebook-level metadata is still filtered by the default allow-list...
assert {'jekyll', 'kernelspec'} == set(test_nb.metadata.keys())
# ...while the cell metadata is emptied completely.
test_eq(test_nb.cells[1].metadata, {})
173 | 233 | ]
|
174 | 234 | },
|
175 | 235 | {
|
|
227 | 287 | "):\n",
|
228 | 288 | " \"Clean all notebooks in `fname` to avoid merge conflicts\"\n",
|
229 | 289 | " # Git hooks will pass the notebooks in stdin\n",
|
230 |
| - " _clean = partial(clean_nb, clear_all=clear_all)\n", |
| 290 | + " allowed_metadata_keys = config_key(\"allowed_metadata_keys\", default='', missing_ok=True, path=False).split()\n", |
| 291 | + " allowed_cell_metadata_keys = config_key(\"allowed_cell_metadata_keys\", default='', missing_ok=True, path=False).split()\n", |
| 292 | + " _clean = partial(clean_nb, clear_all=clear_all,\n", |
| 293 | + " allowed_metadata_keys=allowed_metadata_keys,\n", |
| 294 | + " allowed_cell_metadata_keys=allowed_cell_metadata_keys)\n", |
231 | 295 | " _write = partial(process_write, warn_msg='Failed to clean notebook', proc_nb=_clean)\n",
|
232 | 296 | " if stdin: return _write(f_in=sys.stdin, f_out=sys.stdout)\n",
|
233 | 297 | " \n",
|
|
239 | 303 | "cell_type": "markdown",
|
240 | 304 | "metadata": {},
|
241 | 305 | "source": [
|
242 |
| - "By default (`fname` left to `None`), the all the notebooks in `lib_folder` are cleaned. You can opt in to fully clean the notebook by removing every bit of metadata and the cell outputs by passing `clear_all=True`." |
| 306 | + "By default (`fname` left to `None`), all the notebooks in `lib_folder` are cleaned. You can opt in to fully clean the notebook by removing every bit of metadata and the cell outputs by passing `clear_all=True`.\n", |
| 307 | + "\n", |
| 308 | + "If you want to keep some keys in the main notebook metadata you can set `allowed_metadata_keys` in `settings.ini`.\n", |
| 309 | + "Similarly for cell level metadata use: `allowed_cell_metadata_keys`. For example, to preserve both `k1` and `k2` at both the notebook and cell level, add the following to `settings.ini`:\n", |
| 310 | + "```\n", |
| 311 | + "...\n", |
| 312 | + "allowed_metadata_keys = k1 k2\n", |
| 313 | + "allowed_cell_metadata_keys = k1 k2\n", |
| 314 | + "...\n", |
| 315 | + "```" |
243 | 316 | ]
|
244 | 317 | },
|
245 | 318 | {
|
|
0 commit comments