-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathnb-stripout.py
executable file
·98 lines (74 loc) · 2.7 KB
/
nb-stripout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import argparse
import io
import json
import sys
# Re-wrap the raw standard streams with an explicit UTF-8 codec so notebook
# JSON is read and written as UTF-8.
#
# sys.stdin may be None (the interpreter can start without a usable stdin).
# Always bind input_stream -- to None in that case -- so the later
# `if not args.files and input_stream:` check evaluates to False instead of
# raising NameError.
input_stream = (
    io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8") if sys.stdin else None
)
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
def _build_parser():
    """Return the command-line parser for this script.

    Options:
        -t / --textconv: boolean flag; print the cleaned notebook to the
            output stream instead of rewriting the file.
        -d / --doc-mode: boolean flag; use the docs-notebook cleaning rules.
        files: zero or more notebook paths.
    """
    cli = argparse.ArgumentParser()
    switches = (
        (("-t", "--textconv"), "Print results to output"),
        (("-d", "--doc-mode"), "fastai docs nb-specific strip out"),
    )
    for names, description in switches:
        cli.add_argument(*names, action="store_true", help=description)
    cli.add_argument("files", nargs="*", help="Files to strip output from")
    return cli


# Parsed once at start-up; the rest of the script reads `args`.
parser = _build_parser()
args = parser.parse_args()
# Allow-lists of metadata keys that survive stripping; everything else is
# dropped.

# Code-notebook cells: nothing is kept.
cell_metadata_keep_code = list()

# Docs-notebook cells: keep only the "hide_input" key.
cell_metadata_keep_docs = [
    "hide_input",
]

# Notebook-level metadata kept in both modes.
nb_metadata_keep = [
    "kernelspec",
    "jekyll",
]
def clean_cell_outputs(o):
    """Blank the execution counter of one cell-output entry, in place.

    Args:
        o: A mapping taken from a cell's ``outputs`` list. Only an
            ``execution_count`` key that is already present is touched;
            the key is never added.

    Returns:
        None. ``o`` is mutated directly.
    """
    if "execution_count" not in o:
        return
    o["execution_count"] = None
### filter for doc nb cells ###
# 1. reset execution_count (on the cell and on each entry of its outputs)
# 2. keep only the metadata keys listed in cell_metadata_keep_docs
def clean_cell_docs(o):
    """Clean one cell of a docs notebook in place, keeping its outputs.

    Args:
        o: The cell mapping. It must have a ``metadata`` key; a missing one
            raises ``KeyError``.

    Returns:
        The same ``o`` object, after mutation.
    """
    if "execution_count" in o:
        o["execution_count"] = None
    # Outputs are kept; only their execution counters are blanked.
    for output in o.get("outputs", ()):
        clean_cell_outputs(output)
    o["metadata"] = {
        key: value
        for key, value in o["metadata"].items()
        if key in cell_metadata_keep_docs
    }
    return o
### filter for code nb cells ###
# 1. reset execution_count
# 2. delete cell's outputs
# 3. delete cell's metadata
def clean_cell_code(o):
    """Clean one cell of a code notebook in place, dropping its outputs.

    Args:
        o: The cell mapping. ``execution_count`` and ``outputs`` are reset
            only when already present; ``metadata`` is always set to an
            empty dict, even if it was absent.

    Returns:
        The same ``o`` object, after mutation.
    """
    # The tuple is rebuilt on every call, so each cell gets its own fresh
    # empty list for "outputs".
    for field, cleared in (("execution_count", None), ("outputs", [])):
        if field in o:
            o[field] = cleared
    o["metadata"] = {}
    return o
# optimize runtime: choose the per-cell cleaner once, up front, so the mode
# flag is not re-checked for every cell
if args.doc_mode:
    clean_cell = clean_cell_docs
else:
    clean_cell = clean_cell_code
### filter for nb top level entries ###
# 1. every cell goes through whichever cleaner clean_cell is bound to
# 2. notebook metadata keeps only the keys listed in nb_metadata_keep
def clean_nb(s):
    """Strip a parsed notebook in place.

    Args:
        s: The notebook mapping. It must have ``cells`` and ``metadata``
            keys; a missing one raises ``KeyError``.

    Returns:
        None. ``s`` is mutated directly.
    """
    s["cells"] = list(map(clean_cell, s["cells"]))
    kept = {}
    for key, value in s["metadata"].items():
        if key in nb_metadata_keep:
            kept[key] = value
    s["metadata"] = kept
# Explicit-file mode: clean every *.ipynb path given on the command line.
# Paths with any other suffix are silently skipped.
for filename in (path for path in args.files if path.endswith(".ipynb")):
    with open(filename, "r", encoding="utf-8") as source:
        notebook = json.load(source)
    clean_nb(notebook)
    # sort_keys=True with indent=1 keeps the serialized form stable.
    text = json.dumps(notebook, sort_keys=True, indent=1, ensure_ascii=False)
    if not args.textconv:
        # Default: overwrite the notebook in place, newline-terminated.
        with open(filename, "w", encoding="utf-8") as target:
            print(text, file=target)
        continue
    # XXX: if there is more than one file, this is probably wrong
    print(text, file=output_stream, flush=True)
# implied textconv mode: with no file arguments, act as a filter that reads
# one notebook from input_stream and prints the cleaned JSON to output_stream
if not args.files and input_stream:
    notebook = json.load(input_stream)
    clean_nb(notebook)
    print(
        json.dumps(notebook, sort_keys=True, indent=1, ensure_ascii=False),
        file=output_stream,
        flush=True,
    )