author | wenzelm |
Fri, 07 May 2021 16:49:08 +0200 | |
changeset 73647 | a037f01aedab |
parent 69207 | ae2074acbaa8 |
permissions | -rw-r--r-- |
61441 | 1 |
(* Title: Pure/Thy/markdown.ML |
2 |
Author: Makarius |
|
3 |
||
61448 | 4 |
Minimal support for Markdown documents (see also http://commonmark.org) |
5 |
that consist only of paragraphs and (nested) lists: |
|
6 |
||
7 |
* list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description) |
|
8 |
* adjacent list items with same indentation and same marker are grouped |
|
9 |
into a single list |
|
10 |
* singleton blank lines separate paragraphs |
|
11 |
* multiple blank lines escape from the current list hierarchy |
|
12 |
||
13 |
Notable differences to official Markdown: |
|
14 |
||
15 |
* indentation of list items needs to match exactly |
|
16 |
* indentation is unlimited (Markdown interprets 4 spaces as block quote) |
|
17 |
* list items always consist of paragraphs -- no notion of "tight" list |
|
61441 | 18 |
*) |
19 |
||
20 |
signature MARKDOWN = |
|
21 |
sig |
|
61443 | 22 |
datatype kind = Itemize | Enumerate | Description |
61449 | 23 |
val print_kind: kind -> string |
61595 | 24 |
val is_control: Symbol.symbol -> bool |
61441 | 25 |
type line |
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
26 |
val line_source: line -> Antiquote.text_antiquote list |
61461 | 27 |
val line_is_item: line -> bool |
61443 | 28 |
val line_content: line -> Antiquote.text_antiquote list |
61445 | 29 |
val make_line: Antiquote.text_antiquote list -> line |
30 |
val empty_line: line |
|
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
31 |
datatype block = Par of line list | List of {indent: int, kind: kind, body: block list} |
61445 | 32 |
val read_lines: line list -> block list |
61457 | 33 |
val read_antiquotes: Antiquote.text_antiquote list -> block list |
34 |
val read_source: Input.source -> block list |
|
35 |
val text_reports: Antiquote.text_antiquote list -> Position.report list |
|
61449 | 36 |
val reports: block list -> Position.report list |
61441 | 37 |
end; |
38 |
||
39 |
structure Markdown: MARKDOWN = |
|
40 |
struct |
|
41 |
||
61595 | 42 |
(* item kinds *) |
61457 | 43 |
|
61441 | 44 |
datatype kind = Itemize | Enumerate | Description; |
61449 | 45 |
|
67336 | 46 |
fun print_kind Itemize = Markup.itemizeN |
47 |
| print_kind Enumerate = Markup.enumerateN |
|
48 |
| print_kind Description = Markup.descriptionN; |
|
61449 | 49 |
|
61595 | 50 |
val kinds = [("item", Itemize), ("enum", Enumerate), ("descr", Description)]; |
51 |
||
62529
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
wenzelm
parents:
61595
diff
changeset
|
52 |
val is_control = member (op =) ["\<^item>", "\<^enum>", "\<^descr>"]; |
61595 | 53 |
|
54 |
||
55 |
(* document lines *) |
|
56 |
||
61441 | 57 |
datatype line = |
58 |
Line of |
|
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
59 |
{source: Antiquote.text_antiquote list, |
61441 | 60 |
is_empty: bool, |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
61 |
indent: int, |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
62 |
item: kind option, |
67322 | 63 |
bullet_pos: Position.T, |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
64 |
content: Antiquote.text_antiquote list}; |
61445 | 65 |
|
66 |
val eof_line = |
|
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
67 |
Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]], |
67322 | 68 |
is_empty = false, indent = 0, item = NONE, bullet_pos = Position.none, content = []}; |
61441 | 69 |
|
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
70 |
fun line_source (Line {source, ...}) = source; |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
71 |
fun line_is_empty (Line {is_empty, ...}) = is_empty; |
61461 | 72 |
fun line_is_item (Line {item, ...}) = is_some item; |
61441 | 73 |
fun line_content (Line {content, ...}) = content; |
74 |
||
61443 | 75 |
|
76 |
(* make line *) |
|
77 |
||
61441 | 78 |
local |
79 |
||
80 |
fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space; |
|
81 |
val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []); |
|
82 |
||
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
83 |
fun check_blanks source = |
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
84 |
(case bad_blanks source of |
61441 | 85 |
[] => () |
86 |
| (c, pos) :: _ => |
|
69207 | 87 |
error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos)); |
61441 | 88 |
|
62804 | 89 |
val is_space = Symbol.is_space o Symbol_Pos.symbol; |
61441 | 90 |
val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false); |
91 |
||
61595 | 92 |
fun strip_spaces (Antiquote.Text ss :: rest) = |
67522 | 93 |
let val (sp, ss') = chop_prefix is_space ss |
61595 | 94 |
in (length sp, if null ss' then rest else Antiquote.Text ss' :: rest) end |
95 |
| strip_spaces source = (0, source); |
|
61442 | 96 |
|
61595 | 97 |
fun read_marker source = |
98 |
let val (indent, source') = strip_spaces source in |
|
99 |
(case source' of |
|
100 |
(control as Antiquote.Control {name = (name, pos), body = [], ...}) :: rest => |
|
101 |
let |
|
102 |
val item = AList.lookup (op =) kinds name; |
|
67322 | 103 |
val bullet_pos = if is_some item then pos else Position.none; |
61595 | 104 |
val (_, rest') = strip_spaces (if is_some item then rest else control :: rest); |
67322 | 105 |
in ((indent, item, bullet_pos), rest') end |
61595 | 106 |
| _ => ((indent, NONE, Position.none), source')) |
107 |
end; |
|
61441 | 108 |
|
109 |
in |
|
110 |
||
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
111 |
fun make_line source = |
61441 | 112 |
let |
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
113 |
val _ = check_blanks source; |
67322 | 114 |
val ((indent, item, bullet_pos), content) = read_marker source; |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
115 |
in |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
116 |
Line {source = source, is_empty = is_empty source, indent = indent, |
67322 | 117 |
item = item, bullet_pos = bullet_pos, content = content} |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
118 |
end; |
61441 | 119 |
|
61445 | 120 |
val empty_line = make_line []; |
121 |
||
61441 | 122 |
end; |
123 |
||
124 |
||
61445 | 125 |
(* document blocks *) |
126 |
||
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
127 |
datatype block = Par of line list | List of {indent: int, kind: kind, body: block list}; |
61445 | 128 |
|
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
129 |
fun block_lines (Par lines) = lines |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
130 |
| block_lines (List {body, ...}) = maps block_lines body; |
61450 | 131 |
|
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
132 |
fun block_source (Par lines) = maps line_source lines |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
133 |
| block_source (List {body, ...}) = maps line_source (maps block_lines body); |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
134 |
|
67467 | 135 |
fun block_range (Par lines) = Antiquote.text_range (maps line_content lines) |
136 |
| block_range (List {body, ...}) = Antiquote.text_range (maps line_source (maps block_lines body)); |
|
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
137 |
|
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
138 |
fun block_indent (List {indent, ...}) = indent |
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
139 |
| block_indent (Par (Line {indent, ...} :: _)) = indent |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
140 |
| block_indent _ = 0; |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
141 |
|
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
142 |
fun block_list indent0 kind0 (List {indent, kind, body}) = |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
143 |
if indent0 = indent andalso kind0 = kind then SOME body else NONE |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
144 |
| block_list _ _ _ = NONE; |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
145 |
|
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
146 |
val is_list = fn List _ => true | _ => false; |
61452 | 147 |
|
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
148 |
val is_item = fn Par (line :: _) => line_is_item line | _ => false; |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
149 |
|
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
150 |
fun list_items [] = [] |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
151 |
| list_items (item :: rest) = |
67522 | 152 |
let val (item_rest, rest') = chop_prefix (not o is_item) rest; |
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
153 |
in (item :: item_rest) :: list_items rest' end; |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
154 |
|
61452 | 155 |
|
156 |
(* read document *) |
|
157 |
||
158 |
local |
|
61450 | 159 |
|
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
160 |
fun build (indent, item, rev_body) document = |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
161 |
(case (item, document) of |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
162 |
(SOME kind, block :: blocks) => |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
163 |
(case block_list indent kind block of |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
164 |
SOME list => List {indent = indent, kind = kind, body = fold cons rev_body list} :: blocks |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
165 |
| NONE => |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
166 |
if (if is_list block then indent < block_indent block else indent <= block_indent block) |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
167 |
then build (indent, item, block :: rev_body) blocks |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
168 |
else List {indent = indent, kind = kind, body = rev rev_body} :: document) |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
169 |
| (SOME kind, []) => [List {indent = indent, kind = kind, body = rev rev_body}] |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
170 |
| (NONE, _) => fold cons rev_body document); |
61445 | 171 |
|
61460 | 172 |
fun plain_line (line as Line {is_empty, item, ...}) = |
173 |
not is_empty andalso is_none item andalso line <> eof_line; |
|
61444 | 174 |
|
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
175 |
val parse_paragraph = |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
176 |
Scan.one (fn line => line <> eof_line) -- Scan.many plain_line >> (fn (line, lines) => |
61460 | 177 |
let |
178 |
val Line {indent, item, ...} = line; |
|
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
179 |
val block = Par (line :: lines); |
61460 | 180 |
in (indent, item, [block]) end); |
61443 | 181 |
|
182 |
val parse_document = |
|
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
183 |
parse_paragraph ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_paragraph) |
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
184 |
>> (fn pars => fold_rev build pars []); |
61441 | 185 |
|
186 |
in |
|
187 |
||
61445 | 188 |
val read_lines = |
189 |
Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line)) |
|
190 |
(Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #> |
|
191 |
the_default [] #> flat; |
|
61441 | 192 |
|
61457 | 193 |
val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines; |
67571
f858fe5531ac
more uniform treatment of formal comments within document source;
wenzelm
parents:
67522
diff
changeset
|
194 |
val read_source = Antiquote.read_comments #> read_antiquotes; |
61441 | 195 |
|
61457 | 196 |
end; |
61445 | 197 |
|
61449 | 198 |
|
199 |
(* PIDE reports *) |
|
200 |
||
61457 | 201 |
val text_reports = |
202 |
maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []); |
|
203 |
||
61449 | 204 |
local |
205 |
||
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
206 |
val block_pos = #1 o block_range; |
67467 | 207 |
val item_pos = #1 o Antiquote.text_range o maps block_source; |
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
208 |
|
67322 | 209 |
fun line_reports depth (Line {bullet_pos, content, ...}) = |
210 |
cons (bullet_pos, Markup.markdown_bullet depth) #> append (text_reports content); |
|
61449 | 211 |
|
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
212 |
fun item_reports blocks = |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
213 |
cons (item_pos blocks, Markup.markdown_item); |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
214 |
|
61452 | 215 |
fun block_reports depth block = |
216 |
(case block of |
|
64357
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents:
62804
diff
changeset
|
217 |
Par lines => |
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
218 |
cons (block_pos block, Markup.markdown_paragraph) #> |
61450 | 219 |
fold (line_reports depth) lines |
61459
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents:
61457
diff
changeset
|
220 |
| List {kind, body, ...} => |
67323
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
221 |
cons (block_pos block, Markup.markdown_list (print_kind kind)) #> |
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents:
67322
diff
changeset
|
222 |
fold item_reports (list_items body) #> |
61452 | 223 |
fold (block_reports (depth + 1)) body); |
61449 | 224 |
|
225 |
in |
|
226 |
||
61450 | 227 |
fun reports blocks = |
228 |
filter (Position.is_reported o #1) (fold (block_reports 0) blocks []); |
|
61449 | 229 |
|
61441 | 230 |
end; |
61449 | 231 |
|
232 |
end; |