| author | wenzelm | 
| Mon, 22 Aug 2022 21:37:06 +0200 | |
| changeset 75961 | 787a203a20b6 | 
| parent 69207 | ae2074acbaa8 | 
| permissions | -rw-r--r-- | 
| 61441 | 1 | (* Title: Pure/Thy/markdown.ML | 
| 2 | Author: Makarius | |
| 3 | ||
| 61448 | 4 | Minimal support for Markdown documents (see also http://commonmark.org) | 
| 5 | that consist only of paragraphs and (nested) lists: | |
| 6 | ||
| 7 | * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description) | |
| 8 | * adjacent list items with same indentation and same marker are grouped | |
| 9 | into a single list | |
| 10 | * singleton blank lines separate paragraphs | |
| 11 | * multiple blank lines escape from the current list hierarchy | |
| 12 | ||
| 13 | Notable differences to official Markdown: | |
| 14 | ||
| 15 | * indentation of list items needs to match exactly | |
| 16 | * indentation is unlimited (Markdown interprets 4 spaces as block quote) | |
| 17 | * list items always consist of paragraphs -- no notion of "tight" list | |
| 61441 | 18 | *) | 
| 19 | ||
| 20 | signature MARKDOWN = | |
| 21 | sig | |
| 61443 | 22 | datatype kind = Itemize | Enumerate | Description | 
| 61449 | 23 | val print_kind: kind -> string | 
| 61595 | 24 | val is_control: Symbol.symbol -> bool | 
| 61441 | 25 | type line | 
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 26 | val line_source: line -> Antiquote.text_antiquote list | 
| 61461 | 27 | val line_is_item: line -> bool | 
| 61443 | 28 | val line_content: line -> Antiquote.text_antiquote list | 
| 61445 | 29 | val make_line: Antiquote.text_antiquote list -> line | 
| 30 | val empty_line: line | |
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 31 |   datatype block = Par of line list | List of {indent: int, kind: kind, body: block list}
 | 
| 61445 | 32 | val read_lines: line list -> block list | 
| 61457 | 33 | val read_antiquotes: Antiquote.text_antiquote list -> block list | 
| 34 | val read_source: Input.source -> block list | |
| 35 | val text_reports: Antiquote.text_antiquote list -> Position.report list | |
| 61449 | 36 | val reports: block list -> Position.report list | 
| 61441 | 37 | end; | 
| 38 | ||
| 39 | structure Markdown: MARKDOWN = | |
| 40 | struct | |
| 41 | ||
| 61595 | 42 | (* item kinds *) | 
| 61457 | 43 | |
| 61441 | 44 | datatype kind = Itemize | Enumerate | Description; | 
| 61449 | 45 | |
| 67336 | 46 | fun print_kind Itemize = Markup.itemizeN | 
| 47 | | print_kind Enumerate = Markup.enumerateN | |
| 48 | | print_kind Description = Markup.descriptionN; | |
| 61449 | 49 | |
| 61595 | 50 | val kinds = [("item", Itemize), ("enum", Enumerate), ("descr", Description)];
 | 
| 51 | ||
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
61595diff
changeset | 52 | val is_control = member (op =) ["\<^item>", "\<^enum>", "\<^descr>"]; | 
| 61595 | 53 | |
| 54 | ||
| 55 | (* document lines *) | |
| 56 | ||
| 61441 | 57 | datatype line = | 
| 58 | Line of | |
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 59 |    {source: Antiquote.text_antiquote list,
 | 
| 61441 | 60 | is_empty: bool, | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 61 | indent: int, | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 62 | item: kind option, | 
| 67322 | 63 | bullet_pos: Position.T, | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 64 | content: Antiquote.text_antiquote list}; | 
| 61445 | 65 | |
| 66 | val eof_line = | |
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 67 |   Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
 | 
| 67322 | 68 | is_empty = false, indent = 0, item = NONE, bullet_pos = Position.none, content = []}; | 
| 61441 | 69 | |
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 70 | fun line_source (Line {source, ...}) = source;
 | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 71 | fun line_is_empty (Line {is_empty, ...}) = is_empty;
 | 
| 61461 | 72 | fun line_is_item (Line {item, ...}) = is_some item;
 | 
| 61441 | 73 | fun line_content (Line {content, ...}) = content;
 | 
| 74 | ||
| 61443 | 75 | |
| 76 | (* make line *) | |
| 77 | ||
| 61441 | 78 | local | 
| 79 | ||
| 80 | fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space; | |
| 81 | val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []); | |
| 82 | ||
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 83 | fun check_blanks source = | 
| 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 84 | (case bad_blanks source of | 
| 61441 | 85 | [] => () | 
| 86 | | (c, pos) :: _ => | |
| 69207 | 87 |       error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos));
 | 
| 61441 | 88 | |
| 62804 | 89 | val is_space = Symbol.is_space o Symbol_Pos.symbol; | 
| 61441 | 90 | val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false); | 
| 91 | ||
| 61595 | 92 | fun strip_spaces (Antiquote.Text ss :: rest) = | 
| 67522 | 93 | let val (sp, ss') = chop_prefix is_space ss | 
| 61595 | 94 | in (length sp, if null ss' then rest else Antiquote.Text ss' :: rest) end | 
| 95 | | strip_spaces source = (0, source); | |
| 61442 | 96 | |
| 61595 | 97 | fun read_marker source = | 
| 98 | let val (indent, source') = strip_spaces source in | |
| 99 | (case source' of | |
| 100 |       (control as Antiquote.Control {name = (name, pos), body = [], ...}) :: rest =>
 | |
| 101 | let | |
| 102 | val item = AList.lookup (op =) kinds name; | |
| 67322 | 103 | val bullet_pos = if is_some item then pos else Position.none; | 
| 61595 | 104 | val (_, rest') = strip_spaces (if is_some item then rest else control :: rest); | 
| 67322 | 105 | in ((indent, item, bullet_pos), rest') end | 
| 61595 | 106 | | _ => ((indent, NONE, Position.none), source')) | 
| 107 | end; | |
| 61441 | 108 | |
| 109 | in | |
| 110 | ||
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 111 | fun make_line source = | 
| 61441 | 112 | let | 
| 61451 
7f530057bc3c
clarified line content: source without marker prefix;
 wenzelm parents: 
61450diff
changeset | 113 | val _ = check_blanks source; | 
| 67322 | 114 | val ((indent, item, bullet_pos), content) = read_marker source; | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 115 | in | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 116 |     Line {source = source, is_empty = is_empty source, indent = indent,
 | 
| 67322 | 117 | item = item, bullet_pos = bullet_pos, content = content} | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 118 | end; | 
| 61441 | 119 | |
| 61445 | 120 | val empty_line = make_line []; | 
| 121 | ||
| 61441 | 122 | end; | 
| 123 | ||
| 124 | ||
| 61445 | 125 | (* document blocks *) | 
| 126 | ||
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 127 | datatype block = Par of line list | List of {indent: int, kind: kind, body: block list};
 | 
| 61445 | 128 | |
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 129 | fun block_lines (Par lines) = lines | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 130 |   | block_lines (List {body, ...}) = maps block_lines body;
 | 
| 61450 | 131 | |
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 132 | fun block_source (Par lines) = maps line_source lines | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 133 |   | block_source (List {body, ...}) = maps line_source (maps block_lines body);
 | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 134 | |
| 67467 | 135 | fun block_range (Par lines) = Antiquote.text_range (maps line_content lines) | 
| 136 |   | block_range (List {body, ...}) = Antiquote.text_range (maps line_source (maps block_lines body));
 | |
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 137 | |
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 138 | fun block_indent (List {indent, ...}) = indent
 | 
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 139 |   | block_indent (Par (Line {indent, ...} :: _)) = indent
 | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 140 | | block_indent _ = 0; | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 141 | |
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 142 | fun block_list indent0 kind0 (List {indent, kind, body}) =
 | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 143 | if indent0 = indent andalso kind0 = kind then SOME body else NONE | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 144 | | block_list _ _ _ = NONE; | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 145 | |
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 146 | val is_list = fn List _ => true | _ => false; | 
| 61452 | 147 | |
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 148 | val is_item = fn Par (line :: _) => line_is_item line | _ => false; | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 149 | |
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 150 | fun list_items [] = [] | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 151 | | list_items (item :: rest) = | 
| 67522 | 152 | let val (item_rest, rest') = chop_prefix (not o is_item) rest; | 
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 153 | in (item :: item_rest) :: list_items rest' end; | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 154 | |
| 61452 | 155 | |
| 156 | (* read document *) | |
| 157 | ||
| 158 | local | |
| 61450 | 159 | |
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 160 | fun build (indent, item, rev_body) document = | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 161 | (case (item, document) of | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 162 | (SOME kind, block :: blocks) => | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 163 | (case block_list indent kind block of | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 164 |         SOME list => List {indent = indent, kind = kind, body = fold cons rev_body list} :: blocks
 | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 165 | | NONE => | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 166 | if (if is_list block then indent < block_indent block else indent <= block_indent block) | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 167 | then build (indent, item, block :: rev_body) blocks | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 168 |           else List {indent = indent, kind = kind, body = rev rev_body} :: document)
 | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 169 |   | (SOME kind, []) => [List {indent = indent, kind = kind, body = rev rev_body}]
 | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 170 | | (NONE, _) => fold cons rev_body document); | 
| 61445 | 171 | |
| 61460 | 172 | fun plain_line (line as Line {is_empty, item, ...}) =
 | 
| 173 | not is_empty andalso is_none item andalso line <> eof_line; | |
| 61444 | 174 | |
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 175 | val parse_paragraph = | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 176 | Scan.one (fn line => line <> eof_line) -- Scan.many plain_line >> (fn (line, lines) => | 
| 61460 | 177 | let | 
| 178 |       val Line {indent, item, ...} = line;
 | |
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 179 | val block = Par (line :: lines); | 
| 61460 | 180 | in (indent, item, [block]) end); | 
| 61443 | 181 | |
| 182 | val parse_document = | |
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 183 | parse_paragraph ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_paragraph) | 
| 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 184 | >> (fn pars => fold_rev build pars []); | 
| 61441 | 185 | |
| 186 | in | |
| 187 | ||
| 61445 | 188 | val read_lines = | 
| 189 | Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line)) | |
| 190 | (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #> | |
| 191 | the_default [] #> flat; | |
| 61441 | 192 | |
| 61457 | 193 | val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines; | 
| 67571 
f858fe5531ac
more uniform treatment of formal comments within document source;
 wenzelm parents: 
67522diff
changeset | 194 | val read_source = Antiquote.read_comments #> read_antiquotes; | 
| 61441 | 195 | |
| 61457 | 196 | end; | 
| 61445 | 197 | |
| 61449 | 198 | |
| 199 | (* PIDE reports *) | |
| 200 | ||
| 61457 | 201 | val text_reports = | 
| 202 | maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []); | |
| 203 | ||
| 61449 | 204 | local | 
| 205 | ||
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 206 | val block_pos = #1 o block_range; | 
| 67467 | 207 | val item_pos = #1 o Antiquote.text_range o maps block_source; | 
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 208 | |
| 67322 | 209 | fun line_reports depth (Line {bullet_pos, content, ...}) =
 | 
| 210 | cons (bullet_pos, Markup.markdown_bullet depth) #> append (text_reports content); | |
| 61449 | 211 | |
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 212 | fun item_reports blocks = | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 213 | cons (item_pos blocks, Markup.markdown_item); | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 214 | |
| 61452 | 215 | fun block_reports depth block = | 
| 216 | (case block of | |
| 64357 
e10fa8afc96c
tuned signature: avoid conflict with "paragraph" as section heading;
 wenzelm parents: 
62804diff
changeset | 217 | Par lines => | 
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 218 | cons (block_pos block, Markup.markdown_paragraph) #> | 
| 61450 | 219 | fold (line_reports depth) lines | 
| 61459 
5f2ddeb15c06
clarified nesting of paragraphs: indentation is taken into account more uniformly;
 wenzelm parents: 
61457diff
changeset | 220 |   | List {kind, body, ...} =>
 | 
| 67323 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 221 | cons (block_pos block, Markup.markdown_list (print_kind kind)) #> | 
| 
d02208cefbdb
PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
 wenzelm parents: 
67322diff
changeset | 222 | fold item_reports (list_items body) #> | 
| 61452 | 223 | fold (block_reports (depth + 1)) body); | 
| 61449 | 224 | |
| 225 | in | |
| 226 | ||
| 61450 | 227 | fun reports blocks = | 
| 228 | filter (Position.is_reported o #1) (fold (block_reports 0) blocks []); | |
| 61449 | 229 | |
| 61441 | 230 | end; | 
| 61449 | 231 | |
| 232 | end; |