author | wenzelm |
Thu, 15 Oct 2015 16:37:14 +0200 | |
changeset 61452 | fa665e3df0ca |
parent 61451 | 7f530057bc3c |
child 61453 | 3a3e3527445e |
permissions | -rw-r--r-- |
61441 | 1 |
(* Title: Pure/Thy/markdown.ML |
2 |
Author: Makarius |
|
3 |
||
61448 | 4 |
Minimal support for Markdown documents (see also http://commonmark.org) |
5 |
that consist only of paragraphs and (nested) lists: |
|
6 |
||
7 |
* list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description) |
|
8 |
* adjacent list items with same indentation and same marker are grouped |
|
9 |
into a single list |
|
10 |
* singleton blank lines separate paragraphs |
|
11 |
* multiple blank lines escape from the current list hierarchy |
|
12 |
||
13 |
Notable differences to official Markdown: |
|
14 |
||
15 |
* indentation of list items needs to match exactly |
|
16 |
* indentation is unlimited (Markdown interprets 4 spaces as block quote) |
|
17 |
* list items always consist of paragraphs -- no notion of "tight" list |
|
61441 | 18 |
*) |
19 |
||
20 |
(*interface of the minimal Markdown reader*)
signature MARKDOWN =
sig
  (*list markers*)
  datatype kind = Itemize | Enumerate | Description
  val print_kind: kind -> string
  type marker = {indent: int, kind: kind}

  (*document lines*)
  type line
  val line_source: line -> Antiquote.text_antiquote list
  val line_content: line -> Antiquote.text_antiquote list
  val make_line: Antiquote.text_antiquote list -> line
  val empty_line: line

  (*document blocks*)
  datatype block = Paragraph of line list | List of marker * block list
  val read_lines: line list -> block list
  val read: Input.source -> block list

  (*PIDE reports*)
  val reports: block list -> Position.report list
end;
35 |
||
36 |
structure Markdown: MARKDOWN = |
|
37 |
struct |
|
38 |
||
61445 | 39 |
(* document lines *) |
61441 | 40 |
|
41 |
(*kind of list, one constructor per item marker symbol*)
datatype kind =
  Itemize |
  Enumerate |
  Description;
|
61449 | 42 |
|
43 |
(*external name of list kind*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
|
46 |
||
61445 | 47 |
(*list item marker: indentation level and kind of list*)
type marker =
 {indent: int,
  kind: kind};
61441 | 48 |
|
49 |
(*document line with derived information:
    source: original text of the line
    content: remaining text after the optional marker prefix
    is_empty: whether the line is treated as blank
    marker: optional list item marker with its position*)
datatype line =
  Line of
   {source: Antiquote.text_antiquote list,
    content: Antiquote.text_antiquote list,
    is_empty: bool,
    marker: (marker * Position.T) option};
55 |
||
56 |
(*artificial line that marks the end of input: it consists of the eof symbol
  only, without content or marker, and does not count as empty*)
val eof_line =
  Line
   {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
    content = [],
    is_empty = false,
    marker = NONE};
61441 | 59 |
|
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
60 |
(*projections of the line record*)
fun line_source (Line args) = #source args;
fun line_content (Line args) = #content args;
fun line_is_empty (Line args) = #is_empty args;
fun line_marker (Line args) = #marker args;
|
64 |
||
61443 | 65 |
|
66 |
(* make line *) |
|
67 |
||
61441 | 68 |
local |
69 |
||
70 |
(*ASCII blank symbol that is not the plain space*)
fun bad_blank ((sym, _): Symbol_Pos.T) =
  sym <> Symbol.space andalso Symbol.is_ascii_blank sym;

(*all bad blanks within text segments; other antiquote elements contribute nothing*)
fun bad_blanks source =
  source |> maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []);
|
72 |
||
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
73 |
(*raise an error for the first bad blank of the line, if there is any*)
fun check_blanks source =
  (case bad_blanks source of
    (c, pos) :: _ =>
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos)
  | [] => ());
|
78 |
||
79 |
(*plain space symbol*)
fun is_space ((sym, _): Symbol_Pos.T) = (sym = Symbol.space);

(*line is empty if all its elements are text segments that consist of spaces
  only; this includes the line without any elements*)
fun is_empty source =
  source |> forall (fn Antiquote.Text syms => forall is_space syms | _ => false);
|
81 |
||
61444 | 82 |
(*scan list item marker: leading spaces, followed by one of the control
  symbols \<^item>, \<^enum>, \<^descr>.  The number of leading spaces becomes
  the indent; the position is taken via Symbol_Pos.scan_pos after the spaces.*)
val scan_marker =
  let
    val scan_kind =
      Symbol_Pos.$$ "\<^item>" >> K Itemize ||
      Symbol_Pos.$$ "\<^enum>" >> K Enumerate ||
      Symbol_Pos.$$ "\<^descr>" >> K Description;
    fun make_marker ((blanks, pos), kind) = ({indent = length blanks, kind = kind}, pos);
  in Scan.many is_space -- Symbol_Pos.scan_pos -- scan_kind >> make_marker end;
|
61442 | 88 |
|
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
89 |
(*split line source into optional marker and remaining content.

  Only an initial text segment is examined: the optional marker and
  subsequent spaces are removed from it, and the segment is dropped
  altogether if nothing remains.  Any other source is returned unchanged,
  without marker.*)
fun read_marker source =
  (case source of
    Antiquote.Text syms :: more =>
      let
        val scan_prefix = Scan.option scan_marker --| Scan.many is_space;
        val (marker, syms') = Scan.finite Symbol_Pos.stopper scan_prefix syms;
      in (marker, if null syms' then more else Antiquote.Text syms' :: more) end
  | _ => (NONE, source));
61441 | 94 |
|
95 |
in |
|
96 |
||
61451
7f530057bc3c
clarified line content: source without marker prefix;
wenzelm
parents:
61450
diff
changeset
|
97 |
(*make document line from its source: bad blanks are rejected first, then
  marker, content, and emptiness are determined*)
fun make_line source =
  let
    val () = check_blanks source;
    val (marker, content) = read_marker source;
    val empty = is_empty source;
  in Line {marker = marker, is_empty = empty, content = content, source = source} end;

(*line without any source*)
val empty_line = make_line [];
104 |
||
61441 | 105 |
end; |
106 |
||
107 |
||
61445 | 108 |
(* document blocks *) |
109 |
||
110 |
(*document structure: paragraph of lines, or list with its marker and body*)
datatype block =
  Paragraph of line list |
  List of marker * block list;
|
111 |
||
61450 | 112 |
(*all lines of a block, including those of nested blocks*)
fun block_lines block =
  (case block of
    Paragraph lines => lines
  | List (_, body) => maps block_lines body);
|
114 |
||
61452 | 115 |
(*range of a block: paragraphs refer to the content of their lines, lists to
  the full source of all lines within their body*)
fun block_range block =
  Antiquote.range
    (case block of
      Paragraph lines => maps line_content lines
    | List (_, body) => maps line_source (maps block_lines body));
|
117 |
||
118 |
||
119 |
(* read document *) |
|
120 |
||
121 |
local |
|
61450 | 122 |
|
61445 | 123 |
(*Add one span in front of the document built from the subsequent spans.

  opt_marker: marker of the list item, or NONE for plain paragraphs
  body: blocks of the span
  document: blocks built so far

  A marked span is merged into a directly following list with the same
  marker.  A following list of deeper indentation becomes a sub-list of the
  item, and the extended item is then added to the remaining document in the
  same manner: thus further deeper lists are absorbed as well, and the item is
  grouped with a sibling item that occurs after its sub-lists.  Otherwise the
  item starts a new list.  Plain paragraphs are prepended as they are.*)
fun add_span (opt_marker, body) document =
  (case (opt_marker, document) of
    (SOME marker, (list as List (list_marker, list_body)) :: rest) =>
      if marker = list_marker then
        List (list_marker, body @ list_body) :: rest
      else if #indent marker < #indent list_marker then
        (*nested list belongs to this item: continue with remaining document*)
        add_span (opt_marker, body @ [list]) rest
      else
        List (marker, body) :: document
  | (SOME marker, _) => List (marker, body) :: document
  | (NONE, _) => body @ document);
|
134 |
||
61442 | 135 |
(*plain text line: neither empty, nor list item, nor end of input*)
fun plain_line line =
  not (line_is_empty line orelse is_some (line_marker line) orelse line = eof_line);
61441 | 137 |
|
61444 | 138 |
(*paragraph: non-empty sequence of plain lines*)
val parse_paragraph = Scan.many1 plain_line >> (fn lines => Paragraph lines);
139 |
||
61441 | 140 |
(*span: either a plain paragraph without marker, or a list item.  The item
  consists of the marker line with subsequent plain lines as first paragraph,
  followed by further paragraphs that are each preceded by one empty line.*)
val parse_span =
  let
    fun plain_result par = (NONE, [par]);
    fun item_result ((item, lines), pars) =
      (Option.map #1 (line_marker item), Paragraph (item :: lines) :: pars);

    val scan_item = Scan.one (is_some o line_marker) -- Scan.many plain_line;
    val scan_more = Scan.repeat (Scan.one line_is_empty |-- parse_paragraph);
  in
    parse_paragraph >> plain_result ||
    scan_item -- scan_more >> item_result
  end;
|
61443 | 146 |
|
147 |
(*document: non-empty sequence of spans, each optionally separated by one
  empty line; spans are added from right to left to form the block structure*)
val parse_document =
  let
    val next_span = Scan.option (Scan.one line_is_empty) |-- parse_span;
    fun make_document spans = fold_rev add_span spans [];
  in parse_span ::: Scan.repeat next_span >> make_document end;
|
61441 | 150 |
|
151 |
in |
|
152 |
||
61445 | 153 |
(*read blocks from lines: a sequence of documents, each preceded by arbitrary
  many empty lines, with trailing empty lines ignored.  The results of all
  documents are concatenated; absence of a result means no blocks.*)
fun read_lines lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blanks = Scan.many line_is_empty;
    val documents = Scan.repeat (blanks |-- parse_document) --| blanks;
  in
    (case Scan.read stopper documents lines of
      SOME blocks => flat blocks
    | NONE => [])
  end;
|
61441 | 157 |
|
158 |
end; |
|
159 |
||
61445 | 160 |
(*read blocks from input source: antiquote text is split into lines first*)
fun read source =
  read_lines (map make_line (Antiquote.split_lines (Antiquote.read source)));
161 |
||
61449 | 162 |
|
163 |
(* PIDE reports *) |
|
164 |
||
165 |
local |
|
166 |
||
167 |
(*add item markup at the position of the line marker, if there is any*)
fun line_reports depth line =
  (case line_marker line of
    SOME (_, pos) => cons (pos, Markup.markdown_item depth)
  | NONE => I);
170 |
||
61452 | 171 |
(*add reports for a block: paragraph or list markup at the start of its
  range, followed by reports for its lines or nested blocks; the body of a
  list is at the next depth*)
fun block_reports depth block =
  let
    val pos = #1 (block_range block);
  in
    (case block of
      Paragraph lines =>
        cons (pos, Markup.markdown_paragraph) #> fold (line_reports depth) lines
    | List (marker, body) =>
        cons (pos, Markup.markdown_list (print_kind (#kind marker))) #>
        fold (block_reports (depth + 1)) body)
  end;
|
61449 | 179 |
|
180 |
in |
|
181 |
||
61450 | 182 |
(*reports for all blocks, starting at depth 0; only reported positions are retained*)
fun reports blocks =
  [] |> fold (block_reports 0) blocks |> filter (fn (pos, _) => Position.is_reported pos);
|
61449 | 184 |
|
61441 | 185 |
end; |
61449 | 186 |
|
187 |
end; |