src/Pure/Thy/markdown.ML
author wenzelm
Sat, 17 Oct 2015 19:47:34 +0200
changeset 61461 77c9643a6353
parent 61460 732028edfbc7
child 61595 3591274c607e
permissions -rw-r--r--
more explicit output of list items;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Thy/markdown.ML
    Author:     Makarius

Minimal support for Markdown documents (see also http://commonmark.org)
that consist only of paragraphs and (nested) lists:

  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
  * adjacent list items with same indentation and same marker are grouped
    into a single list
  * singleton blank lines separate paragraphs
  * multiple blank lines escape from the current list hierarchy

Notable differences to official Markdown:

  * indentation of list items needs to match exactly
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
  * list items always consist of paragraphs -- no notion of "tight" list
*)
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    19
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    20
signature MARKDOWN =
sig
  (*list item markers*)
  val is_control: Symbol.symbol -> bool
  datatype kind = Itemize | Enumerate | Description
  val print_kind: kind -> string

  (*document lines*)
  type line
  val line_source: line -> Antiquote.text_antiquote list
  val line_is_item: line -> bool
  val line_content: line -> Antiquote.text_antiquote list
  val make_line: Antiquote.text_antiquote list -> line
  val empty_line: line

  (*document blocks*)
  datatype block =
    Paragraph of line list |
    List of {indent: int, kind: kind, body: block list}

  (*reading documents*)
  val read_lines: line list -> block list
  val read_antiquotes: Antiquote.text_antiquote list -> block list
  val read_source: Input.source -> block list

  (*PIDE reports*)
  val text_reports: Antiquote.text_antiquote list -> Position.report list
  val reports: block list -> Position.report list
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    38
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    39
structure Markdown: MARKDOWN =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    40
struct
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    41
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    42
(* document lines *)
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    43
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    44
(*test whether a symbol is one of the three list item markers*)
fun is_control sym = member (op =) ["\\<^item>", "\\<^enum>", "\\<^descr>"] sym;
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    45
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    46
(*kind of list, as determined by the item marker*)
datatype kind =
  Itemize |
  Enumerate |
  Description;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    47
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    48
(*lower-case name of a list kind*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    51
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    52
(*document line, as produced by make_line:
    source: the line as given
    is_empty: whether the source consists of spaces only
    indent: number of leading spaces scanned before the marker position
    item: list kind of the item marker, if present
    item_pos: position of the item marker, or Position.none without marker
    content: remainder of the source after the marker prefix was scanned*)
datatype line =
  Line of {
    source: Antiquote.text_antiquote list,
    is_empty: bool,
    indent: int,
    item: kind option,
    item_pos: Position.T,
    content: Antiquote.text_antiquote list};
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    60
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    61
(*artificial end-of-file line, used as stopper by read_lines*)
val eof_line =
  let val eof_text = Antiquote.Text [(Symbol.eof, Position.none)] in
    Line {source = [eof_text], is_empty = false, indent = 0, item = NONE,
      item_pos = Position.none, content = []}
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    64
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    65
(*the source field of a line*)
fun line_source (Line args) = #source args;
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
    66
(*the is_empty flag of a line*)
fun line_is_empty (Line args) = #is_empty args;
61461
77c9643a6353 more explicit output of list items;
wenzelm
parents: 61460
diff changeset
    67
(*whether a line carries an item marker*)
fun line_is_item (Line {item = NONE, ...}) = false
  | line_is_item _ = true;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    68
(*the content field of a line*)
fun line_content (Line args) = #content args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    69
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    70
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    71
(* make line *)
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    72
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    73
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    74
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    75
(*ASCII blank symbol other than plain space*)
fun bad_blank (sym_pos: Symbol_Pos.T) =
  let val sym = #1 sym_pos
  in sym <> Symbol.space andalso Symbol.is_ascii_blank sym end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    76
(*all bad blanks within the text parts of a source; other elements contribute nothing*)
fun bad_blanks source =
  let
    fun bad (Antiquote.Text syms) = filter bad_blank syms
      | bad _ = [];
  in maps bad source end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    77
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    78
(*raise an error for the first bad blank of the source, if any*)
fun check_blanks source =
  let
    fun fail (sym, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char sym) ^ Position.here pos);
  in
    (case bad_blanks source of
      bad :: _ => fail bad
    | [] => ())
  end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    83
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    84
(*symbol with position that is a plain space*)
fun is_space (sym_pos: Symbol_Pos.T) = #1 sym_pos = Symbol.space;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    85
(*source that consists only of text parts made of spaces; holds for the empty source*)
fun is_empty source =
  let
    fun blank (Antiquote.Text syms) = forall is_space syms
      | blank _ = false;
  in forall blank source end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    86
61444
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
    87
(*scan the prefix of a line: leading spaces, optional item marker, further spaces;
  the result is (number of leading spaces, optional kind, marker position),
  where the position is Position.none unless a marker is present*)
val scan_marker =
  let
    val spaces = Scan.many is_space;
    val scan_kind =
      Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
      Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
      Symbol_Pos.$$ "\\<^descr>" >> K Description;
    fun result ((blanks, _), NONE) = (length blanks, NONE, Position.none)
      | result ((blanks, pos), item) = (length blanks, item, pos);
  in (spaces -- Symbol_Pos.scan_pos -- Scan.option scan_kind --| spaces) >> result end;
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    94
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    95
(*split a source into marker information and remaining content: only a leading
  text part is scanned, and it is dropped from the content if fully consumed;
  any other source yields the default marker (0, NONE, Position.none)*)
fun read_marker source =
  (case source of
    Antiquote.Text syms :: more =>
      let
        val (marker, remaining) = Scan.finite Symbol_Pos.stopper scan_marker syms;
        val content = if null remaining then more else Antiquote.Text remaining :: more;
      in (marker, content) end
  | _ => ((0, NONE, Position.none), source));
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   100
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   101
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   102
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
   103
(*make a document line from its source: blanks are checked first (error on
  bad blank characters), then the marker prefix is separated from the content*)
fun make_line source =
  let
    val () = check_blanks source;
    val (marker, content) = read_marker source;
    val (indent, item, item_pos) = marker;
    val empty = is_empty source;
  in
    Line {source = source, is_empty = empty, indent = indent, item = item,
      item_pos = item_pos, content = content}
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   111
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   112
(*line made from the empty source*)
val empty_line: line = make_line [];
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   113
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   114
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   115
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   116
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   117
(* document blocks *)
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   118
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   119
(*document block: paragraph of lines, or list of given indentation and kind
  with nested blocks as body*)
datatype block =
  Paragraph of line list |
  List of {indent: int, kind: kind, body: block list};
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   121
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   122
(*all lines of a block, descending into nested list bodies*)
fun block_lines block =
  (case block of
    Paragraph lines => lines
  | List {body, ...} => maps block_lines body);
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   124
61452
wenzelm
parents: 61451
diff changeset
   125
(*position range of a block: a paragraph is measured by the content of its
  lines, a list by the full source of all lines within its body*)
fun block_range block =
  let
    val text =
      (case block of
        Paragraph lines => maps line_content lines
      | List {body, ...} => maps line_source (maps block_lines body));
  in Antiquote.range text end;
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   127
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   128
(*indentation of a block: that of the list, or of the first line of a
  paragraph; 0 for a paragraph without lines*)
fun block_indent block =
  (case block of
    List {indent, ...} => indent
  | Paragraph (Line {indent, ...} :: _) => indent
  | Paragraph [] => 0);
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   131
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   132
(*body of a list block that matches the given indentation and kind exactly*)
fun block_list indent0 kind0 block =
  (case block of
    List {indent, kind, body} =>
      if indent0 <> indent orelse kind0 <> kind then NONE else SOME body
  | Paragraph _ => NONE);
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   135
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   136
(*discriminate list blocks from paragraphs*)
fun is_list (List _) = true
  | is_list (Paragraph _) = false;
61452
wenzelm
parents: 61451
diff changeset
   137
wenzelm
parents: 61451
diff changeset
   138
wenzelm
parents: 61451
diff changeset
   139
(* read document *)
wenzelm
parents: 61451
diff changeset
   140
wenzelm
parents: 61451
diff changeset
   141
local
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   142
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   143
(*put one parsed entry (indent, item, rev_body) in front of the document that
  follows it:
    - without item marker, the reversed body is prepended in original order
    - an item followed by a list of the same indentation and kind is merged
      into that list
    - an item followed by a more deeply indented block absorbs that block into
      its body and continues with the rest (strictly deeper for lists, deeper
      or equal for paragraphs)
    - otherwise the item becomes a new list in front of the document*)
fun build (_, NONE, rev_body) document = fold cons rev_body document
  | build (indent, SOME kind, rev_body) [] =
      [List {indent = indent, kind = kind, body = rev rev_body}]
  | build (indent, item as SOME kind, rev_body) (document as block :: blocks) =
      let
        fun make_list body = List {indent = indent, kind = kind, body = body};
        val nested =
          if is_list block then indent < block_indent block
          else indent <= block_indent block;
      in
        (case block_list indent kind block of
          SOME list => make_list (fold cons rev_body list) :: blocks
        | NONE =>
            if nested then build (indent, item, block :: rev_body) blocks
            else make_list (rev rev_body) :: document)
      end;
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   154
61460
wenzelm
parents: 61459
diff changeset
   155
(*continuation line of a paragraph: non-empty, without item marker, not eof*)
fun plain_line line =
  let val Line {is_empty = empty, item, ...} = line in
    if empty orelse is_some item then false else line <> eof_line
  end;
61444
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
   157
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   158
(*parse one paragraph: any non-eof line followed by plain lines; the result
  carries indentation and item of the first line, for use by build*)
val parse_paragraph =
  let
    fun not_eof line = line <> eof_line;
    fun paragraph (first as Line {indent, item, ...}, more) =
      (indent, item, [Paragraph (first :: more)]);
  in Scan.one not_eof -- Scan.many plain_line >> paragraph end;
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   164
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   165
(*parse a sequence of paragraphs, each pair separated by at most one empty
  line, and assemble the block structure from right to left via build*)
val parse_document =
  let
    val blank = Scan.option (Scan.one line_is_empty);
    val more_paragraphs = Scan.repeat (blank |-- parse_paragraph);
    fun assemble pars = fold_rev build pars [];
  in (parse_paragraph ::: more_paragraphs) >> assemble end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   168
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   169
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   170
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   171
(*read lines as a series of documents, skipping empty lines before, between
  and after them, with eof_line as stopper; the blocks of all documents are
  concatenated, and a failed read yields the empty list*)
fun read_lines lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blanks = Scan.many line_is_empty;
    val documents = Scan.repeat (blanks |-- parse_document) --| blanks;
  in
    (case Scan.read stopper documents lines of
      SOME result => flat result
    | NONE => [])
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   175
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   176
(*split antiquoted text into lines, make document lines, and read the blocks*)
fun read_antiquotes text = read_lines (map make_line (Antiquote.split_lines text));
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   177
(*read blocks from input source, via Antiquote.read*)
fun read_source source = read_antiquotes (Antiquote.read source);
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   178
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   179
end;
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   180
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   181
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   182
(* PIDE reports *)
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   183
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   184
(*report Markup.words for the position of each text part; other elements
  produce no report*)
fun text_reports text =
  let
    fun words (Antiquote.Text syms) = [(#1 (Symbol_Pos.range syms), Markup.words)]
      | words _ = [];
  in maps words text end;
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   186
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   187
local
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   188
61460
wenzelm
parents: 61459
diff changeset
   189
(*add reports for one line: the item markup at the marker position goes in
  front of the accumulated reports, preceded by the reports for the content*)
fun line_reports depth (Line {item_pos, content, ...}) reports =
  text_reports content @ ((item_pos, Markup.markdown_item depth) :: reports);
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   191
61452
wenzelm
parents: 61451
diff changeset
   192
(*add reports for a block: markup for the block itself at the start of its
  range, then the reports of its lines (paragraph) or of its nested blocks
  at depth + 1 (list)*)
fun block_reports depth block reports =
  let val pos = #1 (block_range block) in
    (case block of
      Paragraph lines =>
        fold (line_reports depth) lines ((pos, Markup.markdown_paragraph) :: reports)
    | List {kind, body, ...} =>
        fold (block_reports (depth + 1)) body
          ((pos, Markup.markdown_list (print_kind kind)) :: reports))
  end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   200
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   201
in
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   202
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   203
(*all reports for the blocks, starting at depth 0, restricted to positions
  that satisfy Position.is_reported*)
fun reports blocks =
  let
    val all_reports = fold (block_reports 0) blocks [];
    fun reported (pos, _) = Position.is_reported pos;
  in filter reported all_reports end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   205
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   206
end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   207
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   208
end;