src/Pure/Thy/markdown.ML
author wenzelm
Fri, 07 May 2021 16:49:08 +0200
changeset 73647 a037f01aedab
parent 69207 ae2074acbaa8
permissions -rw-r--r--
tuned comments;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Thy/markdown.ML
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     3
61448
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     4
Minimal support for Markdown documents (see also http://commonmark.org)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     5
that consist only of paragraphs and (nested) lists:
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     6
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     7
  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     8
  * adjacent list items with same indentation and same marker are grouped
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     9
    into a single list
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    10
  * singleton blank lines separate paragraphs
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    11
  * multiple blank lines escape from the current list hierarchy
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    12
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    13
Notable differences to official Markdown:
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    14
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    15
  * indentation of list items needs to match exactly
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    16
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    17
  * list items always consist of paragraphs -- no notion of "tight" list
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    18
*)
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    19
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    20
signature MARKDOWN =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    21
sig
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    22
  datatype kind = Itemize | Enumerate | Description
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    23
  val print_kind: kind -> string
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    24
  val is_control: Symbol.symbol -> bool
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    25
  type line
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    26
  val line_source: line -> Antiquote.text_antiquote list
61461
77c9643a6353 more explicit output of list items;
wenzelm
parents: 61460
diff changeset
    27
  val line_is_item: line -> bool
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    28
  val line_content: line -> Antiquote.text_antiquote list
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    29
  val make_line: Antiquote.text_antiquote list -> line
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    30
  val empty_line: line
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
    31
  datatype block = Par of line list | List of {indent: int, kind: kind, body: block list}
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    32
  val read_lines: line list -> block list
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    33
  val read_antiquotes: Antiquote.text_antiquote list -> block list
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    34
  val read_source: Input.source -> block list
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    35
  val text_reports: Antiquote.text_antiquote list -> Position.report list
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    36
  val reports: block list -> Position.report list
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    37
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    38
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    39
structure Markdown: MARKDOWN =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    40
struct
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    41
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    42
(* item kinds *)
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
    43
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    44
datatype kind = Itemize | Enumerate | Description;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    45
67336
3ee6da378183 HTML output for Markdown elements;
wenzelm
parents: 67323
diff changeset
    46
fun print_kind Itemize = Markup.itemizeN
3ee6da378183 HTML output for Markdown elements;
wenzelm
parents: 67323
diff changeset
    47
  | print_kind Enumerate = Markup.enumerateN
3ee6da378183 HTML output for Markdown elements;
wenzelm
parents: 67323
diff changeset
    48
  | print_kind Description = Markup.descriptionN;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    49
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    50
val kinds = [("item", Itemize), ("enum", Enumerate), ("descr", Description)];
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    51
62529
8b7bdfc09f3b clarified treatment of fragments of Isabelle symbols during bootstrap;
wenzelm
parents: 61595
diff changeset
    52
val is_control = member (op =) ["\<^item>", "\<^enum>", "\<^descr>"];
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    53
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    54
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    55
(* document lines *)
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    56
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    57
datatype line =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    58
  Line of
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    59
   {source: Antiquote.text_antiquote list,
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    60
    is_empty: bool,
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
    61
    indent: int,
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
    62
    item: kind option,
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
    63
    bullet_pos: Position.T,
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
    64
    content: Antiquote.text_antiquote list};
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    65
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    66
val eof_line =
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    67
  Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
    68
    is_empty = false, indent = 0, item = NONE, bullet_pos = Position.none, content = []};
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    69
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    70
fun line_source (Line {source, ...}) = source;
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
    71
fun line_is_empty (Line {is_empty, ...}) = is_empty;
61461
77c9643a6353 more explicit output of list items;
wenzelm
parents: 61460
diff changeset
    72
fun line_is_item (Line {item, ...}) = is_some item;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    73
fun line_content (Line {content, ...}) = content;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    74
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    75
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    76
(* make line *)
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    77
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    78
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    79
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    80
fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    81
val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    82
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    83
fun check_blanks source =
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
    84
  (case bad_blanks source of
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    85
    [] => ()
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    86
  | (c, pos) :: _ =>
69207
ae2074acbaa8 clarified signature;
wenzelm
parents: 67571
diff changeset
    87
      error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos));
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    88
62804
7b9c5416f30e tuned signature;
wenzelm
parents: 62529
diff changeset
    89
val is_space = Symbol.is_space o Symbol_Pos.symbol;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    90
val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    91
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    92
fun strip_spaces (Antiquote.Text ss :: rest) =
67522
9e712280cc37 clarified take/drop/chop prefix/suffix;
wenzelm
parents: 67467
diff changeset
    93
      let val (sp, ss') = chop_prefix is_space ss
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    94
      in (length sp, if null ss' then rest else Antiquote.Text ss' :: rest) end
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    95
  | strip_spaces source = (0, source);
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    96
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    97
fun read_marker source =
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    98
  let val (indent, source') = strip_spaces source in
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
    99
    (case source' of
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   100
      (control as Antiquote.Control {name = (name, pos), body = [], ...}) :: rest =>
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   101
        let
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   102
          val item = AList.lookup (op =) kinds name;
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   103
          val bullet_pos = if is_some item then pos else Position.none;
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   104
          val (_, rest') = strip_spaces (if is_some item then rest else control :: rest);
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   105
        in ((indent, item, bullet_pos), rest') end
61595
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   106
    | _ => ((indent, NONE, Position.none), source'))
3591274c607e more formal treatment of control symbols;
wenzelm
parents: 61461
diff changeset
   107
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   108
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   109
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   110
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
   111
fun make_line source =
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   112
  let
61451
7f530057bc3c clarified line content: source without marker prefix;
wenzelm
parents: 61450
diff changeset
   113
    val _ = check_blanks source;
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   114
    val ((indent, item, bullet_pos), content) = read_marker source;
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   115
  in
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   116
    Line {source = source, is_empty = is_empty source, indent = indent,
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   117
      item = item, bullet_pos = bullet_pos, content = content}
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   118
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   119
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   120
val empty_line = make_line [];
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   121
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   122
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   123
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   124
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   125
(* document blocks *)
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   126
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
   127
datatype block = Par of line list | List of {indent: int, kind: kind, body: block list};
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   128
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
   129
fun block_lines (Par lines) = lines
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   130
  | block_lines (List {body, ...}) = maps block_lines body;
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   131
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   132
fun block_source (Par lines) = maps line_source lines
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   133
  | block_source (List {body, ...}) = maps line_source (maps block_lines body);
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   134
67467
482b62d694ca clarified signature;
wenzelm
parents: 67336
diff changeset
   135
fun block_range (Par lines) = Antiquote.text_range (maps line_content lines)
482b62d694ca clarified signature;
wenzelm
parents: 67336
diff changeset
   136
  | block_range (List {body, ...}) = Antiquote.text_range (maps line_source (maps block_lines body));
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   137
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   138
fun block_indent (List {indent, ...}) = indent
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
   139
  | block_indent (Par (Line {indent, ...} :: _)) = indent
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   140
  | block_indent _ = 0;
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   141
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   142
fun block_list indent0 kind0 (List {indent, kind, body}) =
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   143
      if indent0 = indent andalso kind0 = kind then SOME body else NONE
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   144
  | block_list _ _ _ = NONE;
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   145
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   146
val is_list = fn List _ => true | _ => false;
61452
wenzelm
parents: 61451
diff changeset
   147
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   148
val is_item = fn Par (line :: _) => line_is_item line | _ => false;
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   149
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   150
fun list_items [] = []
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   151
  | list_items (item :: rest) =
67522
9e712280cc37 clarified take/drop/chop prefix/suffix;
wenzelm
parents: 67467
diff changeset
   152
      let val (item_rest, rest') = chop_prefix (not o is_item) rest;
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   153
      in (item :: item_rest) :: list_items rest' end;
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   154
61452
wenzelm
parents: 61451
diff changeset
   155
wenzelm
parents: 61451
diff changeset
   156
(* read document *)
wenzelm
parents: 61451
diff changeset
   157
wenzelm
parents: 61451
diff changeset
   158
local
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   159
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   160
fun build (indent, item, rev_body) document =
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   161
  (case (item, document) of
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   162
    (SOME kind, block :: blocks) =>
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   163
      (case block_list indent kind block of
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   164
        SOME list => List {indent = indent, kind = kind, body = fold cons rev_body list} :: blocks
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   165
      | NONE =>
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   166
          if (if is_list block then indent < block_indent block else indent <= block_indent block)
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   167
          then build (indent, item, block :: rev_body) blocks
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   168
          else List {indent = indent, kind = kind, body = rev rev_body} :: document)
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   169
  | (SOME kind, []) => [List {indent = indent, kind = kind, body = rev rev_body}]
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   170
  | (NONE, _) => fold cons rev_body document);
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   171
61460
wenzelm
parents: 61459
diff changeset
   172
fun plain_line (line as Line {is_empty, item, ...}) =
wenzelm
parents: 61459
diff changeset
   173
  not is_empty andalso is_none item andalso line <> eof_line;
61444
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
   174
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   175
val parse_paragraph =
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   176
  Scan.one (fn line => line <> eof_line) -- Scan.many plain_line >> (fn (line, lines) =>
61460
wenzelm
parents: 61459
diff changeset
   177
    let
wenzelm
parents: 61459
diff changeset
   178
      val Line {indent, item, ...} = line;
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
   179
      val block = Par (line :: lines);
61460
wenzelm
parents: 61459
diff changeset
   180
    in (indent, item, [block]) end);
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   181
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   182
val parse_document =
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   183
  parse_paragraph ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_paragraph)
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   184
    >> (fn pars => fold_rev build pars []);
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   185
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   186
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   187
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   188
val read_lines =
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   189
  Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   190
    (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   191
  the_default [] #> flat;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   192
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   193
val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines;
67571
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   194
val read_source = Antiquote.read_comments #> read_antiquotes;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   195
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   196
end;
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   197
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   198
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   199
(* PIDE reports *)
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   200
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   201
val text_reports =
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   202
  maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []);
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61454
diff changeset
   203
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   204
local
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   205
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   206
val block_pos = #1 o block_range;
67467
482b62d694ca clarified signature;
wenzelm
parents: 67336
diff changeset
   207
val item_pos = #1 o Antiquote.text_range o maps block_source;
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   208
67322
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   209
fun line_reports depth (Line {bullet_pos, content, ...}) =
734a4e44b159 clarified terminology of "markdown_bullet";
wenzelm
parents: 64357
diff changeset
   210
  cons (bullet_pos, Markup.markdown_bullet depth) #> append (text_reports content);
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   211
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   212
fun item_reports blocks =
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   213
  cons (item_pos blocks, Markup.markdown_item);
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   214
61452
wenzelm
parents: 61451
diff changeset
   215
fun block_reports depth block =
wenzelm
parents: 61451
diff changeset
   216
  (case block of
64357
e10fa8afc96c tuned signature: avoid conflict with "paragraph" as section heading;
wenzelm
parents: 62804
diff changeset
   217
    Par lines =>
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   218
      cons (block_pos block, Markup.markdown_paragraph) #>
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   219
      fold (line_reports depth) lines
61459
5f2ddeb15c06 clarified nesting of paragraphs: indentation is taken into account more uniformly;
wenzelm
parents: 61457
diff changeset
   220
  | List {kind, body, ...} =>
67323
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   221
      cons (block_pos block, Markup.markdown_list (print_kind kind)) #>
d02208cefbdb PIDE markup for Markdown items (which may consist of multiple paragraphs or lists);
wenzelm
parents: 67322
diff changeset
   222
      fold item_reports (list_items body) #>
61452
wenzelm
parents: 61451
diff changeset
   223
      fold (block_reports (depth + 1)) body);
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   224
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   225
in
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   226
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   227
fun reports blocks =
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   228
  filter (Position.is_reported o #1) (fold (block_reports 0) blocks []);
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   229
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   230
end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   231
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   232
end;