src/Pure/Thy/markdown.ML
author wenzelm
Fri Nov 06 23:31:50 2015 +0100 (2015-11-06)
changeset 61595 3591274c607e
parent 61461 77c9643a6353
child 62529 8b7bdfc09f3b
permissions -rw-r--r--
more formal treatment of control symbols;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
    Author:     Makarius

Minimal support for Markdown documents (see also http://commonmark.org)
that consist only of paragraphs and (nested) lists:

  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
  * adjacent list items with same indentation and same marker are grouped
    into a single list
  * singleton blank lines separate paragraphs
  * multiple blank lines escape from the current list hierarchy

Notable differences to official Markdown:

  * indentation of list items needs to match exactly
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
  * list items always consist of paragraphs -- no notion of "tight" list
*)
wenzelm@61441
    19
wenzelm@61441
    20
signature MARKDOWN =
sig
  (*list item kinds and their marker symbols*)
  datatype kind = Itemize | Enumerate | Description
  val print_kind: kind -> string
  val is_control: Symbol.symbol -> bool

  (*single document lines*)
  type line
  val line_source: line -> Antiquote.text_antiquote list
  val line_is_item: line -> bool
  val line_content: line -> Antiquote.text_antiquote list
  val make_line: Antiquote.text_antiquote list -> line
  val empty_line: line

  (*block structure: paragraphs and (nested) lists*)
  datatype block =
    Paragraph of line list |
    List of {indent: int, kind: kind, body: block list}
  val read_lines: line list -> block list
  val read_antiquotes: Antiquote.text_antiquote list -> block list
  val read_source: Input.source -> block list

  (*PIDE reports*)
  val text_reports: Antiquote.text_antiquote list -> Position.report list
  val reports: block list -> Position.report list
end;
wenzelm@61441
    38
wenzelm@61441
    39
structure Markdown: MARKDOWN =
wenzelm@61441
    40
struct
wenzelm@61441
    41
wenzelm@61595
    42
(* item kinds *)
wenzelm@61457
    43
wenzelm@61441
    44
(*The three list flavours that an item marker can introduce.*)
datatype kind =
  Itemize |
  Enumerate |
  Description;
wenzelm@61449
    45
wenzelm@61449
    46
(*Lower-case name of a list kind.*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
wenzelm@61449
    49
wenzelm@61595
    50
(*Association list from the name of a control symbol (e.g. "item" for
  \<^item>) to the list kind that it introduces.*)
val kinds =
 [("item", Itemize),
  ("enum", Enumerate),
  ("descr", Description)];
wenzelm@61595
    51
wenzelm@61595
    52
(*Recognize the symbols that serve as list item markers.

  The symbols are derived from the names in kinds (one control symbol
  \<^name> per entry) instead of being spelled out a second time, so that
  the two tables cannot get out of sync.  For the current entries this
  yields exactly \<^item>, \<^enum>, \<^descr>.*)
val is_control =
  member (op =) (map (fn (name, _) => "\\<^" ^ name ^ ">") kinds);
wenzelm@61595
    53
wenzelm@61595
    54
wenzelm@61595
    55
(* document lines *)
wenzelm@61595
    56
wenzelm@61441
    57
(*One line of a Markdown document, as produced by make_line.*)
datatype line =
  Line of
   {(*the line exactly as given to make_line*)
    source: Antiquote.text_antiquote list,

    (*true iff every element of source is text that consists only of spaces*)
    is_empty: bool,

    (*number of spaces stripped from the start of the line*)
    indent: int,

    (*list kind of the item marker that follows the indentation, if any*)
    item: kind option,

    (*position of that item marker; Position.none without marker*)
    item_pos: Position.T,

    (*remaining source after indentation, item marker, and the spaces
      following the marker*)
    content: Antiquote.text_antiquote list};
wenzelm@61445
    65
wenzelm@61445
    66
(*Artificial line that marks the end of input for the line scanner: its
  source is the single symbol Symbol.eof, and it is neither empty nor an item.*)
val eof_line =
  let val eof_source = [Antiquote.Text [(Symbol.eof, Position.none)]] in
    Line
     {source = eof_source,
      is_empty = false,
      indent = 0,
      item = NONE,
      item_pos = Position.none,
      content = []}
  end;
wenzelm@61441
    69
wenzelm@61451
    70
(*Original source of a line.*)
fun line_source (Line args) = #source args;
wenzelm@61459
    71
(*Whether a line was classified as empty when it was made.*)
fun line_is_empty (Line args) = #is_empty args;
wenzelm@61461
    72
(*Whether a line carries an item marker.*)
fun line_is_item (Line {item = SOME _, ...}) = true
  | line_is_item _ = false;
wenzelm@61441
    73
(*Content of a line, i.e. its source without indentation and item marker.*)
fun line_content (Line args) = #content args;
wenzelm@61441
    74
wenzelm@61443
    75
wenzelm@61443
    76
(* make line *)
wenzelm@61443
    77
wenzelm@61441
    78
local
wenzelm@61441
    79
wenzelm@61441
    80
(*An ASCII blank other than the plain space character.*)
fun bad_blank ((sym, _): Symbol_Pos.T) =
  if sym = Symbol.space then false else Symbol.is_ascii_blank sym;
wenzelm@61441
    81
(*All bad blanks within the text parts of a source, in order of occurrence;
  non-text elements are skipped.*)
fun bad_blanks source =
  let
    fun bad_in (Antiquote.Text ss) = filter bad_blank ss
      | bad_in _ = [];
  in maps bad_in source end;
wenzelm@61441
    82
wenzelm@61451
    83
(*Raise an error for the first bad blank of the source, if there is one.*)
fun check_blanks source =
  let
    fun fail (c, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos);
  in
    (case bad_blanks source of
      [] => ()
    | bad :: _ => fail bad)
  end;
wenzelm@61441
    88
wenzelm@61441
    89
(*Positioned symbol that is the plain space character.*)
fun is_space (sym_pos: Symbol_Pos.T) = (#1 sym_pos = Symbol.space);
wenzelm@61441
    90
(*A source is empty if all of its elements are text made of spaces only
  (this includes the source without any elements).*)
fun is_empty source =
  let
    fun blank_text (Antiquote.Text ss) = forall is_space ss
      | blank_text _ = false;
  in forall blank_text source end;
wenzelm@61441
    91
wenzelm@61595
    92
(*Remove leading spaces from the first element of the source, provided that
  it is text.  Result: number of removed spaces and the remaining source; a
  text element that becomes empty is dropped altogether.*)
fun strip_spaces source =
  (case source of
    Antiquote.Text syms :: more =>
      let
        val (spaces, remaining) = take_prefix is_space syms;
        val source' = if null remaining then more else Antiquote.Text remaining :: more;
      in (length spaces, source') end
  | _ => (0, source));
wenzelm@61442
    96
wenzelm@61595
    97
(*Split a line into ((indent, item, item_pos), content).

  After stripping the indentation, a control symbol without body whose name
  is listed in kinds counts as item marker: it is removed together with the
  spaces that follow it.  Anything else (including unknown control symbols)
  is left untouched as content, with item NONE and item_pos Position.none.*)
fun read_marker source =
  let
    val (indent, stripped) = strip_spaces source;
    val no_marker = ((indent, NONE, Position.none), stripped);
  in
    (case stripped of
      Antiquote.Control {name = (name, pos), body = [], ...} :: rest =>
        (case AList.lookup (op =) kinds name of
          SOME kind => ((indent, SOME kind, pos), #2 (strip_spaces rest))
        | NONE => no_marker)
    | _ => no_marker)
  end;
wenzelm@61441
   108
wenzelm@61441
   109
in
wenzelm@61441
   110
wenzelm@61451
   111
(*Turn the source of one line into a line value.  Bad blanks are rejected
  first (via error); then indentation, item marker and content are determined.*)
fun make_line source =
  (check_blanks source;
    (case read_marker source of
      ((indent, item, item_pos), content) =>
        Line
         {source = source,
          is_empty = is_empty source,
          indent = indent,
          item = item,
          item_pos = item_pos,
          content = content}));
wenzelm@61441
   119
wenzelm@61445
   120
(*The line made from the source without any elements.*)
val empty_line = make_line ([]: Antiquote.text_antiquote list);
wenzelm@61445
   121
wenzelm@61441
   122
end;
wenzelm@61441
   123
wenzelm@61441
   124
wenzelm@61445
   125
(* document blocks *)
wenzelm@61445
   126
wenzelm@61459
   127
(*Document structure: a paragraph is a sequence of lines, a list has an
  indentation, a kind, and a body of nested blocks.*)
datatype block =
  Paragraph of line list |
  List of {indent: int, kind: kind, body: block list};
wenzelm@61445
   129
wenzelm@61450
   130
(*All lines of a block, descending into the bodies of lists.*)
fun block_lines block =
  (case block of
    Paragraph lines => lines
  | List {body, ...} => maps block_lines body);
wenzelm@61450
   132
wenzelm@61452
   133
(*Range of a block: a paragraph is measured by the content of its lines, a
  list by the full source of all lines within its body.*)
fun block_range block =
  let
    val ants =
      (case block of
        Paragraph lines => maps line_content lines
      | List {body, ...} => maps line_source (maps block_lines body));
  in Antiquote.range ants end;
wenzelm@61459
   135
wenzelm@61459
   136
(*Indentation of a block: that of the list itself, or of the first line of a
  paragraph; 0 for a paragraph without lines.*)
fun block_indent block =
  (case block of
    List {indent, ...} => indent
  | Paragraph [] => 0
  | Paragraph (Line {indent, ...} :: _) => indent);
wenzelm@61459
   139
wenzelm@61459
   140
(*Body of a block, provided that it is a list of the given indentation and
  kind; NONE otherwise.*)
fun block_list indent0 kind0 block =
  (case block of
    List {indent, kind, body} =>
      if indent0 <> indent orelse kind0 <> kind then NONE else SOME body
  | Paragraph _ => NONE);
wenzelm@61459
   143
wenzelm@61459
   144
(*Discriminate lists from paragraphs.*)
fun is_list (List _) = true
  | is_list _ = false;
wenzelm@61452
   145
wenzelm@61452
   146
wenzelm@61452
   147
(* read document *)
wenzelm@61452
   148
wenzelm@61452
   149
local
wenzelm@61450
   150
wenzelm@61459
   151
(*Put one parsed paragraph group (indent, item, rev_body) in front of the
  already built document that follows it; rev_body holds the blocks of the
  group in reverse order.

  - Without item marker, the blocks are simply put in front of the document.
  - With item marker, following blocks are absorbed into the body of the item
    as long as they are indented deeper than the item (paragraphs may also
    have the same indentation).
  - If the next block is a list of the same indentation and kind, the item
    is merged into that list.
  - Otherwise the item becomes a new list in front of the remaining document.*)
fun build (_, NONE, rev_body) document = fold cons rev_body document
  | build (indent, SOME kind, rev_body) [] =
      [List {indent = indent, kind = kind, body = rev rev_body}]
  | build (indent, item as SOME kind, rev_body) (document as block :: blocks) =
      let
        fun make_list body = List {indent = indent, kind = kind, body = body};
      in
        (case block_list indent kind block of
          SOME items => make_list (fold cons rev_body items) :: blocks
        | NONE =>
            let
              val next_indent = block_indent block;
              val nested =
                if is_list block then indent < next_indent else indent <= next_indent;
            in
              if nested then build (indent, item, block :: rev_body) blocks
              else make_list (rev rev_body) :: document
            end)
      end;
wenzelm@61445
   162
wenzelm@61460
   163
(*Continuation line of a paragraph: neither empty, nor an item, nor the
  end-of-input line.*)
fun plain_line line =
  not (line_is_empty line orelse line_is_item line) andalso line <> eof_line;
wenzelm@61444
   165
wenzelm@61459
   166
(*Paragraph: one line that is not the end-of-input line, followed by plain
  lines.  The result carries the indentation and item of the first line
  together with the singleton list of the paragraph block.*)
val parse_paragraph =
  let
    fun not_eof line = line <> eof_line;
    fun paragraph (first as Line {indent, item, ...}, rest) =
      (indent, item, [Paragraph (first :: rest)]);
  in Scan.one not_eof -- Scan.many plain_line >> paragraph end;
wenzelm@61443
   172
wenzelm@61443
   173
(*Document: paragraphs that are separated by at most one empty line each.
  The parsed paragraphs are combined from right to left via build, starting
  from the empty document.*)
val parse_document =
  let
    val separator = Scan.option (Scan.one line_is_empty);
    val more_paragraphs = Scan.repeat (separator |-- parse_paragraph);
  in (parse_paragraph ::: more_paragraphs) >> (fn pars => fold_rev build pars []) end;
wenzelm@61441
   176
wenzelm@61441
   177
in
wenzelm@61441
   178
wenzelm@61445
   179
(*Read a list of lines as a sequence of documents, each of which may be
  preceded by empty lines; trailing empty lines are skipped.  The blocks of
  all documents are concatenated; a failed read yields no blocks at all.*)
val read_lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blanks = Scan.many line_is_empty;
    val documents = Scan.repeat (blanks |-- parse_document) --| blanks;
  in flat o the_default [] o Scan.read stopper documents end;
wenzelm@61441
   183
wenzelm@61457
   184
(*Read blocks from antiquoted text: split it into lines, make a line value
  from each, then read the block structure.*)
fun read_antiquotes ants =
  ants |> Antiquote.split_lines |> map make_line |> read_lines;
wenzelm@61457
   185
(*Read blocks from input source, via Antiquote.read.*)
fun read_source source = read_antiquotes (Antiquote.read source);
wenzelm@61441
   186
wenzelm@61457
   187
end;
wenzelm@61445
   188
wenzelm@61449
   189
wenzelm@61449
   190
(* PIDE reports *)
wenzelm@61449
   191
wenzelm@61457
   192
(*One Markup.words report for each text element of the source, located at
  the position of the text; other elements produce no report.*)
fun text_reports source =
  let
    fun report (Antiquote.Text ss) = [(#1 (Symbol_Pos.range ss), Markup.words)]
      | report _ = [];
  in maps report source end;
wenzelm@61457
   194
wenzelm@61449
   195
local
wenzelm@61449
   196
wenzelm@61460
   197
(*Add the reports of a single line to the accumulated reports: the item
  markup at the position of the marker, with the text reports of the line
  content in front of it.*)
fun line_reports depth (Line {item_pos, content, ...}) reports =
  text_reports content @ ((item_pos, Markup.markdown_item depth) :: reports);
wenzelm@61449
   199
wenzelm@61452
   200
(*Reports of a block, as transformation of the accumulated reports: markup
  for the block itself at the start of its range, followed by the reports of
  its lines (paragraph) or of its body at depth + 1 (list).*)
fun block_reports depth block =
  let
    val pos = #1 (block_range block);
  in
    (case block of
      Paragraph lines =>
        cons (pos, Markup.markdown_paragraph) #>
        fold (line_reports depth) lines
    | List {kind, body, ...} =>
        cons (pos, Markup.markdown_list (print_kind kind)) #>
        fold (block_reports (depth + 1)) body)
  end;
wenzelm@61449
   208
wenzelm@61449
   209
in
wenzelm@61449
   210
wenzelm@61450
   211
(*All reports of the given blocks, starting at depth 0 and restricted to
  positions that satisfy Position.is_reported.*)
fun reports blocks =
  []
  |> fold (block_reports 0) blocks
  |> filter (fn (pos, _) => Position.is_reported pos);
wenzelm@61449
   213
wenzelm@61441
   214
end;
wenzelm@61449
   215
wenzelm@61449
   216
end;