src/Pure/Thy/markdown.ML
author wenzelm
Thu, 15 Oct 2015 16:12:38 +0200
changeset 61450 239a04ec2d4c
parent 61449 4f31f79cf2d1
child 61451 7f530057bc3c
permissions -rw-r--r--
more markup;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Thy/markdown.ML
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     3
61448
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     4
Minimal support for Markdown documents (see also http://commonmark.org)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     5
that consist only of paragraphs and (nested) lists:
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     6
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     7
  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     8
  * adjacent list items with same indentation and same marker are grouped
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
     9
    into a single list
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    10
  * singleton blank lines separate paragraphs
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    11
  * multiple blank lines escape from the current list hierarchy
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    12
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    13
Notable differences to official Markdown:
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    14
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    15
  * indentation of list items needs to match exactly
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    16
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
25e40e78f6d4 more comments;
wenzelm
parents: 61446
diff changeset
    17
  * list items always consist of paragraphs -- no notion of "tight" list
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    18
*)
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    19
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    20
(*Interface of the minimal Markdown reader: list markers, document lines,
  document blocks, and PIDE reports.*)
signature MARKDOWN =
sig
  (*list kinds and item markers*)
  datatype kind = Itemize | Enumerate | Description
  val print_kind: kind -> string
  type marker = {indent: int, kind: kind}

  (*document lines*)
  type line
  val line_content: line -> Antiquote.text_antiquote list
  val make_line: Antiquote.text_antiquote list -> line
  val empty_line: line

  (*document blocks*)
  datatype block =
    Paragraph of line list |
    List of marker * block list
  val read_lines: line list -> block list
  val read: Input.source -> block list

  (*PIDE reports*)
  val reports: block list -> Position.report list
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    34
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    35
structure Markdown: MARKDOWN =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    36
struct
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    37
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    38
(* document lines *)
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    39
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    40
(*kind of list, one alternative for each of the item marker symbols*)
datatype kind =
  Itemize |
  Enumerate |
  Description;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    41
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    42
(*external name of a list kind*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
    45
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    46
(*list item marker: indentation together with the kind of list*)
type marker =
 {indent: int,
  kind: kind};
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    47
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    48
(*document line: optional item marker with its position, emptiness flag,
  and the original content*)
datatype line = Line of
  {marker: (marker * Position.T) option,
   is_empty: bool,
   content: Antiquote.text_antiquote list};
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    53
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    54
(*artificial line that consists of the eof symbol only: neither empty nor marked*)
val eof_line =
  let val eof_text = Antiquote.Text [(Symbol.eof, Position.none)]
  in Line {content = [eof_text], is_empty = false, marker = NONE} end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    57
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    58
(*projections of the line record*)
fun line_content (Line args) = #content args;

fun line_is_empty (Line args) = #is_empty args;

fun line_marker (Line args) = #marker args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    61
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    62
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    63
(* make line *)
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    64
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    65
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    66
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    67
(*ASCII blank other than plain space*)
fun bad_blank (sym_pos: Symbol_Pos.T) =
  let val sym = #1 sym_pos
  in sym <> Symbol.space andalso Symbol.is_ascii_blank sym end;

(*all bad blanks within the text parts of the content; other parts are ignored*)
fun bad_blanks content =
  maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []) content;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    69
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    70
(*fail with an error message for the first bad blank of the content, if any*)
fun check_blanks content =
  let
    fun bad (c, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos);
  in
    (case bad_blanks content of
      first :: _ => bad first
    | [] => ())
  end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    75
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    76
(*plain space symbol*)
fun is_space (sym_pos: Symbol_Pos.T) = (#1 sym_pos = Symbol.space);

(*content is empty if it consists only of text parts made of spaces;
  any other kind of part makes it non-empty*)
fun is_empty content =
  let
    fun blank_text (Antiquote.Text syms) = forall is_space syms
      | blank_text _ = false;
  in forall blank_text content end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    78
61444
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
    79
(*item marker: leading spaces, then one of the list control symbols;
  the indentation is the number of leading spaces, the position is taken
  right after them*)
val scan_marker =
  let
    fun symbol_kind (sym, kind) = Symbol_Pos.$$ sym >> K kind;
    val scan_kind =
      symbol_kind ("\<^item>", Itemize) ||
      symbol_kind ("\<^enum>", Enumerate) ||
      symbol_kind ("\<^descr>", Description);
    fun make_marker ((spaces, pos), kind) =
      ({indent = length spaces, kind = kind}, pos);
  in (Scan.many is_space -- Symbol_Pos.scan_pos -- scan_kind) >> make_marker end;
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    85
61444
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
    86
(*optional marker at the start of the content: only a leading text part is inspected*)
fun read_marker content =
  (case content of
    Antiquote.Text syms :: _ =>
      let val (result, _) = Scan.finite Symbol_Pos.stopper (Scan.option scan_marker) syms
      in result end
  | _ => NONE);
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    89
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    90
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    91
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    92
(*make a line from its content: blanks are checked first, then the marker
  and the emptiness flag are determined*)
fun make_line content =
 (check_blanks content;
  Line {content = content, marker = read_marker content, is_empty = is_empty content});

(*line without any content*)
val empty_line = make_line [];
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
    99
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   100
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   101
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   102
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   103
(* document blocks *)
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   104
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   105
(*document block: paragraph of lines, or list with marker and nested blocks*)
datatype block =
  Paragraph of line list |
  List of marker * block list;
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   106
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   107
(*all lines of a block, including those of nested blocks, in document order*)
fun block_lines block =
  (case block of
    Paragraph lines => lines
  | List (_, blocks) => maps block_lines blocks);
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   109
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   110
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   111
(*Prepend a span to the document built so far.

  Unmarked span: its blocks are put in front.
  Marked span, relative to a list at the head of the document:
    - same marker: the span body joins that list at the front;
    - smaller indentation: new list that contains the body, followed by the
      old head list as nested block;
    - otherwise (also without list at the head): new list in front.*)
fun add_span (NONE, body) document = body @ document
  | add_span (SOME marker, body) document =
      let
        fun new_list () = List (marker, body) :: document;
      in
        (case document of
          (list as List (list_marker, list_body)) :: rest =>
            if marker = list_marker then
              List (list_marker, body @ list_body) :: rest
            else if #indent marker < #indent list_marker then
              List (marker, body @ [list]) :: rest
            else new_list ()
        | _ => new_list ())
      end;
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   122
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   123
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   124
(* read document *)
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   125
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   126
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   127
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
   128
(*plain line: not empty, without marker, and different from the eof line*)
fun plain_line line =
  not (line_is_empty line orelse is_some (line_marker line) orelse line = eof_line);

(*paragraph: non-empty sequence of plain lines*)
val parse_paragraph = Scan.many1 plain_line >> (fn lines => Paragraph lines);
1fcdfc1a7e50 more document structure;
wenzelm
parents: 61443
diff changeset
   132
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   133
(*Span: either a single unmarked paragraph, or a list item.  The item starts
  with a marked line, continues with plain lines, and may be followed by
  further paragraphs that are each preceded by exactly one empty line.
  The result is the optional marker (without position) and the blocks.*)
val parse_span =
  let
    fun plain_result par = (NONE, [par]);

    val item_head = Scan.one (is_some o line_marker) -- Scan.many plain_line;
    val item_rest = Scan.repeat (Scan.one line_is_empty |-- parse_paragraph);
    fun item_result ((line, lines), pars) =
      (Option.map #1 (line_marker line), Paragraph (line :: lines) :: pars);
  in
    (parse_paragraph >> plain_result) ||
    ((item_head -- item_rest) >> item_result)
  end;
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   139
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   140
(*document: non-empty sequence of spans, separated by at most one empty line;
  the spans are added from right to left to an initially empty document*)
val parse_document =
  let
    val more_spans = Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span);
    fun build spans = fold_rev add_span spans [];
  in (parse_span ::: more_spans) >> build end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   143
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   144
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   145
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   146
(*read blocks from lines: documents are separated by arbitrarily many empty
  lines, and their blocks are concatenated; the result is empty if the
  scanner does not succeed*)
fun read_lines lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blank_lines = Scan.many line_is_empty;
    val parse_all = Scan.repeat (blank_lines |-- parse_document) --| blank_lines;
  in
    (case Scan.read stopper parse_all lines of
      SOME documents => flat documents
    | NONE => [])
  end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   150
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   151
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   152
61445
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   153
(*read blocks from source: antiquotations are read first, the result is
  split into lines, and each line is analyzed via make_line*)
fun read source =
  read_lines (map make_line (Antiquote.split_lines (Antiquote.read source)));
31aadb15eda5 more document structure;
wenzelm
parents: 61444
diff changeset
   154
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   155
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   156
(* PIDE reports *)
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   157
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   158
local
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   159
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   160
(*add an item report at the marker position of a line; unmarked lines add nothing*)
fun line_reports depth (Line {marker, ...}) =
  (case marker of
    NONE => I
  | SOME (_, pos) => cons (pos, Markup.markdown_item depth));
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   163
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   164
(*first component of the range that covers the content of all given lines*)
fun lines_pos lines = #1 (Antiquote.range (maps line_content lines));
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   165
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   166
(*add the reports of a block: one report for the block itself, then item
  reports for the lines of a paragraph, or the reports of the nested blocks
  of a list at incremented depth*)
fun block_reports depth block reports =
  (case block of
    Paragraph lines =>
      reports
      |> cons (lines_pos lines, Markup.markdown_paragraph)
      |> fold (line_reports depth) lines
  | List (marker, body) =>
      let
        val list_pos = lines_pos (maps block_lines body);
        val list_markup = Markup.markdown_list (print_kind (#kind marker));
      in
        reports
        |> cons (list_pos, list_markup)
        |> fold (block_reports (depth + 1)) body
      end);
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   172
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   173
in
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   174
61450
239a04ec2d4c more markup;
wenzelm
parents: 61449
diff changeset
   175
(*reports for all blocks, starting at depth 0; only entries with reported
  position are retained*)
fun reports blocks =
  let val all_reports = fold (block_reports 0) blocks []
  in filter (fn (pos, _) => Position.is_reported pos) all_reports end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   177
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   178
end;
61449
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   179
4f31f79cf2d1 report Markdown document structure;
wenzelm
parents: 61448
diff changeset
   180
end;