src/Pure/Thy/markdown.ML
author wenzelm
Fri Oct 16 10:11:20 2015 +0200 (2015-10-16)
changeset 61457 3e21699bb83b
parent 61454 c86286ae9fe5
child 61459 5f2ddeb15c06
permissions -rw-r--r--
clarified Antiquote.antiq_reports;
Thy_Output.output_text: support for markdown (inactive);
eliminated Thy_Output.check_text -- uniform use of Thy_Output.output_text;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
wenzelm@61441
     2
    Author:     Makarius
wenzelm@61441
     3
wenzelm@61448
     4
Minimal support for Markdown documents (see also http://commonmark.org)
wenzelm@61448
     5
that consist only of paragraphs and (nested) lists:
wenzelm@61448
     6
wenzelm@61448
     7
  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
wenzelm@61448
     8
  * adjacent list items with same indentation and same marker are grouped
wenzelm@61448
     9
    into a single list
wenzelm@61448
    10
  * singleton blank lines separate paragraphs
wenzelm@61448
    11
  * multiple blank lines escape from the current list hierarchy
wenzelm@61448
    12
wenzelm@61448
    13
Notable differences to official Markdown:
wenzelm@61448
    14
wenzelm@61448
    15
  * indentation of list items needs to match exactly
wenzelm@61448
    16
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
wenzelm@61448
    17
  * list items always consist of paragraphs -- no notion of "tight" list
wenzelm@61441
    18
*)
wenzelm@61441
    19
wenzelm@61441
    20
signature MARKDOWN =
sig
  (*control symbols that introduce list items*)
  val is_control: Symbol.symbol -> bool

  (*list markers*)
  datatype kind = Itemize | Enumerate | Description
  val print_kind: kind -> string
  type marker = {indent: int, kind: kind}

  (*document lines*)
  type line
  val line_source: line -> Antiquote.text_antiquote list
  val line_content: line -> Antiquote.text_antiquote list
  val make_line: Antiquote.text_antiquote list -> line
  val empty_line: line

  (*document blocks and reading*)
  datatype block = Paragraph of line list | List of marker * block list
  val read_lines: line list -> block list
  val read_antiquotes: Antiquote.text_antiquote list -> block list
  val read_source: Input.source -> block list

  (*PIDE reports*)
  val text_reports: Antiquote.text_antiquote list -> Position.report list
  val reports: block list -> Position.report list
end;
wenzelm@61441
    38
wenzelm@61441
    39
structure Markdown: MARKDOWN =
wenzelm@61441
    40
struct
wenzelm@61441
    41
wenzelm@61445
    42
(* document lines *)
wenzelm@61441
    43
wenzelm@61457
    44
(*membership test for the three list-item control symbols*)
fun is_control sym = member (op =) ["\\<^item>", "\\<^enum>", "\\<^descr>"] sym;
wenzelm@61457
    45
wenzelm@61441
    46
(*kind of list; printed by print_kind as "itemize", "enumerate", "description"*)
datatype kind = Itemize | Enumerate | Description;
wenzelm@61449
    47
wenzelm@61449
    48
(*name of a list kind as plain string*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
wenzelm@61449
    51
wenzelm@61445
    52
(*list item marker: indent is the number of spaces scanned before the marker symbol*)
type marker = {indent: int, kind: kind};
wenzelm@61441
    53
wenzelm@61441
    54
(*single document line:
    source   -- the line as given to make_line
    content  -- the source after removing an optional leading marker
    is_empty -- whether the source consists of spaces only
    marker   -- optional list item marker with its position*)
datatype line =
  Line of {
    source: Antiquote.text_antiquote list,
    content: Antiquote.text_antiquote list,
    is_empty: bool,
    marker: (marker * Position.T) option};
wenzelm@61445
    60
wenzelm@61445
    61
(*distinguished end-of-input line: its source is a single eof symbol*)
val eof_line =
  let val eof_source = [Antiquote.Text [(Symbol.eof, Position.none)]]
  in Line {source = eof_source, content = [], is_empty = false, marker = NONE} end;
wenzelm@61441
    64
wenzelm@61451
    65
fun line_source (Line fields) = #source fields;
wenzelm@61441
    66
fun line_content (Line fields) = #content fields;
wenzelm@61441
    67
fun line_is_empty (Line fields) = #is_empty fields;
wenzelm@61441
    68
fun line_marker (Line fields) = #marker fields;
wenzelm@61441
    69
wenzelm@61443
    70
wenzelm@61443
    71
(* make line *)
wenzelm@61443
    72
wenzelm@61441
    73
local
wenzelm@61441
    74
wenzelm@61441
    75
(*ASCII blank symbol other than the plain space*)
fun bad_blank (sym_pos: Symbol_Pos.T) =
  let val sym = #1 sym_pos
  in Symbol.is_ascii_blank sym andalso sym <> Symbol.space end;
wenzelm@61441
    76
(*all bad blanks within the text parts of the source; other antiquote items contribute nothing*)
fun bad_blanks source =
  source |> maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []);
wenzelm@61441
    77
wenzelm@61451
    78
(*raise an error for the first bad blank in the source, if any*)
fun check_blanks source =
  (case bad_blanks source of
    (c, pos) :: _ =>
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos)
  | [] => ());
wenzelm@61441
    83
wenzelm@61441
    84
fun is_space (sym_pos: Symbol_Pos.T) = #1 sym_pos = Symbol.space;
wenzelm@61441
    85
(*source consists of text items that contain spaces only*)
fun is_empty source =
  forall (fn Antiquote.Text syms => forall is_space syms | _ => false) source;
wenzelm@61441
    86
wenzelm@61444
    87
(*scan leading spaces followed by a list item control symbol;
  the result is the marker (indent = number of spaces) with the position of the symbol*)
val scan_marker =
  let
    val scan_kind =
      Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
      Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
      Symbol_Pos.$$ "\\<^descr>" >> K Description;
  in
    Scan.many is_space -- Symbol_Pos.scan_pos -- scan_kind
      >> (fn ((blanks, marker_pos), kind) =>
          ({indent = length blanks, kind = kind}, marker_pos))
  end;
wenzelm@61442
    93
wenzelm@61451
    94
(*split an optional marker (and the spaces after it) from the leading text item;
  a text item that becomes empty is dropped from the remaining content*)
fun read_marker (Antiquote.Text ss :: rest) =
      let
        val scan = Scan.option scan_marker --| Scan.many is_space;
        val (marker, ss') = Scan.finite Symbol_Pos.stopper scan ss;
      in (marker, if null ss' then rest else Antiquote.Text ss' :: rest) end
  | read_marker source = (NONE, source);
wenzelm@61441
    99
wenzelm@61441
   100
in
wenzelm@61441
   101
wenzelm@61451
   102
(*build a line from its source: check blanks first, then split off the marker*)
fun make_line source =
  (check_blanks source;
   case read_marker source of
     (marker, content) =>
       Line {source = source, content = content, is_empty = is_empty source, marker = marker});
wenzelm@61441
   107
wenzelm@61445
   108
(*line made from empty source*)
val empty_line = make_line [];
wenzelm@61445
   109
wenzelm@61441
   110
end;
wenzelm@61441
   111
wenzelm@61441
   112
wenzelm@61445
   113
(* document blocks *)
wenzelm@61445
   114
wenzelm@61445
   115
(*document block: paragraph of lines, or list with its marker and nested blocks*)
datatype block = Paragraph of line list | List of marker * block list;
wenzelm@61445
   116
wenzelm@61450
   117
(*all lines of a block, including those of nested blocks*)
fun block_lines block =
  (case block of
    Paragraph lines => lines
  | List (_, body) => maps block_lines body);
wenzelm@61450
   119
wenzelm@61452
   120
(*range of a block: paragraphs use the line content, lists use the full line source*)
fun block_range block =
  let
    val text =
      (case block of
        Paragraph lines => maps line_content lines
      | List (_, body) => maps line_source (maps block_lines body));
  in Antiquote.range text end;
wenzelm@61452
   122
wenzelm@61452
   123
wenzelm@61452
   124
(* read document *)
wenzelm@61452
   125
wenzelm@61452
   126
local
wenzelm@61450
   127
wenzelm@61445
   128
(*add a span in front of the document:
    - unmarked span: its blocks are prepended
    - same marker as the leading list: the body is merged into that list
    - smaller indent than the leading list: that list is appended to the
      body and the span is added to the rest of the document
    - otherwise: the span becomes a new leading list*)
fun add_span (NONE, body) document = body @ document
  | add_span (opt_marker as SOME marker, body) document =
      (case document of
        (list as List (list_marker, list_body)) :: rest =>
          if marker = list_marker then List (list_marker, body @ list_body) :: rest
          else if #indent marker < #indent list_marker then
            add_span (opt_marker, body @ [list]) rest
          else List (marker, body) :: document
      | _ => List (marker, body) :: document);
wenzelm@61445
   139
wenzelm@61442
   140
(*line that is neither empty, nor marked, nor the eof line*)
fun plain_line line =
  not (line_is_empty line orelse is_some (line_marker line) orelse line = eof_line);
wenzelm@61441
   142
wenzelm@61444
   143
(*non-empty sequence of plain lines*)
fun parse_paragraph lines = (Scan.many1 plain_line >> Paragraph) lines;
wenzelm@61444
   144
wenzelm@61441
   145
(*span: either a plain paragraph without marker, or a marked line with
  subsequent plain lines and further paragraphs, each preceded by one empty line*)
val parse_span =
  let
    val plain_span = parse_paragraph >> (fn par => (NONE: marker option, [par]));
    val item_span =
      Scan.one (is_some o line_marker) -- Scan.many plain_line --
        Scan.repeat (Scan.one line_is_empty |-- parse_paragraph) >>
          (fn ((first, more), pars) =>
            (Option.map #1 (line_marker first), Paragraph (first :: more) :: pars));
  in plain_span || item_span end;
wenzelm@61443
   151
wenzelm@61443
   152
(*sequence of spans, separated by at most one empty line, folded from the
  right into a block list via add_span*)
val parse_document =
  let
    val more_spans = Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span);
    fun build spans = fold_rev add_span spans [];
  in (parse_span ::: more_spans) >> build end;
wenzelm@61441
   155
wenzelm@61441
   156
in
wenzelm@61441
   157
wenzelm@61445
   158
(*read documents separated by empty lines; the result is [] if Scan.read yields NONE*)
fun read_lines lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blanks = Scan.many line_is_empty;
    val documents = Scan.repeat (blanks |-- parse_document) --| blanks;
  in flat (the_default [] (Scan.read stopper documents lines)) end;
wenzelm@61441
   162
wenzelm@61457
   163
(*split antiquoted text into lines and read these as blocks*)
fun read_antiquotes ants = read_lines (map make_line (Antiquote.split_lines ants));
wenzelm@61457
   164
fun read_source source = read_antiquotes (Antiquote.read source);
wenzelm@61441
   165
wenzelm@61457
   166
end;
wenzelm@61445
   167
wenzelm@61449
   168
wenzelm@61449
   169
(* PIDE reports *)
wenzelm@61449
   170
wenzelm@61457
   171
(*one Markup.words report per text item, at the start position of its range*)
fun text_reports ants =
  let
    fun report (Antiquote.Text syms) = [(#1 (Symbol_Pos.range syms), Markup.words)]
      | report _ = [];
  in maps report ants end;
wenzelm@61457
   173
wenzelm@61449
   174
local
wenzelm@61449
   175
wenzelm@61457
   176
(*reports for a marked line: the item markup at the marker position, then
  the text reports of the line content; unmarked lines add nothing*)
fun line_reports depth line reports =
  (case line_marker line of
    SOME (_, pos) =>
      append (text_reports (line_content line))
        (cons (pos, Markup.markdown_item depth) reports)
  | NONE => reports);
wenzelm@61449
   180
wenzelm@61452
   181
(*reports for a block: its own markup at the start of the block range,
  then those of its lines (paragraph) or nested blocks at depth + 1 (list)*)
fun block_reports depth (block as Paragraph lines) reports =
      (#1 (block_range block), Markup.markdown_paragraph) :: reports
      |> fold (line_reports depth) lines
  | block_reports depth (block as List (marker, body)) reports =
      (#1 (block_range block), Markup.markdown_list (print_kind (#kind marker))) :: reports
      |> fold (block_reports (depth + 1)) body;
wenzelm@61449
   189
wenzelm@61449
   190
in
wenzelm@61449
   191
wenzelm@61450
   192
(*all reports of the blocks, starting at depth 0, restricted to reported positions*)
fun reports blocks =
  [] |> fold (block_reports 0) blocks
     |> filter (fn (pos, _) => Position.is_reported pos);
wenzelm@61449
   194
wenzelm@61441
   195
end;
wenzelm@61449
   196
wenzelm@61449
   197
end;