src/Pure/Thy/markdown.ML
author wenzelm
Thu Oct 15 13:28:36 2015 +0200 (2015-10-15)
changeset 61446 9b09acfb7e06
parent 61445 31aadb15eda5
child 61448 25e40e78f6d4
permissions -rw-r--r--
proper nesting of adjacent lists;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
wenzelm@61441
     2
    Author:     Makarius
wenzelm@61441
     3
wenzelm@61441
     4
Minimal support for Markdown documents (see also http://commonmark.org).
wenzelm@61441
     5
*)
wenzelm@61441
     6
wenzelm@61441
     7
(*Interface of the minimal Markdown reader: document lines, list markers,
  and the block structure produced from a sequence of lines.*)
signature MARKDOWN =
wenzelm@61441
     8
sig
wenzelm@61443
     9
  (*kinds of list items, one per recognized marker symbol*)
  datatype kind = Itemize | Enumerate | Description
wenzelm@61445
    10
  (*list item marker: indent counts the spaces in front of the marker symbol*)
  type marker = {indent: int, kind: kind}
wenzelm@61441
    11
  (*abstract document line*)
  type line
wenzelm@61443
    12
  (*the content a line was made from*)
  val line_content: line -> Antiquote.text_antiquote list
wenzelm@61445
    13
  (*build a line from its content; raises ERROR on blank characters other than space*)
  val make_line: Antiquote.text_antiquote list -> line
wenzelm@61445
    14
  (*the line made from empty content*)
  val empty_line: line
wenzelm@61445
    15
  (*document structure: paragraphs of lines, and lists of blocks under a common marker*)
  datatype block = Paragraph of line list | List of marker * block list
wenzelm@61445
    16
  (*parse a sequence of lines into blocks*)
  val read_lines: line list -> block list
wenzelm@61445
    17
  (*read source text: split into lines via Antiquote, make lines, then read_lines*)
  val read: Input.source -> block list
wenzelm@61441
    18
end;
wenzelm@61441
    19
wenzelm@61441
    20
structure Markdown: MARKDOWN =
wenzelm@61441
    21
struct
wenzelm@61441
    22
wenzelm@61445
    23
(* document lines *)
wenzelm@61441
    24
wenzelm@61441
    25
(*kind of list item: one constructor per marker symbol recognized by scan_marker*)
datatype kind = Itemize | Enumerate | Description;
wenzelm@61445
    26
(*list item marker: indent = number of spaces scanned before the marker symbol*)
type marker = {indent: int, kind: kind};
wenzelm@61441
    27
wenzelm@61441
    28
(*document line with derived properties; built by make_line (and directly for eof_line)*)
datatype line =
wenzelm@61441
    29
  Line of
wenzelm@61441
    30
   {content: Antiquote.text_antiquote list,  (*elements of the line as given*)
wenzelm@61441
    31
    is_empty: bool,  (*make_line: true iff all elements are text consisting of spaces only*)
wenzelm@61445
    32
    marker: (marker * Position.T) option};  (*make_line: list marker read from the first text element*)
wenzelm@61445
    33
wenzelm@61445
    34
(*sentinel line that holds only the EOF symbol; serves as stopper in read_lines*)
val eof_line =
  let val eof_text = Antiquote.Text [(Symbol.eof, Position.none)]
  in Line {marker = NONE, is_empty = false, content = [eof_text]} end;
wenzelm@61441
    37
wenzelm@61441
    38
(*content field of a line*)
fun line_content (Line args) = #content args;
wenzelm@61441
    39
(*is_empty flag of a line*)
fun line_is_empty (Line args) = #is_empty args;
wenzelm@61441
    40
(*optional list marker of a line, with its position*)
fun line_marker (Line args) = #marker args;
wenzelm@61441
    41
wenzelm@61443
    42
wenzelm@61443
    43
(* make line *)
wenzelm@61443
    44
wenzelm@61441
    45
local
wenzelm@61441
    46
wenzelm@61441
    47
(*ASCII blank symbol that is not the plain space*)
fun bad_blank (sym_pos: Symbol_Pos.T) =
  let val sym = #1 sym_pos
  in Symbol.is_ascii_blank sym andalso sym <> Symbol.space end;
wenzelm@61441
    48
(*all bad blanks within the text elements of the content; other elements contribute nothing*)
fun bad_blanks content =
  content |> maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []);
wenzelm@61441
    49
wenzelm@61441
    50
(*reject content that contains a bad blank: the error reports the first one with its position*)
fun check_blanks content =
  let
    fun complain (sym, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char sym) ^ Position.here pos);
  in
    (case bad_blanks content of
      first :: _ => complain first
    | [] => ())
  end;
wenzelm@61441
    55
wenzelm@61441
    56
(*symbol is the plain space*)
fun is_space (sym_pos: Symbol_Pos.T) = (#1 sym_pos = Symbol.space);
wenzelm@61441
    57
(*content is empty if every element is text made of spaces only (trivially so for no elements)*)
fun is_empty content =
  let
    fun blank (Antiquote.Text syms) = forall is_space syms
      | blank _ = false;
  in forall blank content end;
wenzelm@61441
    58
wenzelm@61444
    59
(*scan leading spaces followed by one of the list item symbols;
  the number of spaces becomes the indent of the resulting marker*)
val scan_marker =
  let
    fun keyword (sym, kind) = Symbol_Pos.$$ sym >> K kind;
    val scan_kind =
      keyword ("\<^item>", Itemize) ||
      keyword ("\<^enum>", Enumerate) ||
      keyword ("\<^descr>", Description);
    fun make_marker (spaces, kind) = {indent = length spaces, kind = kind};
  in Scan.many is_space -- scan_kind >> make_marker end;
wenzelm@61442
    64
wenzelm@61444
    65
(*optional marker at the start of the first text element, paired with the position
  that Symbol_Pos.scan_pos yields right after it; NONE if content does not start with text*)
fun read_marker content =
  (case content of
    Antiquote.Text syms :: _ =>
      let
        val scan = Scan.option (scan_marker -- Symbol_Pos.scan_pos);
        val (result, _) = Scan.finite Symbol_Pos.stopper scan syms;
      in result end
  | _ => NONE);
wenzelm@61441
    68
wenzelm@61441
    69
in
wenzelm@61441
    70
wenzelm@61441
    71
(*make a line from its content: check for bad blanks first (error), then derive
  the list marker and the emptiness flag*)
fun make_line content =
  (check_blanks content;
   Line {marker = read_marker content, is_empty = is_empty content, content = content});
wenzelm@61441
    76
wenzelm@61445
    77
(*the line without any content*)
val empty_line: line = [] |> make_line;
wenzelm@61445
    78
wenzelm@61441
    79
end;
wenzelm@61441
    80
wenzelm@61441
    81
wenzelm@61445
    82
(* document blocks *)
wenzelm@61445
    83
wenzelm@61445
    84
(*document structure: a paragraph of lines, or a list given by its marker together
  with the blocks collected under it (which may include nested lists, see add_span)*)
datatype block = Paragraph of line list | List of marker * block list;
wenzelm@61445
    85
wenzelm@61445
    86
(*Put one span in front of the document that follows it (parse_document folds spans
  from the right):
  - no marker: the blocks of the span are prepended as they are;
  - same marker as the leading list: the blocks are prepended to the body of that list;
  - smaller indent than the leading list: that list is nested at the end of the new item;
  - otherwise: a new list is put in front of the document.*)
fun add_span (opt_marker, body) document =
  (case opt_marker of
    NONE => body @ document
  | SOME marker =>
      let val new_list = List (marker, body) :: document in
        (case document of
          (list as List (list_marker, list_body)) :: rest =>
            if marker = list_marker then
              List (list_marker, body @ list_body) :: rest
            else if #indent marker < #indent list_marker then
              List (marker, body @ [list]) :: rest
            else new_list
        | _ => new_list)
      end);
wenzelm@61445
    97
wenzelm@61445
    98
wenzelm@61443
    99
(* read document *)
wenzelm@61441
   100
wenzelm@61441
   101
local
wenzelm@61441
   102
wenzelm@61442
   103
(*plain line: not empty, without list marker, and not the EOF sentinel*)
fun plain_line line =
  if line_is_empty line orelse is_some (line_marker line) then false
  else line <> eof_line;
wenzelm@61441
   105
wenzelm@61444
   106
(*paragraph: non-empty sequence of plain lines*)
val parse_paragraph = Scan.many1 plain_line >> (fn lines => Paragraph lines);
wenzelm@61444
   107
wenzelm@61441
   108
(*span: either a plain paragraph without marker, or a list item that consists of
  a marker line with subsequent plain lines, followed by further paragraphs that are
  each introduced by a single empty line; the result pairs the optional marker
  (position dropped) with the blocks of the span*)
val parse_span =
  let
    val plain_span = parse_paragraph >> (fn par => (NONE: marker option, [par]));

    val marker_line = Scan.one (is_some o line_marker);
    val more_paragraphs = Scan.repeat (Scan.one line_is_empty |-- parse_paragraph);
    fun item_span ((first, rest), pars) =
      (Option.map #1 (line_marker first), Paragraph (first :: rest) :: pars);
    val marked_span = marker_line -- Scan.many plain_line -- more_paragraphs >> item_span;
  in plain_span || marked_span end;
wenzelm@61443
   114
wenzelm@61443
   115
(*document: one span followed by further spans, each optionally preceded by a single
  empty line; the spans are combined from the right via add_span*)
val parse_document =
  let
    val separator = Scan.option (Scan.one line_is_empty);
    val next_span = separator |-- parse_span;
    fun build spans = fold_rev add_span spans [];
  in parse_span ::: Scan.repeat next_span >> build end;
wenzelm@61441
   118
wenzelm@61441
   119
in
wenzelm@61441
   120
wenzelm@61445
   121
(*read blocks from lines: repeated documents separated and surrounded by empty lines,
  scanned with eof_line as stopper; the result is [] if Scan.read yields NONE*)
fun read_lines lines =
  let
    val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
    val blanks = Scan.many line_is_empty;
    val documents = Scan.repeat (blanks |-- parse_document) --| blanks;
  in
    (case Scan.read stopper documents lines of
      SOME blocks => flat blocks
    | NONE => [])
  end;
wenzelm@61441
   125
wenzelm@61441
   126
end;
wenzelm@61441
   127
wenzelm@61445
   128
(*read blocks from source text: antiquote reading, splitting into lines, line construction, parsing*)
fun read source =
  source |> Antiquote.read |> Antiquote.split_lines |> map make_line |> read_lines;
wenzelm@61445
   129
wenzelm@61441
   130
end;