src/Pure/Thy/markdown.ML
author wenzelm
Wed Oct 14 21:18:37 2015 +0200 (2015-10-14)
changeset 61444 1fcdfc1a7e50
parent 61443 78bbfadd1034
child 61445 31aadb15eda5
permissions -rw-r--r--
more document structure;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
wenzelm@61441
     2
    Author:     Makarius
wenzelm@61441
     3
wenzelm@61441
     4
Minimal support for Markdown documents (see also http://commonmark.org).
wenzelm@61441
     5
*)
wenzelm@61441
     6
wenzelm@61441
     7
(*Public interface of the Markdown reader.*)
signature MARKDOWN =
sig
  (*source lines: abstract, only the content is observable*)
  type line
  val line_content: line -> Antiquote.text_antiquote list
  (*document blocks*)
  datatype kind = Itemize | Enumerate | Description
  datatype block = Paragraph of line list | List of kind * block list
  (*entry point: turn input source into a list of blocks*)
  val read_document: Input.source -> block list
end;
wenzelm@61441
    15
wenzelm@61441
    16
structure Markdown: MARKDOWN =
wenzelm@61441
    17
struct
wenzelm@61441
    18
wenzelm@61443
    19
(* document structure *)
wenzelm@61441
    20
wenzelm@61441
    21
(*list kinds, one per marker symbol recognized by scan_marker:
  \<^item>, \<^enum>, \<^descr>*)
datatype kind = Itemize | Enumerate | Description;
wenzelm@61444
    22
(*list marker at the start of a line: indent is the number of space symbols
  before the marker symbol, pos is the position produced by
  Symbol_Pos.scan_pos directly after it*)
type marker = {indent: int, kind: kind, pos: Position.T};
wenzelm@61441
    23
wenzelm@61441
    24
(*One source line:
    content   -- text and antiquotations of the line
    is_empty  -- flag computed by make_line: content holds only space symbols
    marker    -- list marker found at the start of the line, if any*)
datatype line =
  Line of {marker: marker option, is_empty: bool, content: Antiquote.text_antiquote list};
wenzelm@61441
    29
wenzelm@61441
    30
(*text and antiquotations of a line*)
fun line_content (Line fields) = #content fields;
wenzelm@61441
    31
(*stored emptiness flag of a line*)
fun line_is_empty (Line fields) = #is_empty fields;
wenzelm@61441
    32
(*optional list marker of a line*)
fun line_marker (Line fields) = #marker fields;
wenzelm@61441
    33
wenzelm@61444
    34
(*document blocks: a paragraph of consecutive lines, or a list of some kind
  with nested blocks; NOTE(review): the parsers visible in this file only
  construct Paragraph values*)
datatype block = Paragraph of line list | List of kind * block list;
wenzelm@61443
    35
wenzelm@61443
    36
wenzelm@61443
    37
(* make line *)
wenzelm@61443
    38
wenzelm@61441
    39
local
wenzelm@61441
    40
wenzelm@61441
    41
(*an ASCII blank other than the plain space symbol*)
fun bad_blank ((sym, _): Symbol_Pos.T) =
  if sym = Symbol.space then false else Symbol.is_ascii_blank sym;
wenzelm@61441
    42
(*all bad blanks within the text parts of some content, in order;
  antiquotations contribute nothing*)
fun bad_blanks content =
  maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []) content;
wenzelm@61441
    43
wenzelm@61441
    44
(*reject content with bad blanks: the error refers to the first offending
  symbol and its position*)
fun check_blanks content =
  let
    fun complain (sym, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char sym) ^ Position.here pos);
  in
    (case bad_blanks content of
      first :: _ => complain first
    | [] => ())
  end;
wenzelm@61441
    49
wenzelm@61441
    50
(*symbol is the plain space*)
fun is_space ((sym, _): Symbol_Pos.T) = (Symbol.space = sym);
wenzelm@61441
    51
(*content is empty if every element is text made of space symbols only;
  any antiquotation makes it non-empty*)
fun is_empty content =
  not (exists (fn Antiquote.Text syms => exists (not o is_space) syms | _ => true) content);
wenzelm@61441
    52
wenzelm@61444
    53
(*scan a list marker: leading spaces, one of the marker symbols, then the
  current position; the number of leading spaces becomes the indent*)
val scan_marker =
  let
    fun marker_symbol sym kind = Symbol_Pos.$$ sym >> K kind;
    val scan_kind =
      marker_symbol "\<^item>" Itemize ||
      marker_symbol "\<^enum>" Enumerate ||
      marker_symbol "\<^descr>" Description;
    fun make_marker ((spaces, kind), pos) : marker =
      {indent = length spaces, kind = kind, pos = pos};
  in Scan.many is_space -- scan_kind -- Symbol_Pos.scan_pos >> make_marker end;
wenzelm@61442
    60
wenzelm@61444
    61
(*optional marker of some content: only a leading text element is inspected,
  with scan_marker wrapped in Scan.option; anything else yields NONE*)
fun read_marker content =
  (case content of
    Antiquote.Text syms :: _ =>
      let val (result, _) = Scan.finite Symbol_Pos.stopper (Scan.option scan_marker) syms
      in result end
  | _ => NONE);
wenzelm@61441
    64
wenzelm@61441
    65
in
wenzelm@61441
    66
wenzelm@61441
    67
(*build a line from its content: blanks are checked first (may raise an
  error), then marker and emptiness flag are computed*)
fun make_line content =
  (check_blanks content;
   Line {marker = read_marker content, is_empty = is_empty content, content = content});
wenzelm@61441
    72
wenzelm@61441
    73
end;
wenzelm@61441
    74
wenzelm@61441
    75
wenzelm@61443
    76
(* read document *)
wenzelm@61441
    77
wenzelm@61441
    78
local
wenzelm@61441
    79
wenzelm@61442
    80
(*artificial end-of-input line: a single eof symbol without position,
  neither empty nor marked*)
val eof =
  let val eof_symbol = (Symbol.eof, Position.none)
  in Line {marker = NONE, is_empty = false, content = [Antiquote.Text [eof_symbol]]} end;
wenzelm@61444
    83
wenzelm@61441
    84
(*stopper for scanning lists of lines, based on the eof line*)
val stopper =
  let fun is_eof line = (line = eof)
  in Scan.stopper (fn _ => eof) is_eof end;
wenzelm@61441
    85
wenzelm@61442
    86
(*plain line: not empty, without marker, and not the eof line*)
fun plain_line line =
  if line_is_empty line then false
  else
    (case line_marker line of
      SOME _ => false
    | NONE => line <> eof);
wenzelm@61441
    88
wenzelm@61444
    89
(*paragraph: non-empty sequence of plain lines*)
val parse_paragraph = Scan.many1 plain_line >> (fn lines => Paragraph lines);
wenzelm@61444
    90
wenzelm@61441
    91
(*span: either a paragraph without marker, or a marker line with its
  following plain lines, plus further paragraphs that are each preceded by
  one empty line; the result pairs the optional marker with the blocks*)
val parse_span =
  let
    val plain_span = parse_paragraph >> (fn par => (NONE: marker option, [par]));
    val more_paragraphs = Scan.repeat (Scan.one line_is_empty |-- parse_paragraph);
    val marked_span =
      Scan.one (is_some o line_marker) -- Scan.many plain_line -- more_paragraphs >>
        (fn ((head, rest), pars) => (line_marker head, Paragraph (head :: rest) :: pars));
  in plain_span || marked_span end;
wenzelm@61443
    96
wenzelm@61443
    97
(*document: spans that may be separated by a single empty line; markers
  are dropped and the blocks of all spans are concatenated*)
val parse_document =
  let
    val next_span = Scan.option (Scan.one line_is_empty) |-- parse_span;
  in parse_span ::: Scan.repeat next_span >> maps (fn (_, blocks) => blocks) end;
wenzelm@61443
    99
wenzelm@61443
   100
(*sequence of documents with arbitrary empty lines before, between and
  after them; all blocks are flattened into one list*)
val parse_documents =
  let
    val blank_lines = Scan.many line_is_empty;
  in Scan.repeat (blank_lines |-- parse_document) --| blank_lines >> flat end;
wenzelm@61441
   102
wenzelm@61441
   103
in
wenzelm@61441
   104
wenzelm@61443
   105
(*read input source: split into antiquoted lines, make line records, and
  parse them into blocks; an unsuccessful Scan.read yields the empty list*)
fun read_document source =
  let
    val lines = map make_line (Antiquote.split_lines (Antiquote.read source));
  in the_default [] (Scan.read stopper parse_documents lines) end;
wenzelm@61441
   108
wenzelm@61441
   109
end;
wenzelm@61441
   110
wenzelm@61441
   111
end;