src/Pure/Thy/markdown.ML
author wenzelm
Wed Oct 14 17:24:21 2015 +0200 (2015-10-14)
changeset 61441 20ff1d5c74e1
child 61442 467ebb937294
permissions -rw-r--r--
minimal support for Markdown documents;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
    Author:     Makarius

Minimal support for Markdown documents (see also http://commonmark.org).
*)
wenzelm@61441
     6
wenzelm@61441
     7
signature MARKDOWN =
sig
  (*one line of document source; the representation is hidden from clients*)
  type line
  (*read document source and group its lines into spans (lists of lines)*)
  val read: Input.source -> line list list
end;
wenzelm@61441
    12
wenzelm@61441
    13
structure Markdown: MARKDOWN =
struct

(* line with optional item marker *)

(*kind of list item, as selected by the marker symbol at the start of a line*)
datatype kind = Itemize | Enumerate | Description;

(*a source line together with properties computed once by make_line:
    content   the text / antiquotation elements of the line
    is_empty  all elements are text consisting of spaces only
    indent    number of leading spaces of an initial text element (0 otherwise)
    marker    item kind and position of the marker symbol, if present*)
datatype line =
  Line of
   {content: Antiquote.text_antiquote list,
    is_empty: bool,
    indent: int,
    marker: (kind * Position.T) option};

(*field selectors*)
fun line_content (Line {content, ...}) = content;
fun line_is_empty (Line {is_empty, ...}) = is_empty;
fun line_indent (Line {indent, ...}) = indent;
fun line_marker (Line {marker, ...}) = marker;

local

(*a "bad" blank is any ASCII blank (according to Symbol.is_ascii_blank) other than plain space*)
fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;

(*all bad blanks within the text elements of a line; antiquotations are not inspected*)
val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);

(*raise an error for the first bad blank of the line, reporting its position*)
fun check_blanks content =
  (case bad_blanks content of
    [] => ()
  | (c, pos) :: _ =>
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));

fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;

(*a line is empty if it consists of text made of spaces only; this includes the
  line without any elements, but excludes any line containing an antiquotation*)
val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);

(*determine (indent, marker) from the initial text element of a line;
  a line that does not start with text has indent 0 and no marker*)
fun line_kind (Antiquote.Text ss :: _) =
      let
        val (spaces, rest) = take_prefix is_space ss;
        (*only applied when rest starts with a marker symbol, so rest is non-empty here;
          the marker must either end the text element or be followed by a space*)
        fun make_marker kind =
          (case rest of
            [(_, pos)] => (kind, pos)
          | (_, pos) :: (" ", _) :: _ => (kind, pos)
          | (_, pos) :: _ => error ("Missing space after item marker" ^ Position.here pos));
        val marker =
          (case rest of
            ("\<^item>", _) :: _ => SOME (make_marker Itemize)
          | ("\<^enum>", _) :: _ => SOME (make_marker Enumerate)
          | ("\<^descr>", _) :: _ => SOME (make_marker Description)
          | _ => NONE);
      in (length spaces, marker) end
  | line_kind _ = (0, NONE);

in

(*check the blanks of the given content and build the line with its derived properties;
  raises ERROR for a bad blank or for an item marker that is not followed by a space*)
fun make_line content =
  let
    val _ = check_blanks content;
    val (indent, marker) = line_kind content;
  in Line {content = content, is_empty = is_empty content, indent = indent, marker = marker} end;

end;


(* spans of related lines *)

local

(*artificial end-of-input line, used as stopper for the line scanner*)
val eof = make_line [Antiquote.Text [(Symbol.eof, Position.none)]];
val stopper = Scan.stopper (K eof) (fn line => line = eof);

fun item_line line = is_some (line_marker line);

(*a plain line has no marker and is not empty: empty lines must be excluded here,
  otherwise they are absorbed into the preceding item or paragraph and the
  alternative for empty lines in parse_span below can never apply*)
fun plain_line line =
  is_none (line_marker line) andalso not (line_is_empty line) andalso line <> eof;

(*a span is one of:
    - an item line followed by its plain continuation lines
    - a non-empty sequence of plain lines
    - a non-empty sequence of empty lines
  every line except eof satisfies exactly one of item_line, plain_line, line_is_empty*)
val parse_span =
  Scan.one item_line -- Scan.many plain_line >> op :: ||
  Scan.many1 plain_line ||
  Scan.many1 line_is_empty;

in

(*split lines into spans; the result is [] if Scan.read yields NONE*)
fun read_spans lines =
  the_default [] (Scan.read stopper (Scan.repeat parse_span) lines);

end;


(* document structure *)

(*read antiquoted source, split it into lines, and group the lines into spans*)
fun read input =
  Antiquote.read input
  |> Antiquote.split_lines
  |> map make_line
  |> read_spans;

end;