src/Pure/Thy/markdown.ML
author wenzelm
Wed, 14 Oct 2015 19:44:43 +0200
changeset 61443 78bbfadd1034
parent 61442 467ebb937294
child 61444 1fcdfc1a7e50
permissions -rw-r--r--
more document structure;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Thy/markdown.ML
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     3
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     4
Minimal support for Markdown documents (see also http://commonmark.org).
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     5
*)
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     6
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
     7
signature MARKDOWN =
sig
  (*kinds of list environment*)
  datatype kind =
    Itemize | Enumerate | Description

  (*abstract document line; only its content is exposed*)
  type line
  val line_content: line -> Antiquote.text_antiquote list

  (*document structure: plain paragraphs or lists of items, each item a block list*)
  datatype block =
    Paragraph of line list |
    List of kind * block list list

  (*parse source text into a sequence of blocks*)
  val read_document: Input.source -> block list
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    15
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    16
structure Markdown: MARKDOWN =
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    17
struct
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    18
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    19
(* document structure *)
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    20
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    21
(*kinds of list environment*)
datatype kind =
  Itemize | Enumerate | Description;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    22
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    23
(*a single document line together with its precomputed properties*)
datatype line =
  Line of
   {marker: (kind * Position.T) option,  (*optional list marker with a position*)
    indent: int,
    is_empty: bool,
    content: Antiquote.text_antiquote list};
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    29
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    30
(*selector: content of a line*)
fun line_content (Line args) = #content args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    31
(*selector: emptiness flag of a line*)
fun line_is_empty (Line args) = #is_empty args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    32
(*selector: indentation of a line*)
fun line_indent (Line args) = #indent args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    33
(*selector: optional list marker of a line*)
fun line_marker (Line args) = #marker args;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    34
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    35
(*document blocks: a paragraph of lines, or a list whose items are block lists*)
datatype block =
  Paragraph of line list |
  List of kind * block list list;
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    36
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    37
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    38
(* make line *)
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    39
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    40
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    41
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    42
(*an ASCII blank symbol other than the plain space*)
fun bad_blank (sym_pos: Symbol_Pos.T) =
  let val sym = #1 sym_pos
  in if Symbol.is_ascii_blank sym then sym <> Symbol.space else false end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    43
(*collect all bad blanks from the text parts of the content; other parts contribute nothing*)
fun bad_blanks content =
  let
    fun bad_in (Antiquote.Text syms) = filter bad_blank syms
      | bad_in _ = [];
  in maps bad_in content end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    44
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    45
(*raise an error for the first bad blank in the content, if there is one*)
fun check_blanks content =
  let
    fun bad_blank_error (c, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos);
  in
    (case bad_blanks content of
      first_bad :: _ => bad_blank_error first_bad
    | [] => ())
  end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    50
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    51
(*the symbol is exactly the plain space*)
fun is_space (sym_pos: Symbol_Pos.T) = (#1 sym_pos = Symbol.space);
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    52
(*content is empty if every part is text made of spaces only; any non-text part makes it non-empty*)
fun is_empty content =
  let
    fun only_spaces (Antiquote.Text syms) = forall is_space syms
      | only_spaces _ = false;
  in forall only_spaces content end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    53
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    54
(*scan the line prefix: number of leading spaces, then an optional list marker*)
val scan_prefix =
  let
    val scan_indent = Scan.many is_space >> length;
    val scan_kind =
      Symbol_Pos.$$ "\<^item>" >> K Itemize ||
      Symbol_Pos.$$ "\<^enum>" >> K Enumerate ||
      Symbol_Pos.$$ "\<^descr>" >> K Description;
    (*NOTE(review): scan_pos runs after the marker symbol has been consumed, so the
      recorded position presumably refers to what follows the marker -- confirm*)
    val scan_marker = scan_kind -- Symbol_Pos.scan_pos;
  in scan_indent -- Scan.option scan_marker end;
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    60
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    61
(*Determine (indent, marker) from the first part of the line content.

  Only a leading Antiquote.Text part is inspected; any other content (or empty
  content) yields the default (0, NONE).

  The prefix scanner is run via Scan.finite and the unconsumed remainder of the
  text is ignored.  Scan.read would be wrong here: it only returns a result when
  the scanner consumes the whole input, so every line with text after its
  indentation/marker would fall back to (0, NONE).  scan_prefix is built from
  Scan.many and Scan.option only, so it does not fail on ordinary input.*)
fun scan_line (Antiquote.Text ss :: _) =
      #1 (Scan.finite Symbol_Pos.stopper scan_prefix ss)
  | scan_line _ = (0, NONE);
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    64
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    65
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    66
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    67
(*build a line from its content: reject bad blanks, then compute indent, marker, emptiness*)
fun make_line content =
  let
    val () = check_blanks content;
    val (line_indent, line_marker) = scan_line content;
    val empty = is_empty content;
  in
    Line {content = content, is_empty = empty, indent = line_indent, marker = line_marker}
  end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    72
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    73
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    74
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    75
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    76
(* make blocks *)
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    77
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    78
(*turn each span of lines into a paragraph block*)
fun make_blocks spans = spans |> map (fn span => Paragraph span);
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    79
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    80
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    81
(* read document *)
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    82
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    83
local
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    84
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    85
(*artificial end-of-input line: a single eof symbol without position*)
val eof =
  let val eof_content = [Antiquote.Text [(Symbol.eof, Position.none)]]
  in Line {marker = NONE, indent = 0, is_empty = false, content = eof_content} end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    88
(*stopper for scanning line lists: always produces eof, and recognizes exactly eof*)
val stopper =
  let fun is_eof ln = (ln = eof)
  in Scan.stopper (fn _ => eof) is_eof end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    89
61442
467ebb937294 clarified;
wenzelm
parents: 61441
diff changeset
    90
(*a plain line is non-empty, carries no list marker, and is not the eof line*)
fun plain_line line =
  if line_is_empty line then false
  else if is_some (line_marker line) then false
  else line <> eof;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    92
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
    93
(*a span is either one or more plain lines, or one marked line followed by plain lines*)
val parse_span =
  let
    val plain_span = Scan.many1 plain_line;
    val marked_span = Scan.one (is_some o line_marker) -- Scan.many plain_line >> op ::;
  in plain_span || marked_span end;
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    95
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    96
(*a document is a span followed by further spans, each preceded by one empty line*)
val parse_document =
  let
    val separated_span = Scan.one line_is_empty |-- parse_span;
  in (parse_span ::: Scan.repeat separated_span) >> make_blocks end;
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    98
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
    99
(*several documents separated by runs of empty lines, with optional empty lines
  before the first and after the last; the resulting block lists are concatenated*)
val parse_documents =
  let
    val blanks = Scan.many line_is_empty;
    val blanks1 = Scan.many1 line_is_empty;
    val first_document = blanks |-- parse_document;
    val more_documents = Scan.repeat (blanks1 |-- parse_document) --| blanks;
  in (first_document ::: more_documents) >> flat end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   103
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   104
in
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   105
61443
78bbfadd1034 more document structure;
wenzelm
parents: 61442
diff changeset
   106
(*read source text, split it into lines, and parse those into blocks;
  the result is [] whenever Scan.read yields no result*)
fun read_document source =
  let
    val antiquotes = Antiquote.read source;
    val lines = map make_line (Antiquote.split_lines antiquotes);
  in the_default [] (Scan.read stopper parse_documents lines) end;
61441
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   109
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   110
end;
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   111
20ff1d5c74e1 minimal support for Markdown documents;
wenzelm
parents:
diff changeset
   112
end;