src/Pure/Thy/markdown.ML
author wenzelm
Thu Oct 15 15:06:03 2015 +0200 (2015-10-15)
changeset 61449 4f31f79cf2d1
parent 61448 25e40e78f6d4
child 61450 239a04ec2d4c
permissions -rw-r--r--
report Markdown document structure;
wenzelm@61441
     1
(*  Title:      Pure/Thy/markdown.ML
wenzelm@61441
     2
    Author:     Makarius
wenzelm@61441
     3
wenzelm@61448
     4
Minimal support for Markdown documents (see also http://commonmark.org)
wenzelm@61448
     5
that consist only of paragraphs and (nested) lists:
wenzelm@61448
     6
wenzelm@61448
     7
  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
wenzelm@61448
     8
  * adjacent list items with same indentation and same marker are grouped
wenzelm@61448
     9
    into a single list
wenzelm@61448
    10
  * singleton blank lines separate paragraphs
wenzelm@61448
    11
  * multiple blank lines escape from the current list hierarchy
wenzelm@61448
    12
wenzelm@61448
    13
Notable differences to official Markdown:
wenzelm@61448
    14
wenzelm@61448
    15
  * indentation of list items needs to match exactly
wenzelm@61448
    16
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
wenzelm@61448
    17
  * list items always consist of paragraphs -- no notion of "tight" list
wenzelm@61441
    18
*)
wenzelm@61441
    19
wenzelm@61441
    20
(*interface of the minimal Markdown reader: lines, markers, nested blocks, PIDE reports*)
signature MARKDOWN =
wenzelm@61441
    21
sig
wenzelm@61443
    22
  (*list environment, one constructor per marker symbol*)
  datatype kind = Itemize | Enumerate | Description
wenzelm@61449
    23
  (*lowercase name of the kind: "itemize", "enumerate", "description"*)
  val print_kind: kind -> string
wenzelm@61445
    24
  (*indent = number of spaces scanned before the marker symbol*)
  type marker = {indent: int, kind: kind}
wenzelm@61441
    25
  (*abstract line: built only via make_line / empty_line*)
  type line
wenzelm@61443
    26
  val line_content: line -> Antiquote.text_antiquote list
wenzelm@61445
    27
  (*checks for bad blank characters (raises error) and reads an optional leading marker*)
  val make_line: Antiquote.text_antiquote list -> line
wenzelm@61445
    28
  (*make_line applied to empty content*)
  val empty_line: line
wenzelm@61445
    29
  datatype block = Paragraph of line list | List of marker * block list
wenzelm@61445
    30
  (*parse prepared lines into the block structure*)
  val read_lines: line list -> block list
wenzelm@61445
    31
  (*read source text: antiquotes, split into lines, make_line, read_lines*)
  val read: Input.source -> block list
wenzelm@61449
    32
  (*position reports (Markup.markdown_item with nesting depth) for marker positions*)
  val reports: block list -> Position.report list
wenzelm@61441
    33
end;
wenzelm@61441
    34
wenzelm@61441
    35
structure Markdown: MARKDOWN =
wenzelm@61441
    36
struct
wenzelm@61441
    37
wenzelm@61445
    38
(* document lines *)
wenzelm@61441
    39
wenzelm@61441
    40
(*list environment of an item: selected by the marker symbol, see scan_marker*)
datatype kind = Itemize | Enumerate | Description;
wenzelm@61449
    41
wenzelm@61449
    42
(*lowercase name of a list kind*)
fun print_kind kind =
  (case kind of
    Itemize => "itemize"
  | Enumerate => "enumerate"
  | Description => "description");
wenzelm@61449
    45
wenzelm@61445
    46
(*list item marker: indent is the number of spaces scanned before the marker symbol*)
type marker = {indent: int, kind: kind};
wenzelm@61441
    47
wenzelm@61441
    48
(*one document line: its raw content, whether it consists of spaces only (or nothing),
  and the optional list marker read from its start, paired with a position*)
datatype line =
wenzelm@61441
    49
  Line of
wenzelm@61441
    50
   {content: Antiquote.text_antiquote list,
wenzelm@61441
    51
    is_empty: bool,
wenzelm@61445
    52
    marker: (marker * Position.T) option};
wenzelm@61445
    53
wenzelm@61445
    54
(*artificial line holding a single Symbol.eof: stopper for scanning line lists*)
val eof_line =
  let val eof_text = Antiquote.Text [(Symbol.eof, Position.none)]
  in Line {marker = NONE, is_empty = false, content = [eof_text]} end;
wenzelm@61441
    57
wenzelm@61441
    58
(*selector: raw content of a line*)
fun line_content (Line fields) = #content fields;
wenzelm@61441
    59
(*selector: emptiness flag of a line*)
fun line_is_empty (Line fields) = #is_empty fields;
wenzelm@61441
    60
(*selector: optional list marker (with position) of a line*)
fun line_marker (Line fields) = #marker fields;
wenzelm@61441
    61
wenzelm@61443
    62
wenzelm@61443
    63
(* make line *)
wenzelm@61443
    64
wenzelm@61441
    65
local
wenzelm@61441
    66
wenzelm@61441
    67
(*ASCII blank symbol other than plain space*)
fun bad_blank ((sym, _): Symbol_Pos.T) = sym <> Symbol.space andalso Symbol.is_ascii_blank sym;
wenzelm@61441
    68
(*all bad blanks within the plain text parts of the content; other parts contribute nothing*)
fun bad_blanks content =
  content |> maps (fn Antiquote.Text syms => filter bad_blank syms | _ => []);
wenzelm@61441
    69
wenzelm@61441
    70
(*raise an error for the first bad blank of the content, if any*)
fun check_blanks content =
  let
    fun fail (c, pos) =
      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos);
  in
    (case bad_blanks content of
      bad :: _ => fail bad
    | [] => ())
  end;
wenzelm@61441
    75
wenzelm@61441
    76
(*symbol is the plain space character*)
fun is_space (sym_pos: Symbol_Pos.T) = #1 sym_pos = Symbol.space;
wenzelm@61441
    77
(*content consists of plain text with spaces only (vacuously true for empty content)*)
fun is_empty content =
  content |> forall (fn Antiquote.Text syms => forall is_space syms | _ => false);
wenzelm@61441
    78
wenzelm@61444
    79
(*scan leading spaces, then one of the marker symbols \<^item>, \<^enum>, \<^descr>;
  result: marker (indent = number of leading spaces, kind according to the symbol)
  paired with the position obtained from Symbol_Pos.scan_pos just before the symbol*)
val scan_marker =
wenzelm@61449
    80
  Scan.many is_space -- Symbol_Pos.scan_pos --
wenzelm@61445
    81
  (Symbol_Pos.$$ "\<^item>" >> K Itemize ||
wenzelm@61445
    82
   Symbol_Pos.$$ "\<^enum>" >> K Enumerate ||
wenzelm@61449
    83
   Symbol_Pos.$$ "\<^descr>" >> K Description)
wenzelm@61449
    84
  >> (fn ((spaces, pos), kind) => ({indent = length spaces, kind = kind}, pos));
wenzelm@61442
    85
wenzelm@61444
    86
(*optional marker at the start of the content: only an initial plain text part is examined*)
fun read_marker content =
  (case content of
    Antiquote.Text syms :: _ =>
      let val (result, _) = Scan.finite Symbol_Pos.stopper (Scan.option scan_marker) syms
      in result end
  | _ => NONE);
wenzelm@61441
    89
wenzelm@61441
    90
in
wenzelm@61441
    91
wenzelm@61441
    92
(*build a line from raw content: reject bad blanks first, then record marker and emptiness*)
fun make_line content =
  (check_blanks content;
   Line {marker = read_marker content, content = content, is_empty = is_empty content});
wenzelm@61441
    97
wenzelm@61445
    98
(*line without any content*)
val empty_line = make_line [];
wenzelm@61445
    99
wenzelm@61441
   100
end;
wenzelm@61441
   101
wenzelm@61441
   102
wenzelm@61445
   103
(* document blocks *)
wenzelm@61445
   104
wenzelm@61445
   105
(*document structure: a paragraph of lines, or a list with its marker and nested blocks*)
datatype block = Paragraph of line list | List of marker * block list;
wenzelm@61445
   106
wenzelm@61445
   107
(*prepend a span (optional marker, body blocks) to the document built so far:
  - no marker: the body is put directly in front of the document;
  - same marker as the leading list: the body is merged into the front of that list;
  - smaller indent than the leading list: a new list takes the body, with the old
    list as its last element;
  - otherwise: a new list is put in front of the document*)
fun add_span (NONE, body) document = body @ document
  | add_span (SOME marker, body) document =
      (case document of
        (list as List (list_marker, list_body)) :: rest =>
          if marker = list_marker then
            List (list_marker, body @ list_body) :: rest
          else if #indent marker < #indent list_marker then
            List (marker, body @ [list]) :: rest
          else
            List (marker, body) :: document
      | _ => List (marker, body) :: document);
wenzelm@61445
   118
wenzelm@61445
   119
wenzelm@61443
   120
(* read document *)
wenzelm@61441
   121
wenzelm@61441
   122
local
wenzelm@61441
   123
wenzelm@61442
   124
(*ordinary text line: neither empty, nor marked as list item, nor the eof stopper*)
fun plain_line line =
  not (line_is_empty line orelse is_some (line_marker line) orelse line = eof_line);
wenzelm@61441
   126
wenzelm@61444
   127
(*paragraph: non-empty sequence of plain lines*)
val parse_paragraph = Scan.many1 plain_line >> Paragraph;
wenzelm@61444
   128
wenzelm@61441
   129
(*span: either a plain paragraph (no marker), or a list item consisting of a marked line
  with its plain continuation lines as first paragraph, followed by further paragraphs
  that are each introduced by one empty line; the result pairs the item marker
  (position dropped via #1) with the paragraphs of the span*)
val parse_span =
wenzelm@61444
   130
  parse_paragraph >> (fn par => (NONE, [par])) ||
wenzelm@61444
   131
  Scan.one (is_some o line_marker) -- Scan.many plain_line --
wenzelm@61444
   132
    Scan.repeat (Scan.one line_is_empty |-- parse_paragraph) >>
wenzelm@61445
   133
      (fn ((line, lines), pars) =>
wenzelm@61445
   134
        (Option.map #1 (line_marker line), Paragraph (line :: lines) :: pars));
wenzelm@61443
   135
wenzelm@61443
   136
(*document fragment: spans separated by at most one empty line each; the spans are
  folded from the right via add_span, starting from the empty document*)
val parse_document =
wenzelm@61445
   137
  parse_span ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span)
wenzelm@61445
   138
    >> (fn spans => fold_rev add_span spans []);
wenzelm@61441
   139
wenzelm@61441
   140
in
wenzelm@61441
   141
wenzelm@61445
   142
(*read all lines, with eof_line as stopper: document fragments are parsed repeatedly,
  skipping empty lines before each fragment and at the end; the block lists of all
  fragments are concatenated (flat); a missing result of Scan.read is replaced by the
  empty document (the_default []) -- presumably the case of unconsumed input, to be
  confirmed against Scan.read*)
val read_lines =
wenzelm@61445
   143
  Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
wenzelm@61445
   144
    (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
wenzelm@61445
   145
  the_default [] #> flat;
wenzelm@61441
   146
wenzelm@61441
   147
end;
wenzelm@61441
   148
wenzelm@61445
   149
(*read source text: antiquotes, split into lines, prepare each line, parse blocks*)
fun read source =
  source |> Antiquote.read |> Antiquote.split_lines |> map make_line |> read_lines;
wenzelm@61445
   150
wenzelm@61449
   151
wenzelm@61449
   152
(* PIDE reports *)
wenzelm@61449
   153
wenzelm@61449
   154
local
wenzelm@61449
   155
wenzelm@61449
   156
(*add the markdown_item report for a marked line, provided its position is reported;
  lines without marker leave the reports unchanged*)
fun line_reports depth line =
  (case line_marker line of
    NONE => I
  | SOME (_, pos) => Position.is_reported pos ? cons (pos, Markup.markdown_item depth));
wenzelm@61449
   159
wenzelm@61449
   160
(*collect reports of a block: lines of a paragraph at the current depth,
  body of a list at depth + 1*)
fun block_reports depth block =
  (case block of
    Paragraph lines => fold (line_reports depth) lines
  | List (_, body) => fold (block_reports (depth + 1)) body);
wenzelm@61449
   162
wenzelm@61449
   163
in
wenzelm@61449
   164
wenzelm@61449
   165
(*all reports of a document, with top-level blocks at depth 0*)
fun reports blocks = [] |> fold (block_reports 0) blocks;
wenzelm@61449
   166
wenzelm@61441
   167
end;
wenzelm@61449
   168
wenzelm@61449
   169
end;