src/Pure/Thy/markdown.ML
author wenzelm
Thu Oct 15 13:48:47 2015 +0200 (2015-10-15)
changeset 61448 25e40e78f6d4
parent 61446 9b09acfb7e06
child 61449 4f31f79cf2d1
permissions -rw-r--r--
more comments;
     1 (*  Title:      Pure/Thy/markdown.ML
     2     Author:     Makarius
     3 
     4 Minimal support for Markdown documents (see also http://commonmark.org)
     5 that consist only of paragraphs and (nested) lists:
     6 
     7   * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
     8   * adjacent list items with same indentation and same marker are grouped
     9     into a single list
    10   * singleton blank lines separate paragraphs
    11   * multiple blank lines escape from the current list hierarchy
    12 
    13 Notable differences to official Markdown:
    14 
    15   * indentation of list items needs to match exactly
    16   * indentation is unlimited (Markdown interprets 4 spaces as indented code block)
    17   * list items always consist of paragraphs -- no notion of "tight" list
    18 *)
    19 
    20 signature MARKDOWN =
    21 sig
    22   datatype kind = Itemize | Enumerate | Description
    23   type marker = {indent: int, kind: kind}
    24   type line
    25   val line_content: line -> Antiquote.text_antiquote list
    26   val make_line: Antiquote.text_antiquote list -> line
    27   val empty_line: line
    28   datatype block = Paragraph of line list | List of marker * block list
    29   val read_lines: line list -> block list
    30   val read: Input.source -> block list
    31 end;
    32 
    33 structure Markdown: MARKDOWN =
    34 struct
    35 
    36 (* document lines *)
    37 
    38 datatype kind = Itemize | Enumerate | Description;
    39 type marker = {indent: int, kind: kind};
    40 
    41 datatype line =
    42   Line of
    43    {content: Antiquote.text_antiquote list,
    44     is_empty: bool,
    45     marker: (marker * Position.T) option};
    46 
    47 val eof_line =
    48   Line {content = [Antiquote.Text [(Symbol.eof, Position.none)]],
    49     is_empty = false, marker = NONE};
    50 
    51 fun line_content (Line {content, ...}) = content;
    52 fun line_is_empty (Line {is_empty, ...}) = is_empty;
    53 fun line_marker (Line {marker, ...}) = marker;
    54 
    55 
    56 (* make line *)
    57 
    58 local
    59 
    60 fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
    61 val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
    62 
    63 fun check_blanks content =
    64   (case bad_blanks content of
    65     [] => ()
    66   | (c, pos) :: _ =>
    67       error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
    68 
    69 fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;
    70 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
    71 
    72 val scan_marker =
    73   Scan.many is_space --
    74   (Symbol_Pos.$$ "\<^item>" >> K Itemize ||
    75    Symbol_Pos.$$ "\<^enum>" >> K Enumerate ||
    76    Symbol_Pos.$$ "\<^descr>" >> K Description) >> (fn (a, b) => {indent = length a, kind = b});
    77 
    78 fun read_marker (Antiquote.Text ss :: _) =
    79       #1 (Scan.finite Symbol_Pos.stopper (Scan.option (scan_marker -- Symbol_Pos.scan_pos)) ss)
    80   | read_marker _ = NONE;
    81 
    82 in
    83 
    84 fun make_line content =
    85   let
    86     val _ = check_blanks content;
    87     val marker = read_marker content;
    88   in Line {content = content, is_empty = is_empty content, marker = marker} end;
    89 
    90 val empty_line = make_line [];
    91 
    92 end;
    93 
    94 
    95 (* document blocks *)
    96 
    97 datatype block = Paragraph of line list | List of marker * block list;
    98 
    99 fun add_span (opt_marker, body) document =
   100   (case (opt_marker, document) of
   101     (SOME marker, (list as List (list_marker, list_body)) :: rest) =>
   102       if marker = list_marker then
   103         List (list_marker, body @ list_body) :: rest
   104       else if #indent marker < #indent list_marker then
   105         List (marker, body @ [list]) :: rest
   106       else
   107         List (marker, body) :: document
   108   | (SOME marker, _) => List (marker, body) :: document
   109   | (NONE, _) => body @ document);
   110 
   111 
   112 (* read document *)
   113 
   114 local
   115 
   116 fun plain_line line =
   117   not (line_is_empty line) andalso is_none (line_marker line) andalso line <> eof_line;
   118 
   119 val parse_paragraph = Scan.many1 plain_line >> Paragraph;
   120 
   121 val parse_span =
   122   parse_paragraph >> (fn par => (NONE, [par])) ||
   123   Scan.one (is_some o line_marker) -- Scan.many plain_line --
   124     Scan.repeat (Scan.one line_is_empty |-- parse_paragraph) >>
   125       (fn ((line, lines), pars) =>
   126         (Option.map #1 (line_marker line), Paragraph (line :: lines) :: pars));
   127 
   128 val parse_document =
   129   parse_span ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span)
   130     >> (fn spans => fold_rev add_span spans []);
   131 
   132 in
   133 
   134 val read_lines =
   135   Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
   136     (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
   137   the_default [] #> flat;
   138 
   139 end;
   140 
   141 val read = Antiquote.read #> Antiquote.split_lines #> map make_line #> read_lines;
   142 
   143 end;