src/Pure/Thy/markdown.ML
author wenzelm
Sat Oct 17 19:26:34 2015 +0200 (2015-10-17)
changeset 61459 5f2ddeb15c06
parent 61457 3e21699bb83b
child 61460 732028edfbc7
permissions -rw-r--r--
clarified nesting of paragraphs: indentation is taken into account more uniformly;
tuned;
     1 (*  Title:      Pure/Thy/markdown.ML
     2     Author:     Makarius
     3 
     4 Minimal support for Markdown documents (see also http://commonmark.org)
     5 that consist only of paragraphs and (nested) lists:
     6 
     7   * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
     8   * adjacent list items with same indentation and same marker are grouped
     9     into a single list
    10   * singleton blank lines separate paragraphs
    11   * multiple blank lines escape from the current list hierarchy
    12 
    13 Notable differences to official Markdown:
    14 
    15   * indentation of list items needs to match exactly
    16   * indentation is unlimited (Markdown interprets 4 spaces as indented code block)
    17   * list items always consist of paragraphs -- no notion of "tight" list
    18 *)
    19 
    20 signature MARKDOWN =
    21 sig
    22   val is_control: Symbol.symbol -> bool
    23   datatype kind = Itemize | Enumerate | Description
    24   val print_kind: kind -> string
    25   type line
    26   val line_source: line -> Antiquote.text_antiquote list
    27   val line_content: line -> Antiquote.text_antiquote list
    28   val make_line: Antiquote.text_antiquote list -> line
    29   val empty_line: line
    30   datatype block = Paragraph of line list | List of {indent: int, kind: kind, body: block list}
    31   val read_lines: line list -> block list
    32   val read_antiquotes: Antiquote.text_antiquote list -> block list
    33   val read_source: Input.source -> block list
    34   val text_reports: Antiquote.text_antiquote list -> Position.report list
    35   val reports: block list -> Position.report list
    36 end;
    37 
    38 structure Markdown: MARKDOWN =
    39 struct
    40 
    41 (* document lines *)
    42 
    43 val is_control = member (op =) ["\\<^item>", "\\<^enum>", "\\<^descr>"];
    44 
    45 datatype kind = Itemize | Enumerate | Description;
    46 
    47 fun print_kind Itemize = "itemize"
    48   | print_kind Enumerate = "enumerate"
    49   | print_kind Description = "description";
    50 
    51 datatype line =
    52   Line of
    53    {source: Antiquote.text_antiquote list,
    54     is_empty: bool,
    55     indent: int,
    56     item: kind option,
    57     item_pos: Position.T,
    58     content: Antiquote.text_antiquote list};
    59 
    60 val eof_line =
    61   Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
    62     is_empty = false, indent = 0, item = NONE, item_pos = Position.none, content = []};
    63 
    64 fun line_source (Line {source, ...}) = source;
    65 fun line_is_empty (Line {is_empty, ...}) = is_empty;
    66 fun line_indent (Line {indent, ...}) = indent;
    67 fun line_item (Line {item, ...}) = item;
    68 fun line_item_pos (Line {item_pos, ...}) = item_pos;
    69 fun line_content (Line {content, ...}) = content;
    70 
    71 
    72 (* make line *)
    73 
    74 local
    75 
    76 fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
    77 val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
    78 
    79 fun check_blanks source =
    80   (case bad_blanks source of
    81     [] => ()
    82   | (c, pos) :: _ =>
    83       error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
    84 
    85 fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;
    86 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
    87 
    88 val scan_marker =
    89   Scan.many is_space -- Symbol_Pos.scan_pos --
    90   Scan.option
    91    (Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
    92     Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
    93     Symbol_Pos.$$ "\\<^descr>" >> K Description) --| Scan.many is_space
    94   >> (fn ((sp, pos), item) => (length sp, item, if is_some item then pos else Position.none));
    95 
    96 fun read_marker (Antiquote.Text ss :: rest) =
    97       (case Scan.finite Symbol_Pos.stopper scan_marker ss of
    98         (marker, []) => (marker, rest)
    99       | (marker, ss') => (marker, Antiquote.Text ss' :: rest))
   100   | read_marker source = ((0, NONE, Position.none), source);
   101 
   102 in
   103 
   104 fun make_line source =
   105   let
   106     val _ = check_blanks source;
   107     val ((indent, item, item_pos), content) = read_marker source;
   108   in
   109     Line {source = source, is_empty = is_empty source, indent = indent,
   110       item = item, item_pos = item_pos, content = content}
   111   end;
   112 
   113 val empty_line = make_line [];
   114 
   115 end;
   116 
   117 
   118 (* document blocks *)
   119 
   120 datatype block =
   121   Paragraph of line list | List of {indent: int, kind: kind, body: block list};
   122 
   123 fun block_lines (Paragraph lines) = lines
   124   | block_lines (List {body, ...}) = maps block_lines body;
   125 
   126 fun block_range (Paragraph lines) = Antiquote.range (maps line_content lines)
   127   | block_range (List {body, ...}) = Antiquote.range (maps line_source (maps block_lines body));
   128 
   129 fun block_indent (List {indent, ...}) = indent
   130   | block_indent (Paragraph (line :: _)) = line_indent line
   131   | block_indent _ = 0;
   132 
   133 fun block_list indent0 kind0 (List {indent, kind, body}) =
   134       if indent0 = indent andalso kind0 = kind then SOME body else NONE
   135   | block_list _ _ _ = NONE;
   136 
   137 val is_list = fn List _ => true | _ => false;
   138 
   139 
   140 (* read document *)
   141 
   142 local
   143 
   144 fun build (indent, item, rev_body) document =
   145   (case (item, document) of
   146     (SOME kind, block :: blocks) =>
   147       (case block_list indent kind block of
   148         SOME list => List {indent = indent, kind = kind, body = fold cons rev_body list} :: blocks
   149       | NONE =>
   150           if (if is_list block then indent < block_indent block else indent <= block_indent block)
   151           then build (indent, item, block :: rev_body) blocks
   152           else List {indent = indent, kind = kind, body = rev rev_body} :: document)
   153   | (SOME kind, []) => [List {indent = indent, kind = kind, body = rev rev_body}]
   154   | (NONE, _) => fold cons rev_body document);
   155 
   156 fun plain_line line =
   157   not (line_is_empty line) andalso is_none (line_item line) andalso line <> eof_line;
   158 
   159 val parse_paragraph =
   160   Scan.one (fn line => line <> eof_line) -- Scan.many plain_line >> (fn (line, lines) =>
   161     let val block = Paragraph (line :: lines)
   162     in (line_indent line, line_item line, [block]) end);
   163 
   164 val parse_document =
   165   parse_paragraph ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_paragraph)
   166     >> (fn pars => fold_rev build pars []);
   167 
   168 in
   169 
   170 val read_lines =
   171   Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
   172     (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
   173   the_default [] #> flat;
   174 
   175 val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines;
   176 val read_source = Antiquote.read #> read_antiquotes;
   177 
   178 end;
   179 
   180 
   181 (* PIDE reports *)
   182 
   183 val text_reports =
   184   maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []);
   185 
   186 local
   187 
   188 fun line_reports depth line =
   189   cons (line_item_pos line, Markup.markdown_item depth) #>
   190   append (text_reports (line_content line));
   191 
   192 fun block_reports depth block =
   193   (case block of
   194     Paragraph lines =>
   195       cons (#1 (block_range block), Markup.markdown_paragraph) #>
   196       fold (line_reports depth) lines
   197   | List {kind, body, ...} =>
   198       cons (#1 (block_range block), Markup.markdown_list (print_kind kind)) #>
   199       fold (block_reports (depth + 1)) body);
   200 
   201 in
   202 
   203 fun reports blocks =
   204   filter (Position.is_reported o #1) (fold (block_reports 0) blocks []);
   205 
   206 end;
   207 
   208 end;