src/Pure/Thy/markdown.ML
author wenzelm
Sat Oct 17 19:47:34 2015 +0200 (2015-10-17)
changeset 61461 77c9643a6353
parent 61460 732028edfbc7
child 61595 3591274c607e
permissions -rw-r--r--
more explicit output of list items;
     1 (*  Title:      Pure/Thy/markdown.ML
     2     Author:     Makarius
     3 
     4 Minimal support for Markdown documents (see also http://commonmark.org)
     5 that consist only of paragraphs and (nested) lists:
     6 
     7   * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
     8   * adjacent list items with same indentation and same marker are grouped
     9     into a single list
    10   * singleton blank lines separate paragraphs
    11   * multiple blank lines escape from the current list hierarchy
    12 
    13 Notable differences to official Markdown:
    14 
    15   * indentation of list items needs to match exactly
    16   * indentation is unlimited (Markdown interprets 4 spaces as block quote)
    17   * list items always consist of paragraphs -- no notion of "tight" list
    18 *)
    19 
    20 signature MARKDOWN =
    21 sig
    22   val is_control: Symbol.symbol -> bool
    23   datatype kind = Itemize | Enumerate | Description
    24   val print_kind: kind -> string
    25   type line
    26   val line_source: line -> Antiquote.text_antiquote list
    27   val line_is_item: line -> bool
    28   val line_content: line -> Antiquote.text_antiquote list
    29   val make_line: Antiquote.text_antiquote list -> line
    30   val empty_line: line
    31   datatype block = Paragraph of line list | List of {indent: int, kind: kind, body: block list}
    32   val read_lines: line list -> block list
    33   val read_antiquotes: Antiquote.text_antiquote list -> block list
    34   val read_source: Input.source -> block list
    35   val text_reports: Antiquote.text_antiquote list -> Position.report list
    36   val reports: block list -> Position.report list
    37 end;
    38 
    39 structure Markdown: MARKDOWN =
    40 struct
    41 
    42 (* document lines *)
    43 
    44 val is_control = member (op =) ["\\<^item>", "\\<^enum>", "\\<^descr>"];
    45 
    46 datatype kind = Itemize | Enumerate | Description;
    47 
    48 fun print_kind Itemize = "itemize"
    49   | print_kind Enumerate = "enumerate"
    50   | print_kind Description = "description";
    51 
    52 datatype line =
    53   Line of
    54    {source: Antiquote.text_antiquote list,
    55     is_empty: bool,
    56     indent: int,
    57     item: kind option,
    58     item_pos: Position.T,
    59     content: Antiquote.text_antiquote list};
    60 
    61 val eof_line =
    62   Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
    63     is_empty = false, indent = 0, item = NONE, item_pos = Position.none, content = []};
    64 
    65 fun line_source (Line {source, ...}) = source;
    66 fun line_is_empty (Line {is_empty, ...}) = is_empty;
    67 fun line_is_item (Line {item, ...}) = is_some item;
    68 fun line_content (Line {content, ...}) = content;
    69 
    70 
    71 (* make line *)
    72 
    73 local
    74 
    75 fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
    76 val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
    77 
    78 fun check_blanks source =
    79   (case bad_blanks source of
    80     [] => ()
    81   | (c, pos) :: _ =>
    82       error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
    83 
    84 fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;
    85 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
    86 
    87 val scan_marker =
    88   Scan.many is_space -- Symbol_Pos.scan_pos --
    89   Scan.option
    90    (Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
    91     Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
    92     Symbol_Pos.$$ "\\<^descr>" >> K Description) --| Scan.many is_space
    93   >> (fn ((sp, pos), item) => (length sp, item, if is_some item then pos else Position.none));
    94 
    95 fun read_marker (Antiquote.Text ss :: rest) =
    96       (case Scan.finite Symbol_Pos.stopper scan_marker ss of
    97         (marker, []) => (marker, rest)
    98       | (marker, ss') => (marker, Antiquote.Text ss' :: rest))
    99   | read_marker source = ((0, NONE, Position.none), source);
   100 
   101 in
   102 
   103 fun make_line source =
   104   let
   105     val _ = check_blanks source;
   106     val ((indent, item, item_pos), content) = read_marker source;
   107   in
   108     Line {source = source, is_empty = is_empty source, indent = indent,
   109       item = item, item_pos = item_pos, content = content}
   110   end;
   111 
   112 val empty_line = make_line [];
   113 
   114 end;
   115 
   116 
   117 (* document blocks *)
   118 
   119 datatype block =
   120   Paragraph of line list | List of {indent: int, kind: kind, body: block list};
   121 
   122 fun block_lines (Paragraph lines) = lines
   123   | block_lines (List {body, ...}) = maps block_lines body;
   124 
   125 fun block_range (Paragraph lines) = Antiquote.range (maps line_content lines)
   126   | block_range (List {body, ...}) = Antiquote.range (maps line_source (maps block_lines body));
   127 
   128 fun block_indent (List {indent, ...}) = indent
   129   | block_indent (Paragraph (Line {indent, ...} :: _)) = indent
   130   | block_indent _ = 0;
   131 
   132 fun block_list indent0 kind0 (List {indent, kind, body}) =
   133       if indent0 = indent andalso kind0 = kind then SOME body else NONE
   134   | block_list _ _ _ = NONE;
   135 
   136 val is_list = fn List _ => true | _ => false;
   137 
   138 
   139 (* read document *)
   140 
   141 local
   142 
   143 fun build (indent, item, rev_body) document =
   144   (case (item, document) of
   145     (SOME kind, block :: blocks) =>
   146       (case block_list indent kind block of
   147         SOME list => List {indent = indent, kind = kind, body = fold cons rev_body list} :: blocks
   148       | NONE =>
   149           if (if is_list block then indent < block_indent block else indent <= block_indent block)
   150           then build (indent, item, block :: rev_body) blocks
   151           else List {indent = indent, kind = kind, body = rev rev_body} :: document)
   152   | (SOME kind, []) => [List {indent = indent, kind = kind, body = rev rev_body}]
   153   | (NONE, _) => fold cons rev_body document);
   154 
   155 fun plain_line (line as Line {is_empty, item, ...}) =
   156   not is_empty andalso is_none item andalso line <> eof_line;
   157 
   158 val parse_paragraph =
   159   Scan.one (fn line => line <> eof_line) -- Scan.many plain_line >> (fn (line, lines) =>
   160     let
   161       val Line {indent, item, ...} = line;
   162       val block = Paragraph (line :: lines);
   163     in (indent, item, [block]) end);
   164 
   165 val parse_document =
   166   parse_paragraph ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_paragraph)
   167     >> (fn pars => fold_rev build pars []);
   168 
   169 in
   170 
   171 val read_lines =
   172   Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
   173     (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
   174   the_default [] #> flat;
   175 
   176 val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines;
   177 val read_source = Antiquote.read #> read_antiquotes;
   178 
   179 end;
   180 
   181 
   182 (* PIDE reports *)
   183 
   184 val text_reports =
   185   maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []);
   186 
   187 local
   188 
   189 fun line_reports depth (Line {item_pos, content, ...}) =
   190   cons (item_pos, Markup.markdown_item depth) #> append (text_reports content);
   191 
   192 fun block_reports depth block =
   193   (case block of
   194     Paragraph lines =>
   195       cons (#1 (block_range block), Markup.markdown_paragraph) #>
   196       fold (line_reports depth) lines
   197   | List {kind, body, ...} =>
   198       cons (#1 (block_range block), Markup.markdown_list (print_kind kind)) #>
   199       fold (block_reports (depth + 1)) body);
   200 
   201 in
   202 
   203 fun reports blocks =
   204   filter (Position.is_reported o #1) (fold (block_reports 0) blocks []);
   205 
   206 end;
   207 
   208 end;