src/Pure/Thy/markdown.ML
author wenzelm
Sun Oct 23 12:35:48 2016 +0200 (2016-10-23)
changeset 64357 e10fa8afc96c
parent 62804 7b9c5416f30e
child 67322 734a4e44b159
permissions -rw-r--r--
tuned signature: avoid conflict with "paragraph" as section heading;
     1 (*  Title:      Pure/Thy/markdown.ML
     2     Author:     Makarius
     3 
     4 Minimal support for Markdown documents (see also http://commonmark.org)
     5 that consist only of paragraphs and (nested) lists:
     6 
     7   * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
     8   * adjacent list items with same indentation and same marker are grouped
     9     into a single list
    10   * singleton blank lines separate paragraphs
    11   * multiple blank lines escape from the current list hierarchy
    12 
    13 Notable differences to official Markdown:
    14 
    15   * indentation of list items needs to match exactly
    16   * indentation is unlimited (Markdown interprets 4 spaces as indented code block)
    17   * list items always consist of paragraphs -- no notion of "tight" list
    18 *)
    19 
    (*Interface of the Markdown reader: item kinds, classified lines,
      paragraph/list blocks, readers that produce blocks, and PIDE reports.*)
    signature MARKDOWN =
    sig
      (*item kinds*)
      datatype kind = Itemize | Enumerate | Description
      val print_kind: kind -> string
      val is_control: Symbol.symbol -> bool

      (*document lines*)
      type line
      val line_source: line -> Antiquote.text_antiquote list
      val line_is_item: line -> bool
      val line_content: line -> Antiquote.text_antiquote list
      val make_line: Antiquote.text_antiquote list -> line
      val empty_line: line

      (*document blocks*)
      datatype block =
        Par of line list |
        List of {indent: int, kind: kind, body: block list}

      (*read document*)
      val read_lines: line list -> block list
      val read_antiquotes: Antiquote.text_antiquote list -> block list
      val read_source: Input.source -> block list

      (*PIDE reports*)
      val text_reports: Antiquote.text_antiquote list -> Position.report list
      val reports: block list -> Position.report list
    end;
    38 
    39 structure Markdown: MARKDOWN =
    40 struct
    41 
    42 (* item kinds *)
    43 
    (*The three kinds of list items.*)
    datatype kind = Itemize | Enumerate | Description;

    (*Printable name of an item kind.*)
    fun print_kind kind =
      (case kind of
        Itemize => "itemize"
      | Enumerate => "enumerate"
      | Description => "description");
    49 
    (*Names of marker control symbols, each with the item kind it introduces.*)
    val kinds =
     [("item", Itemize),
      ("enum", Enumerate),
      ("descr", Description)];

    (*Test whether a symbol is one of the three list item markers.*)
    fun is_control sym = member (op =) ["\<^item>", "\<^enum>", "\<^descr>"] sym;
    53 
    54 
    55 (* document lines *)
    56 
    (*A source line together with the classification computed by make_line.*)
    datatype line =
      Line of {
        source: Antiquote.text_antiquote list,  (*the line as given*)
        is_empty: bool,  (*only text parts, all consisting of spaces*)
        indent: int,  (*number of leading spaces*)
        item: kind option,  (*kind of a recognized item marker*)
        item_pos: Position.T,  (*position of the marker, or Position.none*)
        content: Antiquote.text_antiquote list  (*rest after indentation and marker*)
      };
    65 
    (*Artificial non-empty line that holds just the eof symbol; read_lines uses it
      as the stopper of the line scanner.*)
    val eof_line =
      let val eof_text = Antiquote.Text [(Symbol.eof, Position.none)] in
        Line {source = [eof_text], is_empty = false, indent = 0, item = NONE,
          item_pos = Position.none, content = []}
      end;
    69 
    (*Selectors for the fields of a line; line_is_item holds for lines that carry
      an item marker.*)
    fun line_source (Line fields) = #source fields;
    fun line_is_empty (Line fields) = #is_empty fields;
    fun line_is_item (Line {item = SOME _, ...}) = true
      | line_is_item _ = false;
    fun line_content (Line fields) = #content fields;
    74 
    75 
    76 (* make line *)
    77 
    78 local
    79 
    (*A bad blank is an ASCII blank other than the plain space character.*)
    fun bad_blank (sym_pos: Symbol_Pos.T) =
      let val s = Symbol_Pos.symbol sym_pos
      in s <> Symbol.space andalso Symbol.is_ascii_blank s end;

    (*All bad blanks within the text parts of a line; non-text parts contribute nothing.*)
    fun bad_blanks source =
      let
        fun text_blanks (Antiquote.Text ss) = filter bad_blank ss
          | text_blanks _ = [];
      in maps text_blanks source end;
    82 
    (*Raise an error for the first bad blank of a line, quoting the character and
      giving its position; lines without bad blanks pass silently.*)
    fun check_blanks source =
      let
        fun bad_blank_error (c, pos) =
          error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos);
      in
        (case bad_blanks source of
          first :: _ => bad_blank_error first
        | [] => ())
      end;
    88 
    (*Test for a space symbol, disregarding its position.*)
    fun is_space sym_pos = Symbol.is_space (Symbol_Pos.symbol sym_pos);

    (*A line is empty if every part is a text part that consists of spaces only.*)
    fun is_empty source =
      let
        fun blank_text (Antiquote.Text ss) = forall is_space ss
          | blank_text _ = false;
      in forall blank_text source end;
    91 
    (*Remove the leading spaces of a line that starts with a text part.  The result
      is the number of removed spaces and the remaining line, where a text part
      that has become empty is dropped.  Any other line is returned unchanged with
      count 0.*)
    fun strip_spaces source =
      (case source of
        Antiquote.Text ss :: rest =>
          (case take_prefix is_space ss of
            (spaces, []) => (length spaces, rest)
          | (spaces, ss') => (length spaces, Antiquote.Text ss' :: rest))
      | _ => (0, source));
    96 
    (*Split a line into (indent, item kind, item position) and its content.
      An item marker is a control symbol without body that follows the indentation
      and whose name occurs in kinds; spaces after the marker are stripped as well.
      Without such a marker the result has no item, Position.none, and the
      unindented line as content.*)
    fun read_marker source =
      let
        val (indent, unindented) = strip_spaces source;
        val no_marker = ((indent, NONE, Position.none), unindented);
      in
        (case unindented of
          Antiquote.Control {name = (name, pos), body = [], ...} :: rest =>
            (case AList.lookup (op =) kinds name of
              SOME kind => ((indent, SOME kind, pos), #2 (strip_spaces rest))
            | NONE => no_marker)  (*unknown control symbol: ordinary content*)
        | _ => no_marker)
      end;
   108 
   109 in
   110 
   (*Classify a source line: bad blanks are rejected with an error first, then
     indentation, item marker and content are recorded next to the source.*)
   fun make_line source =
     (check_blanks source;
       let val ((indent, item, item_pos), content) = read_marker source in
         Line {source = source, is_empty = is_empty source, indent = indent,
           item = item, item_pos = item_pos, content = content}
       end);

   (*The line made from empty source.*)
   val empty_line = make_line [];
   121 
   122 end;
   123 
   124 
   125 (* document blocks *)
   126 
   (*A block is either a paragraph of lines, or a list with indentation and item
     kind whose body consists of blocks again.*)
   datatype block =
     Par of line list |
     List of {indent: int, kind: kind, body: block list};
   128 
   (*All lines of a block in order, descending into list bodies.*)
   fun block_lines block =
     (case block of
       Par lines => lines
     | List {body, ...} => maps block_lines body);
   131 
   (*Range of a block: a paragraph is measured by the content of its lines, a
     list by the full source of all lines it contains.*)
   fun block_range block =
     let
       val text =
         (case block of
           Par lines => maps line_content lines
         | List _ => maps line_source (block_lines block));
     in Antiquote.range text end;
   134 
   (*Indentation of a block: that of a list itself, or of the first line of a
     paragraph; a paragraph without lines counts as 0.*)
   fun block_indent block =
     (case block of
       List {indent, ...} => indent
     | Par [] => 0
     | Par (Line {indent, ...} :: _) => indent);
   138 
   (*Body of a list block that has exactly the given indentation and kind;
     NONE for any other block.*)
   fun block_list indent0 kind0 block =
     (case block of
       List {indent, kind, body} =>
         if indent <> indent0 orelse kind <> kind0 then NONE else SOME body
     | Par _ => NONE);
   142 
   (*Distinguish list blocks from paragraphs.*)
   fun is_list (List _) = true
     | is_list (Par _) = false;
   144 
   145 
   146 (* read document *)
   147 
   148 local
   149 
   (*Put one parsed paragraph (indent, item, rev_body) in front of the document
     that was built from the paragraphs after it.

     A paragraph without item marker is simply prepended.  For an item, the next
     block of the document decides:
       * a list of the same indentation and kind takes the item body as its front;
       * a block nested below the item (a list that is indented strictly deeper,
         or a paragraph that is indented at least as deep) is absorbed into the
         item body, and building continues with the rest of the document;
       * otherwise the item starts a new list in front of the document.
     rev_body holds the blocks of the item in reverse order.*)
   fun build (_, NONE, rev_body) document = fold cons rev_body document
     | build (indent, item as SOME kind, rev_body) document =
         let
           fun make_list body = List {indent = indent, kind = kind, body = body};
           fun nested block =
             let val block_ind = block_indent block
             in if is_list block then indent < block_ind else indent <= block_ind end;
         in
           (case document of
             [] => [make_list (rev rev_body)]
           | block :: blocks =>
               (case block_list indent kind block of
                 SOME list => make_list (fold cons rev_body list) :: blocks
               | NONE =>
                   if nested block then build (indent, item, block :: rev_body) blocks
                   else make_list (rev rev_body) :: document))
         end;
   161 
   (*A plain line continues a paragraph: it is not empty, carries no item marker,
     and is not the eof line.*)
   fun plain_line (line as Line {is_empty = false, item = NONE, ...}) = line <> eof_line
     | plain_line _ = false;
   164 
   (*Scan one paragraph: any line except eof, followed by plain lines.  The result
     is the indentation and item of the first line with the paragraph as the
     singleton body, in the form expected by build.*)
   val parse_paragraph =
     let
       fun not_eof line = line <> eof_line;
       fun paragraph (first as Line {indent, item, ...}, more) =
         (indent, item, [Par (first :: more)]);
     in Scan.one not_eof -- Scan.many plain_line >> paragraph end;
   171 
   (*Scan a non-empty sequence of paragraphs, each optionally preceded by a single
     empty line, and build the block structure starting from the last paragraph.*)
   val parse_document =
     let
       val separator = Scan.option (Scan.one line_is_empty);
       val paragraphs = parse_paragraph ::: Scan.repeat (separator |-- parse_paragraph);
     in paragraphs >> (fn pars => fold_rev build pars []) end;
   175 
   176 in
   177 
   (*Read blocks from classified lines, with eof_line as stopper.  Empty lines
     around documents are skipped and the blocks of all documents are joined;
     the result is empty if the scanner yields no result.*)
   fun read_lines lines =
     let
       val stopper = Scan.stopper (K eof_line) (fn line => line = eof_line);
       val blank_lines = Scan.many line_is_empty;
       val documents = Scan.repeat (blank_lines |-- parse_document) --| blank_lines;
     in
       (case Scan.read stopper documents lines of
         SOME blocks => flat blocks
       | NONE => [])
     end;
   182 
   (*Read blocks from antiquoted text: split into lines, classify each, then read.*)
   fun read_antiquotes text = read_lines (map make_line (Antiquote.split_lines text));

   (*Read blocks from input source via Antiquote.read.*)
   fun read_source source = read_antiquotes (Antiquote.read source);
   185 
   186 end;
   187 
   188 
   189 (* PIDE reports *)
   190 
   (*One Markup.words report for each text part, located at the position of its
     symbol range; other parts yield no report.*)
   fun text_reports text =
     let
       fun words (Antiquote.Text ss) = [(#1 (Symbol_Pos.range ss), Markup.words)]
         | words _ = [];
     in maps words text end;
   193 
   194 local
   195 
   (*Add the reports of one line to the accumulated ones: the item markup for the
     given depth at the marker position, with the word reports of the content in
     front.*)
   fun line_reports depth (Line {item_pos, content, ...}) reports =
     text_reports content @ ((item_pos, Markup.markdown_item depth) :: reports);
   198 
   (*Add the reports of a block to the accumulated ones.  The block itself is
     reported at the start of its range, as paragraph or as list of its kind;
     then follow the lines of a paragraph at the current depth, or the body of a
     list at depth + 1.*)
   fun block_reports depth block reports =
     let val pos = #1 (block_range block) in
       (case block of
         Par lines =>
           fold (line_reports depth) lines ((pos, Markup.markdown_paragraph) :: reports)
       | List {kind, body, ...} =>
           fold (block_reports (depth + 1)) body
             ((pos, Markup.markdown_list (print_kind kind)) :: reports))
     end;
   207 
   208 in
   209 
   (*All reports of the given blocks, starting at depth 0; only reports whose
     position satisfies Position.is_reported are kept.*)
   fun reports blocks =
     [] |> fold (block_reports 0) blocks
        |> filter (fn (pos, _) => Position.is_reported pos);
   212 
   213 end;
   214 
   215 end;