src/Pure/Thy/markdown.ML
author wenzelm
Thu Oct 15 17:29:37 2015 +0200 (2015-10-15)
changeset 61454 c86286ae9fe5
parent 61453 3a3e3527445e
child 61457 3e21699bb83b
permissions -rw-r--r--
load markdown.ML into Pure;
(*  Title:      Pure/Thy/markdown.ML
    Author:     Makarius

Minimal support for Markdown documents (see also http://commonmark.org)
that consist only of paragraphs and (nested) lists:

  * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
  * adjacent list items with same indentation and same marker are grouped
    into a single list
  * singleton blank lines separate paragraphs
  * multiple blank lines escape from the current list hierarchy

Notable differences to official Markdown:

  * indentation of list items needs to match exactly
  * indentation is unlimited (Markdown interprets 4 spaces as block quote)
  * list items always consist of paragraphs -- no notion of "tight" list
*)
    19 
    20 signature MARKDOWN =
    21 sig
    22   datatype kind = Itemize | Enumerate | Description
    23   val print_kind: kind -> string
    24   type marker = {indent: int, kind: kind}
    25   type line
    26   val line_source: line -> Antiquote.text_antiquote list
    27   val line_content: line -> Antiquote.text_antiquote list
    28   val make_line: Antiquote.text_antiquote list -> line
    29   val empty_line: line
    30   datatype block = Paragraph of line list | List of marker * block list
    31   val read_lines: line list -> block list
    32   val read: Input.source -> block list
    33   val reports: block list -> Position.report list
    34 end;
    35 
    36 structure Markdown: MARKDOWN =
    37 struct
    38 
    39 (* document lines *)
    40 
    41 datatype kind = Itemize | Enumerate | Description;
    42 
    43 fun print_kind Itemize = "itemize"
    44   | print_kind Enumerate = "enumerate"
    45   | print_kind Description = "description";
    46 
    47 type marker = {indent: int, kind: kind};
    48 
    49 datatype line =
    50   Line of
    51    {source: Antiquote.text_antiquote list,
    52     content: Antiquote.text_antiquote list,
    53     is_empty: bool,
    54     marker: (marker * Position.T) option};
    55 
    56 val eof_line =
    57   Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
    58     content = [], is_empty = false, marker = NONE};
    59 
    60 fun line_source (Line {source, ...}) = source;
    61 fun line_content (Line {content, ...}) = content;
    62 fun line_is_empty (Line {is_empty, ...}) = is_empty;
    63 fun line_marker (Line {marker, ...}) = marker;
    64 
    65 
    66 (* make line *)
    67 
    68 local
    69 
    70 fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
    71 val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
    72 
    73 fun check_blanks source =
    74   (case bad_blanks source of
    75     [] => ()
    76   | (c, pos) :: _ =>
    77       error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
    78 
    79 fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;
    80 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
    81 
    82 val scan_marker =
    83   Scan.many is_space -- Symbol_Pos.scan_pos --
    84   (Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
    85    Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
    86    Symbol_Pos.$$ "\\<^descr>" >> K Description)
    87   >> (fn ((spaces, pos), kind) => ({indent = length spaces, kind = kind}, pos));
    88 
    89 fun read_marker (Antiquote.Text ss :: rest) =
    90       (case Scan.finite Symbol_Pos.stopper (Scan.option scan_marker --| Scan.many is_space) ss of
    91         (marker, []) => (marker, rest)
    92       | (marker, ss') => (marker, Antiquote.Text ss' :: rest))
    93   | read_marker source = (NONE, source);
    94 
    95 in
    96 
    97 fun make_line source =
    98   let
    99     val _ = check_blanks source;
   100     val (marker, content) = read_marker source;
   101   in Line {source = source, content = content, is_empty = is_empty source, marker = marker} end;
   102 
   103 val empty_line = make_line [];
   104 
   105 end;
   106 
   107 
   108 (* document blocks *)
   109 
   110 datatype block = Paragraph of line list | List of marker * block list;
   111 
   112 fun block_lines (Paragraph lines) = lines
   113   | block_lines (List (_, blocks)) = maps block_lines blocks;
   114 
   115 fun block_range (Paragraph lines) = Antiquote.range (maps line_content lines)
   116   | block_range (List (_, blocks)) = Antiquote.range (maps line_source (maps block_lines blocks));
   117 
   118 
   119 (* read document *)
   120 
   121 local
   122 
   123 fun add_span (opt_marker, body) document =
   124   (case (opt_marker, document) of
   125     (SOME marker, (list as List (list_marker, list_body)) :: rest) =>
   126       if marker = list_marker then
   127         List (list_marker, body @ list_body) :: rest
   128       else if #indent marker < #indent list_marker then
   129         add_span (opt_marker, body @ [list]) rest
   130       else
   131         List (marker, body) :: document
   132   | (SOME marker, _) => List (marker, body) :: document
   133   | (NONE, _) => body @ document);
   134 
   135 fun plain_line line =
   136   not (line_is_empty line) andalso is_none (line_marker line) andalso line <> eof_line;
   137 
   138 val parse_paragraph = Scan.many1 plain_line >> Paragraph;
   139 
   140 val parse_span =
   141   parse_paragraph >> (fn par => (NONE, [par])) ||
   142   Scan.one (is_some o line_marker) -- Scan.many plain_line --
   143     Scan.repeat (Scan.one line_is_empty |-- parse_paragraph) >>
   144       (fn ((line, lines), pars) =>
   145         (Option.map #1 (line_marker line), Paragraph (line :: lines) :: pars));
   146 
   147 val parse_document =
   148   parse_span ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span)
   149     >> (fn spans => fold_rev add_span spans []);
   150 
   151 in
   152 
   153 val read_lines =
   154   Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
   155     (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
   156   the_default [] #> flat;
   157 
   158 end;
   159 
   160 val read = Antiquote.read #> Antiquote.split_lines #> map make_line #> read_lines;
   161 
   162 
   163 (* PIDE reports *)
   164 
   165 local
   166 
   167 fun line_reports depth (Line {marker = SOME (_, pos), ...}) =
   168       cons (pos, Markup.markdown_item depth)
   169   | line_reports _ _ = I;
   170 
   171 fun block_reports depth block =
   172   (case block of
   173     Paragraph lines =>
   174       cons (#1 (block_range block), Markup.markdown_paragraph) #>
   175       fold (line_reports depth) lines
   176   | List ({kind, ...}, body) =>
   177       cons (#1 (block_range block), Markup.markdown_list (print_kind kind)) #>
   178       fold (block_reports (depth + 1)) body);
   179 
   180 in
   181 
   182 fun reports blocks =
   183   filter (Position.is_reported o #1) (fold (block_reports 0) blocks []);
   184 
   185 end;
   186 
   187 end;