src/Pure/Thy/markdown.ML
author wenzelm
Fri Oct 16 10:11:20 2015 +0200 (2015-10-16)
changeset 61457 3e21699bb83b
parent 61454 c86286ae9fe5
child 61459 5f2ddeb15c06
permissions -rw-r--r--
clarified Antiquote.antiq_reports;
Thy_Output.output_text: support for markdown (inactive);
eliminared Thy_Output.check_text -- uniform use of Thy_Output.output_text;
     1 (*  Title:      Pure/Thy/markdown.ML
     2     Author:     Makarius
     3 
     4 Minimal support for Markdown documents (see also http://commonmark.org)
     5 that consist only of paragraphs and (nested) lists:
     6 
     7   * list items start with marker \<^item> (itemize), \<^enum> (enumerate), \<^descr> (description)
     8   * adjacent list items with same indentation and same marker are grouped
     9     into a single list
    10   * singleton blank lines separate paragraphs
    11   * multiple blank lines escape from the current list hierarchy
    12 
    13 Notable differences to official Markdown:
    14 
    15   * indentation of list items needs to match exactly
    16   * indentation is unlimited (Markdown interprets 4 spaces as block quote)
    17   * list items always consist of paragraphs -- no notion of "tight" list
    18 *)
    19 
    20 signature MARKDOWN =
    21 sig
    22   val is_control: Symbol.symbol -> bool
    23   datatype kind = Itemize | Enumerate | Description
    24   val print_kind: kind -> string
    25   type marker = {indent: int, kind: kind}
    26   type line
    27   val line_source: line -> Antiquote.text_antiquote list
    28   val line_content: line -> Antiquote.text_antiquote list
    29   val make_line: Antiquote.text_antiquote list -> line
    30   val empty_line: line
    31   datatype block = Paragraph of line list | List of marker * block list
    32   val read_lines: line list -> block list
    33   val read_antiquotes: Antiquote.text_antiquote list -> block list
    34   val read_source: Input.source -> block list
    35   val text_reports: Antiquote.text_antiquote list -> Position.report list
    36   val reports: block list -> Position.report list
    37 end;
    38 
    39 structure Markdown: MARKDOWN =
    40 struct
    41 
    42 (* document lines *)
    43 
    44 val is_control = member (op =) ["\\<^item>", "\\<^enum>", "\\<^descr>"];
    45 
    46 datatype kind = Itemize | Enumerate | Description;
    47 
    48 fun print_kind Itemize = "itemize"
    49   | print_kind Enumerate = "enumerate"
    50   | print_kind Description = "description";
    51 
    52 type marker = {indent: int, kind: kind};
    53 
    54 datatype line =
    55   Line of
    56    {source: Antiquote.text_antiquote list,
    57     content: Antiquote.text_antiquote list,
    58     is_empty: bool,
    59     marker: (marker * Position.T) option};
    60 
    61 val eof_line =
    62   Line {source = [Antiquote.Text [(Symbol.eof, Position.none)]],
    63     content = [], is_empty = false, marker = NONE};
    64 
    65 fun line_source (Line {source, ...}) = source;
    66 fun line_content (Line {content, ...}) = content;
    67 fun line_is_empty (Line {is_empty, ...}) = is_empty;
    68 fun line_marker (Line {marker, ...}) = marker;
    69 
    70 
    71 (* make line *)
    72 
    73 local
    74 
    75 fun bad_blank ((s, _): Symbol_Pos.T) = Symbol.is_ascii_blank s andalso s <> Symbol.space;
    76 val bad_blanks = maps (fn Antiquote.Text ss => filter bad_blank ss | _ => []);
    77 
    78 fun check_blanks source =
    79   (case bad_blanks source of
    80     [] => ()
    81   | (c, pos) :: _ =>
    82       error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
    83 
    84 fun is_space ((s, _): Symbol_Pos.T) = s = Symbol.space;
    85 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
    86 
    87 val scan_marker =
    88   Scan.many is_space -- Symbol_Pos.scan_pos --
    89   (Symbol_Pos.$$ "\\<^item>" >> K Itemize ||
    90    Symbol_Pos.$$ "\\<^enum>" >> K Enumerate ||
    91    Symbol_Pos.$$ "\\<^descr>" >> K Description)
    92   >> (fn ((spaces, pos), kind) => ({indent = length spaces, kind = kind}, pos));
    93 
    94 fun read_marker (Antiquote.Text ss :: rest) =
    95       (case Scan.finite Symbol_Pos.stopper (Scan.option scan_marker --| Scan.many is_space) ss of
    96         (marker, []) => (marker, rest)
    97       | (marker, ss') => (marker, Antiquote.Text ss' :: rest))
    98   | read_marker source = (NONE, source);
    99 
   100 in
   101 
   102 fun make_line source =
   103   let
   104     val _ = check_blanks source;
   105     val (marker, content) = read_marker source;
   106   in Line {source = source, content = content, is_empty = is_empty source, marker = marker} end;
   107 
   108 val empty_line = make_line [];
   109 
   110 end;
   111 
   112 
   113 (* document blocks *)
   114 
   115 datatype block = Paragraph of line list | List of marker * block list;
   116 
   117 fun block_lines (Paragraph lines) = lines
   118   | block_lines (List (_, blocks)) = maps block_lines blocks;
   119 
   120 fun block_range (Paragraph lines) = Antiquote.range (maps line_content lines)
   121   | block_range (List (_, blocks)) = Antiquote.range (maps line_source (maps block_lines blocks));
   122 
   123 
   124 (* read document *)
   125 
   126 local
   127 
   128 fun add_span (opt_marker, body) document =
   129   (case (opt_marker, document) of
   130     (SOME marker, (list as List (list_marker, list_body)) :: rest) =>
   131       if marker = list_marker then
   132         List (list_marker, body @ list_body) :: rest
   133       else if #indent marker < #indent list_marker then
   134         add_span (opt_marker, body @ [list]) rest
   135       else
   136         List (marker, body) :: document
   137   | (SOME marker, _) => List (marker, body) :: document
   138   | (NONE, _) => body @ document);
   139 
   140 fun plain_line line =
   141   not (line_is_empty line) andalso is_none (line_marker line) andalso line <> eof_line;
   142 
   143 val parse_paragraph = Scan.many1 plain_line >> Paragraph;
   144 
   145 val parse_span =
   146   parse_paragraph >> (fn par => (NONE, [par])) ||
   147   Scan.one (is_some o line_marker) -- Scan.many plain_line --
   148     Scan.repeat (Scan.one line_is_empty |-- parse_paragraph) >>
   149       (fn ((line, lines), pars) =>
   150         (Option.map #1 (line_marker line), Paragraph (line :: lines) :: pars));
   151 
   152 val parse_document =
   153   parse_span ::: Scan.repeat (Scan.option (Scan.one line_is_empty) |-- parse_span)
   154     >> (fn spans => fold_rev add_span spans []);
   155 
   156 in
   157 
   158 val read_lines =
   159   Scan.read (Scan.stopper (K eof_line) (fn line => line = eof_line))
   160     (Scan.repeat (Scan.many line_is_empty |-- parse_document) --| Scan.many line_is_empty) #>
   161   the_default [] #> flat;
   162 
   163 val read_antiquotes = Antiquote.split_lines #> map make_line #> read_lines;
   164 val read_source = Antiquote.read #> read_antiquotes;
   165 
   166 end;
   167 
   168 
   169 (* PIDE reports *)
   170 
   171 val text_reports =
   172   maps (fn Antiquote.Text ss => [(#1 (Symbol_Pos.range ss), Markup.words)] | _ => []);
   173 
   174 local
   175 
   176 fun line_reports depth (Line {marker = SOME (_, pos), content, ...}) =
   177       cons (pos, Markup.markdown_item depth) #>
   178       append (text_reports content)
   179   | line_reports _ _ = I;
   180 
   181 fun block_reports depth block =
   182   (case block of
   183     Paragraph lines =>
   184       cons (#1 (block_range block), Markup.markdown_paragraph) #>
   185       fold (line_reports depth) lines
   186   | List ({kind, ...}, body) =>
   187       cons (#1 (block_range block), Markup.markdown_list (print_kind kind)) #>
   188       fold (block_reports (depth + 1)) body);
   189 
   190 in
   191 
   192 fun reports blocks =
   193   filter (Position.is_reported o #1) (fold (block_reports 0) blocks []);
   194 
   195 end;
   196 
   197 end;