src/Pure/General/antiquote.ML
author paulson
Sun, 15 Jul 2018 10:41:57 +0100
changeset 68633 ae4373f3d8d3
parent 67735 e2e002d4a4de
child 69592 a80d8ec6c998
permissions -rw-r--r--
merged
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
30587
ad19c99529eb moved Isar/antiquote.ML to General/antiquote.ML, which is loaded early;
wenzelm
parents: 30573
diff changeset
     1
(*  Title:      Pure/General/antiquote.ML
55511
984e210d412e antiquotations within plain text: Scala version in accordance to ML;
wenzelm
parents: 55107
diff changeset
     2
    Author:     Makarius
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
     3
55511
984e210d412e antiquotations within plain text: Scala version in accordance to ML;
wenzelm
parents: 55107
diff changeset
     4
Antiquotations within plain text.
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
     5
*)
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
     6
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
     7
signature ANTIQUOTE =
sig
  (*datatype antiquote*)
  type control =
    {range: Position.range,
     name: string * Position.T,
     body: Symbol_Pos.T list}
  type antiq =
    {start: Position.T,
     stop: Position.T,
     range: Position.range,
     body: Symbol_Pos.T list}
  datatype 'a antiquote =
    Text of 'a |
    Control of control |
    Antiq of antiq
  type text_antiquote = Symbol_Pos.T list antiquote

  (*ranges and lines*)
  val text_antiquote_range: text_antiquote -> Position.range
  val text_range: text_antiquote list -> Position.range
  val split_lines: text_antiquote list -> text_antiquote list list

  (*reports*)
  val antiq_reports: 'a antiquote list -> Position.report list

  (*scanners*)
  val scan_control: control scanner
  val scan_antiq: antiq scanner
  val scan_antiquote: text_antiquote scanner
  val scan_antiquote_comments: text_antiquote scanner

  (*parse and read (with formal comments)*)
  val parse_comments: Position.T -> Symbol_Pos.T list -> text_antiquote list
  val read_comments: Input.source -> text_antiquote list
end;
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    24
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    25
structure Antiquote: ANTIQUOTE =
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    26
struct
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    27
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    28
(* datatype antiquote *)
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    29
61473
34d1913f0b20 clarified control antiquotations: decode control symbol to get name;
wenzelm
parents: 61471
diff changeset
    30
(*control symbol antiquotation: decoded name with its position, optional
  cartouche body, and the range of the whole item*)
type control =
  {range: Position.range,
   name: string * Position.T,
   body: Symbol_Pos.T list};

(*@{...} antiquotation: positions of the opening and closing delimiters,
  range of the whole item, and the symbols between the delimiters*)
type antiq =
  {start: Position.T,
   stop: Position.T,
   range: Position.range,
   body: Symbol_Pos.T list};

(*plain text interspersed with both kinds of antiquotations*)
datatype 'a antiquote =
  Text of 'a |
  Control of control |
  Antiq of antiq;

(*instance where plain text is a list of positioned symbols*)
type text_antiquote = Symbol_Pos.T list antiquote;
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    35
67467
482b62d694ca clarified signature;
wenzelm
parents: 67426
diff changeset
    36
(*range of a single item: computed from the symbols for plain text,
  taken from the stored range field otherwise*)
fun text_antiquote_range ant =
  (case ant of
    Text syms => Symbol_Pos.range syms
  | Control control => #range control
  | Antiq antiq => #range antiq);
61450
239a04ec2d4c more markup;
wenzelm
parents: 61440
diff changeset
    39
67467
482b62d694ca clarified signature;
wenzelm
parents: 67426
diff changeset
    40
(*overall range of a list of items: from the start of the first item
  to the end of the last one; Position.no_range for the empty list*)
fun text_range [] = Position.no_range
  | text_range ants =
      let
        val (first_start, _) = text_antiquote_range (hd ants);
        val (_, last_stop) = text_antiquote_range (List.last ants);
      in Position.range (first_start, last_stop) end;
61450
239a04ec2d4c more markup;
wenzelm
parents: 61440
diff changeset
    44
61434
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    45
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    46
(* split lines *)
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    47
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    48
(*Split a list of items into lines.  Text items are cut at every newline
  symbol, which is dropped; Control and Antiq items stay on the current
  line.  Empty input yields no lines; otherwise the final (possibly empty)
  line is always emitted.*)
fun split_lines input =
  let
    (*state: (current line in reverse order, finished lines in reverse order)*)
    fun not_newline (s, _) = s <> "\n";
    fun emit item (current, finished) = (item :: current, finished);
    fun break (current, finished) = ([], rev current :: finished);

    fun step (item as Text syms) state =
          (case chop_prefix not_newline syms of
            ([], []) => state
          | (_, []) => emit item state
          | ([], _ :: rest) => step (Text rest) (break state)
          | (front, _ :: rest) => step (Text rest) (break (emit (Text front) state)))
      | step item state = emit item state;
  in
    (case input of
      [] => []
    | _ => rev (#2 (break (fold step input ([], [])))))
  end;
61434
46d6586eb04c added split_lines;
wenzelm
parents: 59112
diff changeset
    60
27342
3945da15d410 added Open/Close -- checked blocks;
wenzelm
parents: 26002
diff changeset
    61
44736
c2a3f1c84179 bulk reports for improved message throughput;
wenzelm
parents: 43947
diff changeset
    62
(* reports *)
30641
72980f8d7ee8 export report -- version that actually covers all cases;
wenzelm
parents: 30635
diff changeset
    63
61457
3e21699bb83b clarified Antiquote.antiq_reports;
wenzelm
parents: 61456
diff changeset
    64
(*markup reports for antiquotations; plain text contributes nothing*)
fun antiq_reports ants =
  let
    fun report (Text _) = []
      | report (Control {range = (pos, _), ...}) = [(pos, Markup.antiquoted)]
      | report (Antiq {start, stop, range = (pos, _), ...}) =
          (*both delimiters, then the item as a whole*)
          [(start, Markup.antiquote),
           (stop, Markup.antiquote),
           (pos, Markup.antiquoted),
           (pos, Markup.language_antiquotation)];
  in maps report ants end;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    72
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    73
30590
1d9c9fcf8513 parameterized datatype antiquote and read operation;
wenzelm
parents: 30589
diff changeset
    74
(* scan *)
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    75
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    76
open Basic_Symbol_Pos;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    77
22114
560c5b5dda1c tuned signature;
wenzelm
parents: 19305
diff changeset
    78
local
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
    79
48764
4fe0920d5049 proper error prefixes;
wenzelm
parents: 45666
diff changeset
    80
(*common prefix of the error messages produced by the scanners of this file*)
val err_prefix = "Antiquotation lexical error: ";
4fe0920d5049 proper error prefixes;
wenzelm
parents: 45666
diff changeset
    81
67193
4ade0d387429 scan only one line, for more detailed positions;
wenzelm
parents: 62797
diff changeset
    82
(*a single newline symbol, as singleton list*)
val scan_nl = Scan.one (fn sym_pos => Symbol_Pos.symbol sym_pos = "\n") >> single;

(*newline via scan_nl, with [] as the default given to Scan.optional*)
val scan_nl_opt = Scan.optional scan_nl [];
67193
4ade0d387429 scan only one line, for more detailed positions;
wenzelm
parents: 62797
diff changeset
    84
67571
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
    85
(*one chunk of plain text within a line: a non-empty run of ordinary symbols,
  or a single formal comment symbol, or an "@" that is followed by a symbol
  other than "{"*)
val scan_plain_txt =
  let
    (*symbols that terminate a run of ordinary text*)
    fun is_special s =
      Comment.is_symbol s orelse
      Symbol.is_control s orelse
      s = Symbol.open_ orelse
      s = "@" orelse
      s = "\n";
    fun is_plain (s, _) = not (is_special s) andalso Symbol.not_eof s;

    val plain_run = Scan.many1 is_plain;
    val comment_symbol = Scan.one (Comment.is_symbol o Symbol_Pos.symbol) >> single;
    val lone_at = $$$ "@" --| Scan.ahead (~$$ "{");
  in plain_run || comment_symbol || lone_at end;
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
    95
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
    96
(*text item: either a newline on its own, or a non-empty sequence of
  plain text chunks with an optional newline appended*)
val scan_text =
  let val text_line = Scan.repeats1 scan_plain_txt @@@ scan_nl_opt
  in scan_nl || text_line end;
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
    98
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
    99
(*like scan_text, but each chunk may also be a formal comment recognized by
  Comment.scan, of which only the second component of the result is kept*)
val scan_text_comments =
  let
    val chunk = Comment.scan >> #2 || scan_plain_txt;
    val text_line = Scan.repeats1 chunk @@@ scan_nl_opt;
  in scan_nl || text_line end;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   101
55512
75c68e05f9ea support ML antiquotations in Scala;
wenzelm
parents: 55511
diff changeset
   102
(*one piece of the body of @{...}; alternatives are tried in this order:
  quoted string (keeping the scanned source via Scan.trace), cartouche,
  formal comment (always rejected with an error), any other single symbol
  except "}" and end-of-file*)
val scan_antiq_body =
  let
    val quoted =
      Scan.trace (Symbol_Pos.scan_string_qq err_prefix || Symbol_Pos.scan_string_bq err_prefix)
        >> #2;
    val cartouche = Symbol_Pos.scan_cartouche err_prefix;
    val reject_comment =
      Comment.scan --
        Symbol_Pos.!!! (fn () => err_prefix ^ "bad formal comment in antiquote body") Scan.fail
        >> K [];
    val other_symbol = Scan.one (fn (s, _) => s <> "}" andalso Symbol.not_eof s) >> single;
  in quoted || cartouche || reject_comment || other_symbol end;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   109
61491
97261e6c1d42 another antiquotation short form: undecorated cartouche as alias for @{text};
wenzelm
parents: 61481
diff changeset
   110
(*name of a control symbol, as decoded by Symbol.decode; the scanners in this
  file apply it only to symbols accepted by Symbol.is_control, so any other
  decoding result is an internal error: it is reported explicitly instead of
  via an anonymous Match exception from a non-exhaustive case*)
fun control_name sym =
  (case Symbol.decode sym of
    Symbol.Control name => name
  | _ => raise Fail ("Antiquote.control_name: not a control symbol: " ^ sym));
97261e6c1d42 another antiquotation short form: undecorated cartouche as alias for @{text};
wenzelm
parents: 61481
diff changeset
   111
42508
e21362bf1d93 allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
wenzelm
parents: 42503
diff changeset
   112
in
e21362bf1d93 allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
wenzelm
parents: 42503
diff changeset
   113
61471
9d4c08af61b8 support control symbol antiquotations;
wenzelm
parents: 61457
diff changeset
   114
(*control antiquotation: optional control symbol followed by a cartouche
  (an undecorated cartouche gets the name "cartouche"), or a control symbol
  on its own with empty body*)
val scan_control =
  let
    val control_symbol = Scan.one (Symbol.is_control o Symbol_Pos.symbol);

    fun with_cartouche (SOME (sym, pos), body) =
          {name = (control_name sym, pos),
           range = Symbol_Pos.range ((sym, pos) :: body),
           body = body}
      | with_cartouche (NONE, body) =
          {name = ("cartouche", #2 (hd body)),
           range = Symbol_Pos.range body,
           body = body};

    fun bare (sym, pos) =
      {name = (control_name sym, pos),
       range = Symbol_Pos.range [(sym, pos)],
       body = []};
  in
    Scan.option control_symbol -- Symbol_Pos.scan_cartouche err_prefix >> with_cartouche ||
    control_symbol >> bare
  end;
61471
9d4c08af61b8 support control symbol antiquotations;
wenzelm
parents: 61457
diff changeset
   127
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   128
(*antiquotation of the form @{...}: after the opening "@{" has been seen,
  a failure of the remaining scan is turned into the error
  "missing closing brace"*)
val scan_antiq =
  let
    val opening = $$ "@" |-- $$ "{" |-- Symbol_Pos.scan_pos;
    val closing = $$ "}" |-- Symbol_Pos.scan_pos;
    val body_and_close =
      Symbol_Pos.!!! (fn () => err_prefix ^ "missing closing brace")
        (Scan.repeats scan_antiq_body -- Symbol_Pos.scan_pos -- closing);

    (*pos1..pos2 spans "@{", pos3..pos4 spans "}"*)
    fun make_antiq (pos1, (pos2, ((body, pos3), pos4))) =
      {start = Position.range_position (pos1, pos2),
       stop = Position.range_position (pos3, pos4),
       range = Position.range (pos1, pos4),
       body = body};
  in Symbol_Pos.scan_pos -- (opening -- body_and_close) >> make_antiq end;
30590
1d9c9fcf8513 parameterized datatype antiquote and read operation;
wenzelm
parents: 30589
diff changeset
   137
61471
9d4c08af61b8 support control symbol antiquotations;
wenzelm
parents: 61457
diff changeset
   138
(*one item: plain text, control antiquotation, or @{...} antiquotation,
  tried in this order*)
val scan_antiquote =
  scan_text >> Text ||
  scan_control >> Control ||
  scan_antiq >> Antiq;
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   140
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   141
(*variant of scan_antiquote whose plain text is scanned by
  scan_text_comments, i.e. it may include formal comments*)
val scan_antiquote_comments =
  scan_text_comments >> Text ||
  scan_control >> Control ||
  scan_antiq >> Antiq;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   143
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   144
end;
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   145
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   146
67571
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   147
(* parse and read (with formal comments) *)
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   148
67571
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   149
(*parse positioned symbols into a list of items (formal comments allowed);
  pos is only used in the error message if Scan.read gives no result*)
fun parse_comments pos syms =
  let
    val result = Scan.read Symbol_Pos.stopper (Scan.repeat scan_antiquote_comments) syms;
  in
    (case result of
      NONE => error ("Malformed quotation/antiquotation source" ^ Position.here pos)
    | SOME ants => ants)
  end;
b521b8b400f7 trim_blanks after read, before eval;
wenzelm
parents: 61450
diff changeset
   153
67571
f858fe5531ac more uniform treatment of formal comments within document source;
wenzelm
parents: 67522
diff changeset
   154
(*parse an input source via parse_comments, emit the position reports of
  its antiquotations, and return the parsed items*)
fun read_comments source =
  let
    val pos = Input.pos_of source;
    val syms = Input.source_explode source;
    val ants = parse_comments pos syms;
  in
    Position.reports (antiq_reports ants);
    ants
  end;
9138
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   159
6a4fae41a75f Text with antiquotations of inner items (terms, types etc.).
wenzelm
parents:
diff changeset
   160
end;