| author | nipkow | 
| Tue, 30 May 2017 11:12:00 +0200 | |
| changeset 65964 | 3de7464450b0 | 
| parent 62800 | 7ac100f86863 | 
| child 67361 | f834d6f21c55 | 
| permissions | -rw-r--r-- | 
| 27763 | 1 | (* Title: Pure/General/symbol_pos.ML | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Symbols with explicit position information. | |
| 5 | *) | |
| 6 | ||
| 36957 | 7 | signature SYMBOL_POS = | 
| 27763 | 8 | sig | 
| 9 | type T = Symbol.symbol * Position.T | |
| 10 | val symbol: T -> Symbol.symbol | |
| 55103 | 11 | val $$ : Symbol.symbol -> T list -> T * T list | 
| 55107 | 12 | val ~$$ : Symbol.symbol -> T list -> T * T list | 
| 27763 | 13 | val $$$ : Symbol.symbol -> T list -> T list * T list | 
| 14 | val ~$$$ : Symbol.symbol -> T list -> T list * T list | |
| 27797 | 15 | val content: T list -> string | 
| 55033 | 16 | val range: T list -> Position.range | 
| 61705 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 17 | val split_lines: T list -> T list list | 
| 61456 | 18 | val trim_blanks: T list -> T list | 
| 61705 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 19 | val trim_lines: T list -> T list | 
| 27763 | 20 | val is_eof: T -> bool | 
| 21 | val stopper: T Scan.stopper | |
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43773diff
changeset | 22 | val !!! : Scan.message -> (T list -> 'a) -> T list -> 'a | 
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 23 | val scan_pos: T list -> Position.T * T list | 
| 48764 | 24 | val scan_string_q: string -> T list -> (Position.T * (T list * Position.T)) * T list | 
| 25 | val scan_string_qq: string -> T list -> (Position.T * (T list * Position.T)) * T list | |
| 26 | val scan_string_bq: string -> T list -> (Position.T * (T list * Position.T)) * T list | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 27 | val recover_string_q: T list -> T list * T list | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 28 | val recover_string_qq: T list -> T list * T list | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 29 | val recover_string_bq: T list -> T list * T list | 
| 43773 | 30 | val quote_string_q: string -> string | 
| 31 | val quote_string_qq: string -> string | |
| 32 | val quote_string_bq: string -> string | |
| 62781 | 33 | val cartouche_content: T list -> T list | 
| 55105 | 34 | val scan_cartouche: string -> T list -> T list * T list | 
| 62781 | 35 | val scan_cartouche_content: string -> T list -> T list * T list | 
| 55033 | 36 | val recover_cartouche: T list -> T list * T list | 
| 55105 | 37 | val scan_comment: string -> T list -> T list * T list | 
| 38 | val scan_comment_body: string -> T list -> T list * T list | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 39 | val recover_comment: T list -> T list * T list | 
| 27763 | 40 | val source: Position.T -> (Symbol.symbol, 'a) Source.source -> | 
| 41 | (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source | |
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 42 | type text = string | 
| 27797 | 43 | val implode: T list -> text | 
| 59112 | 44 | val implode_range: Position.range -> T list -> text * Position.range | 
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 45 | val explode: text * Position.T -> T list | 
| 62751 | 46 | val explode0: string -> T list | 
| 50239 | 47 | val scan_ident: T list -> T list * T list | 
| 48 | val is_identifier: string -> bool | |
| 62782 | 49 | val scan_nat: T list -> T list * T list | 
| 50 | val scan_float: T list -> T list * T list | |
| 27763 | 51 | end; | 
| 52 | ||
| 30573 | 53 | structure Symbol_Pos: SYMBOL_POS = | 
| 27763 | 54 | struct | 
| 55 | ||
| 56 | (* type T *) | |
| 57 | ||
| 58 | type T = Symbol.symbol * Position.T; | |
| 59 | ||
| 60 | fun symbol ((s, _): T) = s; | |
| 27852 | 61 | |
| 27797 | 62 | val content = implode o map symbol; | 
| 27763 | 63 | |
| 55033 | 64 | fun range (syms as (_, pos) :: _) = | 
| 65 | let val pos' = List.last syms |-> Position.advance | |
| 62797 | 66 | in Position.range (pos, pos') end | 
| 55033 | 67 | | range [] = Position.no_range; | 
| 68 | ||
| 61705 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 69 | |
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 70 | (* lines and blanks *) | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 71 | |
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 72 | fun split_lines [] = [] | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 73 | | split_lines (list: T list) = | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 74 | let | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 75 | fun split syms = | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 76 | (case take_prefix (fn (s, _) => s <> "\n") syms of | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 77 | (line, []) => [line] | 
| 62239 | 78 | | (line, _ :: rest) => line :: split rest); | 
| 61705 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 79 | in split list end; | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 80 | |
| 61707 | 81 | val trim_blanks = trim (Symbol.is_blank o symbol); | 
| 61456 | 82 | |
| 61705 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 83 | val trim_lines = | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 84 | split_lines #> map trim_blanks #> separate [(Symbol.space, Position.none)] #> flat; | 
| 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 wenzelm parents: 
61502diff
changeset | 85 | |
| 27763 | 86 | |
| 87 | (* stopper *) | |
| 88 | ||
| 89 | fun mk_eof pos = (Symbol.eof, pos); | |
| 90 | val eof = mk_eof Position.none; | |
| 91 | ||
| 92 | val is_eof = Symbol.is_eof o symbol; | |
| 93 | ||
| 94 | val stopper = | |
| 95 | Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.advance)) is_eof; | |
| 96 | ||
| 97 | ||
| 98 | (* basic scanners *) | |
| 99 | ||
| 100 | fun !!! text scan = | |
| 101 | let | |
| 48911 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 wenzelm parents: 
48770diff
changeset | 102 | fun get_pos [] = " (end-of-input)" | 
| 48992 | 103 | | get_pos ((_, pos) :: _) = Position.here pos; | 
| 27763 | 104 | |
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43773diff
changeset | 105 | fun err (syms, msg) = fn () => | 
| 48770 | 106 | text () ^ get_pos syms ^ | 
| 50201 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 wenzelm parents: 
48992diff
changeset | 107 |       Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map symbol syms)) ^
 | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43773diff
changeset | 108 | (case msg of NONE => "" | SOME m => "\n" ^ m ()); | 
| 27763 | 109 | in Scan.!! err scan end; | 
| 110 | ||
| 55033 | 111 | fun $$ s = Scan.one (fn x => symbol x = s); | 
| 55107 | 112 | fun ~$$ s = Scan.one (fn x => symbol x <> s); | 
| 113 | ||
| 27763 | 114 | fun $$$ s = Scan.one (fn x => symbol x = s) >> single; | 
| 115 | fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single; | |
| 116 | ||
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 117 | val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos); | 
| 27763 | 118 | |
| 119 | ||
| 43773 | 120 | (* scan string literals *) | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 121 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 122 | local | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 123 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 124 | val char_code = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 125 | Scan.one (Symbol.is_ascii_digit o symbol) -- | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 126 | Scan.one (Symbol.is_ascii_digit o symbol) -- | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 127 | Scan.one (Symbol.is_ascii_digit o symbol) :|-- | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 128 | (fn (((a, pos), (b, _)), (c, _)) => | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 129 | let val (n, _) = Library.read_int [a, b, c] | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 130 | in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end); | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 131 | |
| 48764 | 132 | fun scan_str q err_prefix = | 
| 133 | $$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string") | |
| 134 | ($$$ q || $$$ "\\" || char_code) || | |
| 58854 | 135 | Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s) >> single; | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 136 | |
| 48764 | 137 | fun scan_strs q err_prefix = | 
| 55103 | 138 | Scan.ahead ($$ q) |-- | 
| 139 | !!! (fn () => err_prefix ^ "unclosed string literal") | |
| 61476 | 140 | ((scan_pos --| $$$ q) -- (Scan.repeats (scan_str q err_prefix) -- ($$$ q |-- scan_pos))); | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 141 | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 142 | fun recover_strs q = | 
| 61476 | 143 | $$$ q @@@ Scan.repeats (Scan.permissive (scan_str q "")); | 
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 144 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 145 | in | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 146 | |
| 42503 | 147 | val scan_string_q = scan_strs "'"; | 
| 148 | val scan_string_qq = scan_strs "\""; | |
| 149 | val scan_string_bq = scan_strs "`"; | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 150 | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 151 | val recover_string_q = recover_strs "'"; | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 152 | val recover_string_qq = recover_strs "\""; | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 153 | val recover_string_bq = recover_strs "`"; | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 154 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 155 | end; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 156 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 157 | |
| 43773 | 158 | (* quote string literals *) | 
| 159 | ||
| 160 | local | |
| 161 | ||
| 162 | fun char_code i = | |
| 163 | (if i < 10 then "00" else if i < 100 then "0" else "") ^ string_of_int i; | |
| 164 | ||
| 165 | fun quote_str q s = | |
| 166 | if Symbol.is_ascii_control s then "\\" ^ char_code (ord s) | |
| 167 | else if s = q orelse s = "\\" then "\\" ^ s | |
| 168 | else s; | |
| 169 | ||
| 170 | fun quote_string q = enclose q q o implode o map (quote_str q) o Symbol.explode; | |
| 171 | ||
| 172 | in | |
| 173 | ||
| 174 | val quote_string_q = quote_string "'"; | |
| 175 | val quote_string_qq = quote_string "\""; | |
| 176 | val quote_string_bq = quote_string "`"; | |
| 177 | ||
| 178 | end; | |
| 179 | ||
| 180 | ||
| 55033 | 181 | (* nested text cartouches *) | 
| 182 | ||
| 62781 | 183 | fun cartouche_content syms = | 
| 184 | let | |
| 185 | fun err () = | |
| 186 |       error ("Malformed text cartouche: "
 | |
| 187 | ^ quote (content syms) ^ Position.here (#1 (range syms))); | |
| 188 | in | |
| 189 | (case syms of | |
| 190 |       ("\<open>", _) :: rest =>
 | |
| 191 | (case rev rest of | |
| 192 |           ("\<close>", _) :: rrest => rev rrest
 | |
| 193 | | _ => err ()) | |
| 194 | | _ => err ()) | |
| 195 | end; | |
| 196 | ||
| 55104 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 wenzelm parents: 
55103diff
changeset | 197 | val scan_cartouche_depth = | 
| 61502 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 198 | Scan.repeat1 (Scan.depend (fn (depth: int option) => | 
| 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 199 | (case depth of | 
| 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 200 | SOME d => | 
| 62210 | 201 | $$ Symbol.open_ >> pair (SOME (d + 1)) || | 
| 61502 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 202 | (if d > 0 then | 
| 62210 | 203 | Scan.one (fn (s, _) => s <> Symbol.close andalso Symbol.not_eof s) >> pair depth || | 
| 204 | $$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1)) | |
| 61502 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 205 | else Scan.fail) | 
| 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 206 | | NONE => Scan.fail))); | 
| 55033 | 207 | |
| 55105 | 208 | fun scan_cartouche err_prefix = | 
| 62210 | 209 | Scan.ahead ($$ Symbol.open_) |-- | 
| 55105 | 210 | !!! (fn () => err_prefix ^ "unclosed text cartouche") | 
| 61502 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 wenzelm parents: 
61476diff
changeset | 211 | (Scan.provide is_none (SOME 0) scan_cartouche_depth); | 
| 55033 | 212 | |
| 62781 | 213 | fun scan_cartouche_content err_prefix = | 
| 214 | scan_cartouche err_prefix >> cartouche_content; | |
| 55033 | 215 | |
| 62781 | 216 | val recover_cartouche = Scan.pass (SOME 0) scan_cartouche_depth; | 
| 55033 | 217 | |
| 218 | ||
| 27763 | 219 | (* ML-style comments *) | 
| 220 | ||
| 221 | local | |
| 222 | ||
| 223 | val scan_cmt = | |
| 224 |   Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
 | |
| 225 | Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) || | |
| 226 | Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) || | |
| 58854 | 227 | Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s)) >> single; | 
| 27763 | 228 | |
| 61476 | 229 | val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt); | 
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 230 | |
| 27763 | 231 | in | 
| 232 | ||
| 55105 | 233 | fun scan_comment err_prefix = | 
| 55106 | 234 |   Scan.ahead ($$ "(" -- $$ "*") |--
 | 
| 235 | !!! (fn () => err_prefix ^ "unclosed comment") | |
| 58850 | 236 |       ($$$ "(" @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ ")");
 | 
| 27763 | 237 | |
| 55105 | 238 | fun scan_comment_body err_prefix = | 
| 55106 | 239 |   Scan.ahead ($$ "(" -- $$ "*") |--
 | 
| 240 | !!! (fn () => err_prefix ^ "unclosed comment") | |
| 58850 | 241 |       ($$ "(" |-- $$ "*" |-- scan_cmts --| $$ "*" --| $$ ")");
 | 
| 27763 | 242 | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 243 | val recover_comment = | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 244 |   $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
 | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
43947diff
changeset | 245 | |
| 27763 | 246 | end; | 
| 247 | ||
| 248 | ||
| 249 | (* source *) | |
| 250 | ||
| 251 | fun source pos = | |
| 252 | Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos => | |
| 58864 | 253 | Scan.one Symbol.not_eof >> (fn s => (Position.advance s pos, (s, pos)))))); | 
| 27763 | 254 | |
| 255 | ||
| 256 | (* compact representation -- with Symbol.DEL padding *) | |
| 257 | ||
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 258 | type text = string; | 
| 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 259 | |
| 27763 | 260 | fun pad [] = [] | 
| 261 | | pad [(s, _)] = [s] | |
| 32784 | 262 | | pad ((s1, pos1) :: (rest as (_, pos2) :: _)) = | 
| 27763 | 263 | let | 
| 264 | val end_pos1 = Position.advance s1 pos1; | |
| 27797 | 265 | val d = Int.max (0, Position.distance_of end_pos1 pos2); | 
| 27763 | 266 | in s1 :: replicate d Symbol.DEL @ pad rest end; | 
| 267 | ||
| 27797 | 268 | val implode = implode o pad; | 
| 27763 | 269 | |
| 59112 | 270 | fun implode_range (pos1, pos2) syms = | 
| 27797 | 271 |   let val syms' = (("", pos1) :: syms @ [("", pos2)])
 | 
| 272 | in (implode syms', range syms') end; | |
| 27778 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 wenzelm parents: 
27763diff
changeset | 273 | |
| 27763 | 274 | fun explode (str, pos) = | 
| 41416 | 275 | let | 
| 276 | val (res, _) = | |
| 277 | fold (fn s => fn (res, p) => ((s, p) :: res, Position.advance s p)) | |
| 62800 | 278 | (Symbol.explode str) ([], Position.no_range_position pos); | 
| 41416 | 279 | in fold (fn (s, p) => if s = Symbol.DEL then I else cons (s, p)) res [] end; | 
| 27763 | 280 | |
| 62751 | 281 | fun explode0 str = explode (str, Position.none); | 
| 282 | ||
| 50239 | 283 | |
| 284 | (* identifiers *) | |
| 285 | ||
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 286 | local | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 287 | |
| 52616 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 wenzelm parents: 
50493diff
changeset | 288 | val letter = Scan.one (symbol #> Symbol.is_letter); | 
| 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 wenzelm parents: 
50493diff
changeset | 289 | val letdigs1 = Scan.many1 (symbol #> Symbol.is_letdig); | 
| 53016 
fa9c38891cf2
disable old identifier syntax by default, legacy_isub_isup := true may be used temporarily as fall-back;
 wenzelm parents: 
52920diff
changeset | 290 | |
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62239diff
changeset | 291 | val sub = Scan.one (symbol #> (fn s => s = "\<^sub>")); | 
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 292 | |
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 293 | in | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 294 | |
| 61476 | 295 | val scan_ident = letter ::: Scan.repeats (letdigs1 || sub ::: letdigs1); | 
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 296 | |
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 297 | end; | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 298 | |
| 50239 | 299 | fun is_identifier s = | 
| 50295 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 wenzelm parents: 
50253diff
changeset | 300 | Symbol.is_ascii_identifier s orelse | 
| 62751 | 301 | (case try (Scan.finite stopper scan_ident) (explode0 s) of | 
| 50295 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 wenzelm parents: 
50253diff
changeset | 302 | SOME (_, []) => true | 
| 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 wenzelm parents: 
50253diff
changeset | 303 | | _ => false); | 
| 50239 | 304 | |
| 62782 | 305 | |
| 306 | (* numerals *) | |
| 307 | ||
| 308 | val scan_nat = Scan.many1 (Symbol.is_digit o symbol); | |
| 309 | val scan_float = scan_nat @@@ $$$ "." @@@ scan_nat; | |
| 310 | ||
| 27763 | 311 | end; | 
| 312 | ||
| 36957 | 313 | structure Basic_Symbol_Pos = (*not open by default*) | 
| 314 | struct | |
| 55103 | 315 | val $$ = Symbol_Pos.$$; | 
| 55107 | 316 | val ~$$ = Symbol_Pos.~$$; | 
| 36957 | 317 | val $$$ = Symbol_Pos.$$$; | 
| 318 | val ~$$$ = Symbol_Pos.~$$$; | |
| 319 | end; |