author | wenzelm |
Sun, 07 Jan 2018 15:12:00 +0100 | |
changeset 67361 | f834d6f21c55 |
parent 62800 | 7ac100f86863 |
child 67413 | 2555713586c8 |
permissions | -rw-r--r-- |
27763 | 1 |
(* Title: Pure/General/symbol_pos.ML |
2 |
Author: Makarius |
|
3 |
||
4 |
Symbols with explicit position information. |
|
5 |
*) |
|
6 |
||
36957 | 7 |
(*Interface: symbols paired with a source position, plus scanners and
  conversions working on lists of such positioned symbols.*)
signature SYMBOL_POS =
sig
  (*positioned symbols*)
  type T = Symbol.symbol * Position.T
  val symbol: T -> Symbol.symbol

  (*single-symbol scanners: $$ / ~$$ yield the symbol, $$$ / ~$$$ a list*)
  val $$ : Symbol.symbol -> T list -> T * T list
  val ~$$ : Symbol.symbol -> T list -> T * T list
  val $$$ : Symbol.symbol -> T list -> T list * T list
  val ~$$$ : Symbol.symbol -> T list -> T list * T list

  (*content, range, lines and blanks*)
  val content: T list -> string
  val range: T list -> Position.range
  val split_lines: T list -> T list list
  val trim_blanks: T list -> T list
  val trim_lines: T list -> T list

  (*stopper and error reporting*)
  val is_eof: T -> bool
  val stopper: T Scan.stopper
  val !!! : Scan.message -> (T list -> 'a) -> T list -> 'a
  val scan_pos: T list -> Position.T * T list

  (*string literals: scanning, recovery, quoting*)
  val scan_string_q: string -> T list -> (Position.T * (T list * Position.T)) * T list
  val scan_string_qq: string -> T list -> (Position.T * (T list * Position.T)) * T list
  val scan_string_bq: string -> T list -> (Position.T * (T list * Position.T)) * T list
  val recover_string_q: T list -> T list * T list
  val recover_string_qq: T list -> T list * T list
  val recover_string_bq: T list -> T list * T list
  val quote_string_q: string -> string
  val quote_string_qq: string -> string
  val quote_string_bq: string -> string

  (*text cartouches*)
  val cartouche_content: T list -> T list
  val scan_cartouche: string -> T list -> T list * T list
  val scan_cartouche_content: string -> T list -> T list * T list
  val recover_cartouche: T list -> T list * T list
  val scan_comment_cartouche: string -> T list -> T list * T list

  (*ML-style comments*)
  val scan_comment: string -> T list -> T list * T list
  val scan_comment_body: string -> T list -> T list * T list
  val recover_comment: T list -> T list * T list

  (*sources and compact text representation*)
  val source: Position.T -> (Symbol.symbol, 'a) Source.source ->
    (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
  type text = string
  val implode: T list -> text
  val implode_range: Position.range -> T list -> text * Position.range
  val explode: text * Position.T -> T list
  val explode0: string -> T list

  (*identifiers and numerals*)
  val scan_ident: T list -> T list * T list
  val is_identifier: string -> bool
  val scan_nat: T list -> T list * T list
  val scan_float: T list -> T list * T list
end;
53 |
||
30573 | 54 |
structure Symbol_Pos: SYMBOL_POS = |
27763 | 55 |
struct |
56 |
||
57 |
(* type T *) |
|
58 |
||
59 |
(*a symbol together with the position it is attached to*)
type T = Symbol.symbol * Position.T;

(*first component of a positioned symbol*)
fun symbol (sym_pos: T) = #1 sym_pos;

(*concatenate the plain symbols of a list, positions are ignored;
  "implode" here is still the string-list version visible before this
  structure defines its own implode*)
fun content syms = implode (map symbol syms);
27763 | 64 |
|
55033 | 65 |
(*Range covered by a symbol list: starts at the position of the first
  symbol and ends at the position of the last symbol advanced over that
  symbol; the empty list yields Position.no_range.*)
fun range [] = Position.no_range
  | range (syms as (_, start) :: _) =
      let
        val (last_sym, last_pos) = List.last syms;
        val stop = Position.advance last_sym last_pos;
      in Position.range (start, stop) end;
69 |
||
61705
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
70 |
|
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
71 |
(* lines and blanks *) |
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
72 |
|
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
73 |
(*Split a symbol list at "\n" symbols.  The newline symbols themselves are
  dropped; the empty list yields no lines at all, while any non-empty
  input yields at least one (possibly empty) line.*)
fun split_lines ([]: T list) = []
  | split_lines syms =
      let
        fun not_newline ((s, _): T) = s <> "\n";

        (*peel off one line, then continue after its newline (if any)*)
        fun lines_of input =
          let val (line, remainder) = take_prefix not_newline input in
            (case remainder of
              [] => [line]
            | _ :: more => line :: lines_of more)
          end;
      in lines_of syms end;
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
81 |
|
61707 | 82 |
(*apply "trim" with the predicate "symbol is blank" (Symbol.is_blank)*)
fun trim_blanks syms = trim (fn sym => Symbol.is_blank (symbol sym)) syms;
61456 | 83 |
|
61705
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
84 |
(*Split into lines, trim the blanks of every line, then join the lines
  again with a single Symbol.space carrying Position.none in between.*)
fun trim_lines syms =
  let
    val blank = [(Symbol.space, Position.none)];
    val trimmed = map trim_blanks (split_lines syms);
  in flat (separate blank trimmed) end;
546e6494049f
trim lines for @{theory_text} similarly to @{text};
wenzelm
parents:
61502
diff
changeset
|
86 |
|
27763 | 87 |
|
88 |
(* stopper *) |
|
89 |
||
90 |
(*end-of-file symbol at a given position*)
fun mk_eof pos = (Symbol.eof, pos);

(*end-of-file symbol without position*)
val eof = mk_eof Position.none;

(*test the symbol component with Symbol.is_eof*)
fun is_eof sym = Symbol.is_eof (symbol sym);

(*Stopper for scanning positioned symbols: for empty input the stop symbol
  is the unpositioned eof, otherwise it is an eof placed right after the
  last input symbol (its position advanced over that symbol).*)
val stopper =
  let
    fun eof_after [] = eof
      | eof_after input =
          let val (s, pos) = List.last input
          in mk_eof (Position.advance s pos) end;
  in Scan.stopper eof_after is_eof end;
|
97 |
||
98 |
||
99 |
(* basic scanners *) |
|
100 |
||
101 |
(*Wrap a scanner with Scan.!! so that failure produces a message built
  from: the given text, the position of the remaining input (or
  " (end-of-input)"), an excerpt of the first symbols of that input, and
  -- on a new line -- the nested message, if there is one.  The message is
  a suspended unit function and only evaluated on demand.*)
fun !!! text scan =
  let
    fun where_failed ((_, pos) :: _) = Position.here pos
      | where_failed [] = " (end-of-input)";

    fun detail NONE = ""
      | detail (SOME more) = "\n" ^ more ();

    fun message (input, nested) () =
      text () ^ where_failed input ^
      Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map symbol input)) ^
      detail nested;
  in Scan.!! message scan end;
111 |
||
55033 | 112 |
(*scan one positioned symbol that is equal / not equal to s*)
fun $$ s = Scan.one (fn (s', _): T => s' = s);
fun ~$$ s = Scan.one (fn (s', _): T => s' <> s);

(*same tests as above, result wrapped as singleton list*)
fun $$$ s = $$ s >> single;
fun ~$$$ s = ~$$ s >> single;
|
117 |
||
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
118 |
(*position of the next symbol, obtained by look-ahead (Scan.ahead) on an
  arbitrary single symbol*)
val scan_pos =
  let fun position_of ((_, pos): T) = pos
  in Scan.ahead (Scan.one (fn _ => true)) >> position_of end;
27763 | 119 |
|
120 |
||
43773 | 121 |
(* scan string literals *) |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
122 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
123 |
local

val ascii_digit = Scan.one (Symbol.is_ascii_digit o symbol);

(*three ASCII digits read as a number; accepted only up to 255 and turned
  into the corresponding character at the position of the first digit*)
val char_code =
  ascii_digit -- ascii_digit -- ascii_digit :|--
  (fn (((d1, pos), (d2, _)), (d3, _)) =>
    let val (code, _) = Library.read_int [d1, d2, d3]
    in if code > 255 then Scan.fail else Scan.succeed [(chr code, pos)] end);

(*one element of a string delimited by q: either a backslash followed by
  q, a backslash or a character code (the backslash itself is dropped),
  or any single symbol other than q, backslash and eof*)
fun scan_str q err_prefix =
  let
    fun plain ((s, _): T) = s <> q andalso s <> "\\" andalso Symbol.not_eof s;
    val escaped = $$$ q || $$$ "\\" || char_code;
  in
    $$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string") escaped ||
    Scan.one plain >> single
  end;

(*complete literal delimited by q; result is the position of the opening
  delimiter, the body symbols, and the position right after the closing
  delimiter*)
fun scan_strs q err_prefix =
  let
    val open_quote = scan_pos --| $$$ q;
    val body = Scan.repeats (scan_str q err_prefix);
    val close_quote = $$$ q |-- scan_pos;
  in
    Scan.ahead ($$ q) |--
      !!! (fn () => err_prefix ^ "unclosed string literal")
        (open_quote -- (body -- close_quote))
  end;

(*recovery: opening delimiter plus as many string elements as can be
  scanned permissively; no closing delimiter is required*)
fun recover_strs q =
  let val lenient = Scan.permissive (scan_str q "")
  in $$$ q @@@ Scan.repeats lenient end;

in

val scan_string_q = scan_strs "'";
val scan_string_qq = scan_strs "\"";
val scan_string_bq = scan_strs "`";

val recover_string_q = recover_strs "'";
val recover_string_qq = recover_strs "\"";
val recover_string_bq = recover_strs "`";

end;
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
157 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
158 |
|
43773 | 159 |
(* quote string literals *) |
160 |
||
161 |
local

(*decimal rendering of i, padded with leading zeros for i < 100*)
fun char_code i =
  let val digits = string_of_int i in
    if i < 10 then "00" ^ digits
    else if i < 100 then "0" ^ digits
    else digits
  end;

(*escape one symbol: ASCII control symbols become backslash + code,
  the delimiter q and the backslash get a backslash prefix*)
fun quote_str q s =
  if Symbol.is_ascii_control s then "\\" ^ char_code (ord s)
  else if s <> q andalso s <> "\\" then s
  else "\\" ^ s;

(*escape every symbol of the string and enclose the result in q ... q*)
fun quote_string q str =
  let val escaped = map (quote_str q) (Symbol.explode str)
  in enclose q q (implode escaped) end;

in

val quote_string_q = quote_string "'";
val quote_string_qq = quote_string "\"";
val quote_string_bq = quote_string "`";

end;
|
180 |
||
181 |
||
55033 | 182 |
(* nested text cartouches *) |
183 |
||
62781 | 184 |
(*Strip the outermost cartouche delimiters: the list must start with the
  open symbol and end with the close symbol, otherwise an error is raised
  that quotes the content and reports the start position of the range.*)
fun cartouche_content syms =
  let
    fun malformed () =
      error ("Malformed text cartouche: "
        ^ quote (content syms) ^ Position.here (#1 (range syms)));

    (*argument is the reversed remainder after the open symbol*)
    fun strip_close (("\<close>", _) :: inner_rev) = rev inner_rev
      | strip_close _ = malformed ();
  in
    (case syms of
      ("\<open>", _) :: rest => strip_close (rev rest)
    | _ => malformed ())
  end;
|
197 |
||
55104
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents:
55103
diff
changeset
|
198 |
(*State-dependent scanner over (depth, input): the state SOME d is the
  current nesting depth, NONE means the outermost cartouche was closed and
  nothing more is accepted.  An open symbol increments the depth; at depth
  > 0 any symbol other than close/eof keeps the depth, and a close symbol
  decrements it (to NONE when leaving depth 1).*)
val scan_cartouche_depth =
  let
    fun inner ((s, _): T) = s <> Symbol.close andalso Symbol.not_eof s;

    fun step NONE = Scan.fail
      | step (depth as SOME d) =
          let
            val opening = $$ Symbol.open_ >> pair (SOME (d + 1));
            val within = Scan.one inner >> pair depth;
            val closing = $$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1));
          in opening || (if d > 0 then within || closing else Scan.fail) end;
  in Scan.repeat1 (Scan.depend step) end;
55033 | 208 |
|
55105 | 209 |
(*Scan a complete cartouche including its delimiters.  Only commits once
  an open symbol is seen ahead; afterwards the depth scanner is run from
  depth 0 and must end in state NONE, otherwise the "unclosed" error is
  reported.*)
fun scan_cartouche err_prefix =
  let
    fun unclosed () = err_prefix ^ "unclosed text cartouche";
    val balanced = Scan.provide is_none (SOME 0) scan_cartouche_depth;
  in Scan.ahead ($$ Symbol.open_) |-- !!! unclosed balanced end;
55033 | 213 |
|
62781 | 214 |
(*scan a cartouche and return its content without the outer delimiters*)
fun scan_cartouche_content err_prefix =
  let val delimited = scan_cartouche err_prefix
  in delimited >> cartouche_content end;
|
55033 | 216 |
|
62781 | 217 |
(*recovery: run the depth scanner from depth 0 via Scan.pass, i.e. without
  the final-state check that scan_cartouche performs via Scan.provide*)
fun recover_cartouche syms = Scan.pass (SOME 0) scan_cartouche_depth syms;
55033 | 218 |
|
67361 | 219 |
(*Symbol.comment, then any number of blank symbols, then a mandatory
  cartouche; all scanned symbols are returned as one list.*)
fun scan_comment_cartouche err_prefix =
  let
    val marker = $$$ Symbol.comment;
    val blanks = Scan.many (Symbol.is_blank o symbol);
    fun missing () = err_prefix ^ "cartouche expected after " ^ quote Symbol.comment;
  in marker @@@ blanks @@@ !!! missing (scan_cartouche err_prefix) end;
|
223 |
||
55033 | 224 |
|
27763 | 225 |
(* ML-style comments *) |
226 |
||
227 |
local

val open_cmt = $$$ "(" @@@ $$$ "*";
val close_cmt = $$$ "*" @@@ $$$ ")";

(*one piece of comment text, with the nesting depth as scanner state:
  a nested opening, a nested closing (only at depth > 0), a "*" that is
  not followed by ")", or any other symbol except "*" and eof*)
val scan_cmt =
  let
    fun nested_open (depth: int) = open_cmt >> pair (depth + 1);
    fun nested_close 0 = Scan.fail
      | nested_close depth = close_cmt >> pair (depth - 1);
    val lone_star = $$$ "*" --| Scan.ahead (~$$$ ")");
    fun ordinary ((s, _): T) = s <> "*" andalso Symbol.not_eof s;
  in
    Scan.depend nested_open ||
    Scan.depend nested_close ||
    Scan.lift lone_star ||
    Scan.lift (Scan.one ordinary) >> single
  end;

(*comment text starting at nesting depth 0*)
val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt);

(*look-ahead for the opening delimiter*)
val comment_ahead = Scan.ahead ($$ "(" -- $$ "*");

fun unclosed err_prefix () = err_prefix ^ "unclosed comment";

in

(*complete comment, delimiters included in the result*)
fun scan_comment err_prefix =
  comment_ahead |--
    !!! (unclosed err_prefix) (open_cmt @@@ scan_cmts @@@ close_cmt);

(*complete comment, only the text between the delimiters is returned*)
fun scan_comment_body err_prefix =
  comment_ahead |--
    !!! (unclosed err_prefix)
      ($$ "(" |-- $$ "*" |-- scan_cmts --| $$ "*" --| $$ ")");

(*recovery: opening delimiter plus comment text, no closing required*)
val recover_comment = open_cmt @@@ scan_cmts;

end;
253 |
||
254 |
||
255 |
(* source *) |
|
256 |
||
257 |
(*Turn a source of plain symbols into a source of positioned symbols.
  The running position is the scanner state: every non-eof symbol is
  paired with the current position, and the state is advanced over it.*)
fun source pos =
  let
    fun positioned here =
      Scan.one Symbol.not_eof >> (fn s => (Position.advance s here, (s, here)));
  in Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend positioned)) end;
27763 | 260 |
|
261 |
||
262 |
(* compact representation -- with Symbol.DEL padding *) |
|
263 |
||
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
264 |
type text = string;

(*Plain symbols of a positioned list, with Symbol.DEL inserted between
  neighbours: the number of DELs is the Position.distance_of from the end
  of one symbol to the start of the next, never less than 0.*)
fun pad ((s1, pos1) :: (rest as (_, pos2) :: _)) =
      let
        val gap = Position.distance_of (Position.advance s1 pos1) pos2;
        val filler = replicate (Int.max (0, gap)) Symbol.DEL;
      in s1 :: (filler @ pad rest) end
  | pad syms = map symbol syms;  (*empty or singleton list: nothing to pad*)

(*must stay a non-recursive val binding: the "implode" on the right is the
  previously visible string-list version, not this definition*)
val implode = fn syms => implode (pad syms);
27763 | 275 |
|
59112 | 276 |
(*Implode with explicit boundaries: empty symbols carrying pos1 and pos2
  are put in front of and behind the list, so that both the padded text
  and the resulting range take these two positions into account.*)
fun implode_range (pos1, pos2) syms =
  let
    val first = ("", pos1);
    val last = ("", pos2);
    val framed = first :: (syms @ [last]);
  in (implode framed, range framed) end;
|
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
279 |
|
27763 | 280 |
(*Explode text into positioned symbols, starting from pos without its
  range part (Position.no_range_position).  Positions are advanced over
  every symbol, including Symbol.DEL padding, which is dropped from the
  result afterwards.*)
fun explode (str, pos) =
  let
    (*accumulates in reverse order while threading the position*)
    fun attach s (acc, p) = ((s, p) :: acc, Position.advance s p);
    val (rev_syms, _) =
      fold attach (Symbol.explode str) ([], Position.no_range_position pos);

    (*second pass restores the order and filters the padding*)
    fun keep (s, p) acc = if s = Symbol.DEL then acc else (s, p) :: acc;
  in fold keep rev_syms [] end;

(*explode starting from Position.none*)
fun explode0 str =
  let val start = Position.none
  in explode (str, start) end;
288 |
||
50239 | 289 |
|
290 |
(* identifiers *) |
|
291 |
||
50242
56b9c792a98b
support for sub-structured identifier syntax (inactive);
wenzelm
parents:
50239
diff
changeset
|
292 |
local

fun is_sub s = s = "\<^sub>";

val letter = Scan.one (Symbol.is_letter o symbol);
val letdigs1 = Scan.many1 (Symbol.is_letdig o symbol);
val sub = Scan.one (is_sub o symbol);

(*continuation of an identifier: a non-empty run of letter/digit symbols,
  optionally introduced by a single subscript control symbol*)
val ident_tail = letdigs1 || sub ::: letdigs1;

in

(*identifier: a letter followed by any number of continuation parts*)
val scan_ident = letter ::: Scan.repeats ident_tail;

end;
56b9c792a98b
support for sub-structured identifier syntax (inactive);
wenzelm
parents:
50239
diff
changeset
|
304 |
|
50239 | 305 |
(*A string is an identifier if it is an ASCII identifier, or if scan_ident
  consumes all of its symbols; any exception during scanning counts as
  "not an identifier" (via "try").*)
fun is_identifier s =
  let
    fun scans_completely () =
      (case try (Scan.finite stopper scan_ident) (explode0 s) of
        SOME (_, rest) => null rest
      | NONE => false);
  in Symbol.is_ascii_identifier s orelse scans_completely () end;
50239 | 310 |
|
62782 | 311 |
|
312 |
(* numerals *) |
|
313 |
||
314 |
(*non-empty sequence of digit symbols*)
val scan_nat = Scan.many1 (fn sym => Symbol.is_digit (symbol sym));

(*digits, a "." symbol, digits -- returned as one symbol list*)
val scan_float =
  let val point = $$$ "."
  in scan_nat @@@ point @@@ scan_nat end;
|
316 |
||
27763 | 317 |
end; |
318 |
||
36957 | 319 |
(*Unqualified access to the four basic symbol scanners of Symbol_Pos;
  this structure is not open by default.*)
structure Basic_Symbol_Pos =
struct
  fun $$ s = Symbol_Pos.$$ s;
  fun ~$$ s = Symbol_Pos.~$$ s;
  fun $$$ s = Symbol_Pos.$$$ s;
  fun ~$$$ s = Symbol_Pos.~$$$ s;
end;