| author | wenzelm | 
| Fri, 25 Nov 2022 21:58:40 +0100 | |
| changeset 76537 | cdbe20024038 | 
| parent 74175 | 53e28c438f96 | 
| permissions | -rw-r--r-- | 
| 27763 | 1  | 
(* Title: Pure/General/symbol_pos.ML  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Symbols with explicit position information.  | 
|
5  | 
*)  | 
|
6  | 
||
| 36957 | 7  | 
signature SYMBOL_POS =  | 
| 27763 | 8  | 
sig  | 
9  | 
type T = Symbol.symbol * Position.T  | 
|
| 67426 | 10  | 
type 'a scanner = T list -> 'a * T list  | 
| 27763 | 11  | 
val symbol: T -> Symbol.symbol  | 
| 27797 | 12  | 
val content: T list -> string  | 
| 55033 | 13  | 
val range: T list -> Position.range  | 
| 27763 | 14  | 
val is_eof: T -> bool  | 
15  | 
val stopper: T Scan.stopper  | 
|
| 67426 | 16  | 
val !!! : Scan.message -> 'a scanner -> 'a scanner  | 
17  | 
val $$ : Symbol.symbol -> T scanner  | 
|
18  | 
val ~$$ : Symbol.symbol -> T scanner  | 
|
19  | 
val $$$ : Symbol.symbol -> T list scanner  | 
|
20  | 
val ~$$$ : Symbol.symbol -> T list scanner  | 
|
21  | 
val scan_pos: Position.T scanner  | 
|
22  | 
val scan_string_q: string -> (Position.T * (T list * Position.T)) scanner  | 
|
23  | 
val scan_string_qq: string -> (Position.T * (T list * Position.T)) scanner  | 
|
24  | 
val scan_string_bq: string -> (Position.T * (T list * Position.T)) scanner  | 
|
25  | 
val recover_string_q: T list scanner  | 
|
26  | 
val recover_string_qq: T list scanner  | 
|
27  | 
val recover_string_bq: T list scanner  | 
|
| 43773 | 28  | 
val quote_string_q: string -> string  | 
29  | 
val quote_string_qq: string -> string  | 
|
30  | 
val quote_string_bq: string -> string  | 
|
| 62781 | 31  | 
val cartouche_content: T list -> T list  | 
| 67426 | 32  | 
val scan_cartouche: string -> T list scanner  | 
33  | 
val scan_cartouche_content: string -> T list scanner  | 
|
34  | 
val recover_cartouche: T list scanner  | 
|
35  | 
val scan_comment: string -> T list scanner  | 
|
36  | 
val scan_comment_body: string -> T list scanner  | 
|
37  | 
val recover_comment: T list scanner  | 
|
| 27763 | 38  | 
val source: Position.T -> (Symbol.symbol, 'a) Source.source ->  | 
39  | 
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
40  | 
type text = string  | 
| 27797 | 41  | 
val implode: T list -> text  | 
| 59112 | 42  | 
val implode_range: Position.range -> T list -> text * Position.range  | 
| 74175 | 43  | 
val explode_deleted: string * Position.T -> Position.T list  | 
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
44  | 
val explode: text * Position.T -> T list  | 
| 62751 | 45  | 
val explode0: string -> T list  | 
| 67426 | 46  | 
val scan_ident: T list scanner  | 
| 50239 | 47  | 
val is_identifier: string -> bool  | 
| 67426 | 48  | 
val scan_nat: T list scanner  | 
49  | 
val scan_float: T list scanner  | 
|
| 27763 | 50  | 
end;  | 
51  | 
||
| 30573 | 52  | 
structure Symbol_Pos: SYMBOL_POS =  | 
| 27763 | 53  | 
struct  | 
54  | 
||
55  | 
(* type T *)  | 
|
56  | 
||
57  | 
type T = Symbol.symbol * Position.T;  | 
|
| 67426 | 58  | 
type 'a scanner = T list -> 'a * T list;  | 
| 27763 | 59  | 
|
60  | 
fun symbol ((s, _): T) = s;  | 
|
| 27852 | 61  | 
|
| 27797 | 62  | 
val content = implode o map symbol;  | 
| 27763 | 63  | 
|
| 55033 | 64  | 
fun range (syms as (_, pos) :: _) =  | 
| 74174 | 65  | 
let val pos' = List.last syms |-> Position.symbol  | 
| 62797 | 66  | 
in Position.range (pos, pos') end  | 
| 55033 | 67  | 
| range [] = Position.no_range;  | 
68  | 
||
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
69  | 
|
| 27763 | 70  | 
(* stopper *)  | 
71  | 
||
72  | 
fun mk_eof pos = (Symbol.eof, pos);  | 
|
73  | 
val eof = mk_eof Position.none;  | 
|
74  | 
||
75  | 
val is_eof = Symbol.is_eof o symbol;  | 
|
76  | 
||
77  | 
val stopper =  | 
|
| 74174 | 78  | 
Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.symbol)) is_eof;  | 
| 27763 | 79  | 
|
80  | 
||
81  | 
(* basic scanners *)  | 
|
82  | 
||
| 67426 | 83  | 
fun !!! text (scan: 'a scanner) =  | 
| 27763 | 84  | 
let  | 
| 
48911
 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 
wenzelm 
parents: 
48770 
diff
changeset
 | 
85  | 
fun get_pos [] = " (end-of-input)"  | 
| 48992 | 86  | 
| get_pos ((_, pos) :: _) = Position.here pos;  | 
| 27763 | 87  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
88  | 
fun err (syms, msg) = fn () =>  | 
| 48770 | 89  | 
text () ^ get_pos syms ^  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
90  | 
      Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map symbol syms)) ^
 | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
91  | 
(case msg of NONE => "" | SOME m => "\n" ^ m ());  | 
| 27763 | 92  | 
in Scan.!! err scan end;  | 
93  | 
||
| 55033 | 94  | 
fun $$ s = Scan.one (fn x => symbol x = s);  | 
| 55107 | 95  | 
fun ~$$ s = Scan.one (fn x => symbol x <> s);  | 
96  | 
||
| 27763 | 97  | 
fun $$$ s = Scan.one (fn x => symbol x = s) >> single;  | 
98  | 
fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single;  | 
|
99  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
100  | 
val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos);  | 
| 27763 | 101  | 
|
102  | 
||
| 43773 | 103  | 
(* scan string literals *)  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
104  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
105  | 
local  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
106  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
107  | 
val char_code =  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
108  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
109  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
110  | 
Scan.one (Symbol.is_ascii_digit o symbol) :|--  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
111  | 
(fn (((a, pos), (b, _)), (c, _)) =>  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
112  | 
let val (n, _) = Library.read_int [a, b, c]  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
113  | 
in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
114  | 
|
| 48764 | 115  | 
fun scan_str q err_prefix =  | 
116  | 
$$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string")  | 
|
117  | 
($$$ q || $$$ "\\" || char_code) ||  | 
|
| 58854 | 118  | 
Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s) >> single;  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
119  | 
|
| 48764 | 120  | 
fun scan_strs q err_prefix =  | 
| 55103 | 121  | 
Scan.ahead ($$ q) |--  | 
122  | 
!!! (fn () => err_prefix ^ "unclosed string literal")  | 
|
| 61476 | 123  | 
((scan_pos --| $$$ q) -- (Scan.repeats (scan_str q err_prefix) -- ($$$ q |-- scan_pos)));  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
124  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
125  | 
fun recover_strs q =  | 
| 61476 | 126  | 
$$$ q @@@ Scan.repeats (Scan.permissive (scan_str q ""));  | 
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
127  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
128  | 
in  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
129  | 
|
| 42503 | 130  | 
val scan_string_q = scan_strs "'";  | 
131  | 
val scan_string_qq = scan_strs "\"";  | 
|
132  | 
val scan_string_bq = scan_strs "`";  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
133  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
134  | 
val recover_string_q = recover_strs "'";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
135  | 
val recover_string_qq = recover_strs "\"";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
136  | 
val recover_string_bq = recover_strs "`";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
137  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
138  | 
end;  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
139  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
140  | 
|
| 43773 | 141  | 
(* quote string literals *)  | 
142  | 
||
143  | 
local  | 
|
144  | 
||
145  | 
fun char_code i =  | 
|
146  | 
(if i < 10 then "00" else if i < 100 then "0" else "") ^ string_of_int i;  | 
|
147  | 
||
148  | 
fun quote_str q s =  | 
|
149  | 
if Symbol.is_ascii_control s then "\\" ^ char_code (ord s)  | 
|
150  | 
else if s = q orelse s = "\\" then "\\" ^ s  | 
|
151  | 
else s;  | 
|
152  | 
||
153  | 
fun quote_string q = enclose q q o implode o map (quote_str q) o Symbol.explode;  | 
|
154  | 
||
155  | 
in  | 
|
156  | 
||
157  | 
val quote_string_q = quote_string "'";  | 
|
158  | 
val quote_string_qq = quote_string "\"";  | 
|
159  | 
val quote_string_bq = quote_string "`";  | 
|
160  | 
||
161  | 
end;  | 
|
162  | 
||
163  | 
||
| 55033 | 164  | 
(* nested text cartouches *)  | 
165  | 
||
| 62781 | 166  | 
fun cartouche_content syms =  | 
167  | 
let  | 
|
168  | 
fun err () =  | 
|
169  | 
      error ("Malformed text cartouche: "
 | 
|
170  | 
^ quote (content syms) ^ Position.here (#1 (range syms)));  | 
|
171  | 
in  | 
|
172  | 
(case syms of  | 
|
173  | 
      ("\<open>", _) :: rest =>
 | 
|
174  | 
(case rev rest of  | 
|
175  | 
          ("\<close>", _) :: rrest => rev rrest
 | 
|
176  | 
| _ => err ())  | 
|
177  | 
| _ => err ())  | 
|
178  | 
end;  | 
|
179  | 
||
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
180  | 
val scan_cartouche_depth =  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
181  | 
Scan.repeat1 (Scan.depend (fn (depth: int option) =>  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
182  | 
(case depth of  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
183  | 
SOME d =>  | 
| 62210 | 184  | 
$$ Symbol.open_ >> pair (SOME (d + 1)) ||  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
185  | 
(if d > 0 then  | 
| 62210 | 186  | 
Scan.one (fn (s, _) => s <> Symbol.close andalso Symbol.not_eof s) >> pair depth ||  | 
187  | 
$$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1))  | 
|
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
188  | 
else Scan.fail)  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
189  | 
| NONE => Scan.fail)));  | 
| 55033 | 190  | 
|
| 55105 | 191  | 
fun scan_cartouche err_prefix =  | 
| 62210 | 192  | 
Scan.ahead ($$ Symbol.open_) |--  | 
| 55105 | 193  | 
!!! (fn () => err_prefix ^ "unclosed text cartouche")  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
194  | 
(Scan.provide is_none (SOME 0) scan_cartouche_depth);  | 
| 55033 | 195  | 
|
| 62781 | 196  | 
fun scan_cartouche_content err_prefix =  | 
197  | 
scan_cartouche err_prefix >> cartouche_content;  | 
|
| 55033 | 198  | 
|
| 62781 | 199  | 
val recover_cartouche = Scan.pass (SOME 0) scan_cartouche_depth;  | 
| 55033 | 200  | 
|
| 67413 | 201  | 
|
| 27763 | 202  | 
(* ML-style comments *)  | 
203  | 
||
204  | 
local  | 
|
205  | 
||
206  | 
val scan_cmt =  | 
|
207  | 
  Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
 | 
|
208  | 
Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||  | 
|
209  | 
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||  | 
|
| 58854 | 210  | 
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s)) >> single;  | 
| 27763 | 211  | 
|
| 61476 | 212  | 
val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt);  | 
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
213  | 
|
| 27763 | 214  | 
in  | 
215  | 
||
| 55105 | 216  | 
fun scan_comment err_prefix =  | 
| 55106 | 217  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
218  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 219  | 
      ($$$ "(" @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ ")");
 | 
| 27763 | 220  | 
|
| 55105 | 221  | 
fun scan_comment_body err_prefix =  | 
| 55106 | 222  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
223  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 224  | 
      ($$ "(" |-- $$ "*" |-- scan_cmts --| $$ "*" --| $$ ")");
 | 
| 27763 | 225  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
226  | 
val recover_comment =  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
227  | 
  $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
 | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
228  | 
|
| 27763 | 229  | 
end;  | 
230  | 
||
231  | 
||
232  | 
(* source *)  | 
|
233  | 
||
234  | 
fun source pos =  | 
|
235  | 
Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos =>  | 
|
| 74174 | 236  | 
Scan.one Symbol.not_eof >> (fn s => (Position.symbol s pos, (s, pos))))));  | 
| 27763 | 237  | 
|
238  | 
||
239  | 
(* compact representation -- with Symbol.DEL padding *)  | 
|
240  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
241  | 
type text = string;  | 
| 
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
242  | 
|
| 27763 | 243  | 
fun pad [] = []  | 
244  | 
| pad [(s, _)] = [s]  | 
|
| 32784 | 245  | 
| pad ((s1, pos1) :: (rest as (_, pos2) :: _)) =  | 
| 27763 | 246  | 
let  | 
| 74174 | 247  | 
val end_pos1 = Position.symbol s1 pos1;  | 
| 68177 | 248  | 
val d = Int.max (0, the_default 0 (Position.distance_of (end_pos1, pos2)));  | 
| 27763 | 249  | 
in s1 :: replicate d Symbol.DEL @ pad rest end;  | 
250  | 
||
| 27797 | 251  | 
val implode = implode o pad;  | 
| 27763 | 252  | 
|
| 59112 | 253  | 
fun implode_range (pos1, pos2) syms =  | 
| 27797 | 254  | 
  let val syms' = (("", pos1) :: syms @ [("", pos2)])
 | 
255  | 
in (implode syms', range syms') end;  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
256  | 
|
| 74175 | 257  | 
local  | 
258  | 
||
259  | 
fun rev_explode (str, pos) =  | 
|
260  | 
fold (fn s => fn (res, p) => ((s, p) :: res, Position.symbol s p))  | 
|
261  | 
(Symbol.explode str) ([], Position.no_range_position pos)  | 
|
262  | 
|> #1;  | 
|
263  | 
||
264  | 
in  | 
|
| 27763 | 265  | 
|
| 74175 | 266  | 
fun explode_deleted arg =  | 
267  | 
fold (fn (s, p) => s = Symbol.DEL ? cons p) (rev_explode arg) [];  | 
|
268  | 
||
269  | 
fun explode arg =  | 
|
270  | 
fold (fn (s, p) => s <> Symbol.DEL ? cons (s, p)) (rev_explode arg) [];  | 
|
271  | 
||
| 62751 | 272  | 
fun explode0 str = explode (str, Position.none);  | 
273  | 
||
| 74175 | 274  | 
end;  | 
275  | 
||
| 50239 | 276  | 
|
277  | 
(* identifiers *)  | 
|
278  | 
||
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
279  | 
local  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
280  | 
|
| 
52616
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
281  | 
val letter = Scan.one (symbol #> Symbol.is_letter);  | 
| 
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
282  | 
val letdigs1 = Scan.many1 (symbol #> Symbol.is_letdig);  | 
| 
53016
 
fa9c38891cf2
disable old identifier syntax by default, legacy_isub_isup := true may be used temporarily as fall-back;
 
wenzelm 
parents: 
52920 
diff
changeset
 | 
283  | 
|
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62239 
diff
changeset
 | 
284  | 
val sub = Scan.one (symbol #> (fn s => s = "\<^sub>"));  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
285  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
286  | 
in  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
287  | 
|
| 61476 | 288  | 
val scan_ident = letter ::: Scan.repeats (letdigs1 || sub ::: letdigs1);  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
289  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
290  | 
end;  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
291  | 
|
| 50239 | 292  | 
fun is_identifier s =  | 
| 
50295
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
293  | 
Symbol.is_ascii_identifier s orelse  | 
| 62751 | 294  | 
(case try (Scan.finite stopper scan_ident) (explode0 s) of  | 
| 
50295
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
295  | 
SOME (_, []) => true  | 
| 
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
296  | 
| _ => false);  | 
| 50239 | 297  | 
|
| 62782 | 298  | 
|
299  | 
(* numerals *)  | 
|
300  | 
||
301  | 
val scan_nat = Scan.many1 (Symbol.is_digit o symbol);  | 
|
302  | 
val scan_float = scan_nat @@@ $$$ "." @@@ scan_nat;  | 
|
303  | 
||
| 27763 | 304  | 
end;  | 
305  | 
||
| 36957 | 306  | 
structure Basic_Symbol_Pos = (*not open by default*)  | 
307  | 
struct  | 
|
| 55103 | 308  | 
val $$ = Symbol_Pos.$$;  | 
| 55107 | 309  | 
val ~$$ = Symbol_Pos.~$$;  | 
| 36957 | 310  | 
val $$$ = Symbol_Pos.$$$;  | 
311  | 
val ~$$$ = Symbol_Pos.~$$$;  | 
|
312  | 
end;  | 
|
| 67426 | 313  | 
|
314  | 
type 'a scanner = 'a Symbol_Pos.scanner;  |