| author | wenzelm | 
| Tue, 31 Mar 2015 21:12:22 +0200 | |
| changeset 59885 | 3470a265d404 | 
| parent 59112 | e670969f34df | 
| child 61456 | b521b8b400f7 | 
| permissions | -rw-r--r-- | 
| 27763 | 1  | 
(* Title: Pure/General/symbol_pos.ML  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Symbols with explicit position information.  | 
|
5  | 
*)  | 
|
6  | 
||
| 36957 | 7  | 
signature SYMBOL_POS =  | 
| 27763 | 8  | 
sig  | 
9  | 
type T = Symbol.symbol * Position.T  | 
|
10  | 
val symbol: T -> Symbol.symbol  | 
|
| 55103 | 11  | 
val $$ : Symbol.symbol -> T list -> T * T list  | 
| 55107 | 12  | 
val ~$$ : Symbol.symbol -> T list -> T * T list  | 
| 27763 | 13  | 
val $$$ : Symbol.symbol -> T list -> T list * T list  | 
14  | 
val ~$$$ : Symbol.symbol -> T list -> T list * T list  | 
|
| 27797 | 15  | 
val content: T list -> string  | 
| 55033 | 16  | 
val range: T list -> Position.range  | 
| 27763 | 17  | 
val is_eof: T -> bool  | 
18  | 
val stopper: T Scan.stopper  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
19  | 
val !!! : Scan.message -> (T list -> 'a) -> T list -> 'a  | 
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
20  | 
val scan_pos: T list -> Position.T * T list  | 
| 48764 | 21  | 
val scan_string_q: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
22  | 
val scan_string_qq: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
|
23  | 
val scan_string_bq: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
24  | 
val recover_string_q: T list -> T list * T list  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
25  | 
val recover_string_qq: T list -> T list * T list  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
26  | 
val recover_string_bq: T list -> T list * T list  | 
| 43773 | 27  | 
val quote_string_q: string -> string  | 
28  | 
val quote_string_qq: string -> string  | 
|
29  | 
val quote_string_bq: string -> string  | 
|
| 55105 | 30  | 
val scan_cartouche: string -> T list -> T list * T list  | 
| 55033 | 31  | 
val recover_cartouche: T list -> T list * T list  | 
32  | 
val cartouche_content: T list -> T list  | 
|
| 55105 | 33  | 
val scan_comment: string -> T list -> T list * T list  | 
34  | 
val scan_comment_body: string -> T list -> T list * T list  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
35  | 
val recover_comment: T list -> T list * T list  | 
| 27763 | 36  | 
val source: Position.T -> (Symbol.symbol, 'a) Source.source ->  | 
37  | 
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
38  | 
type text = string  | 
| 27797 | 39  | 
val implode: T list -> text  | 
| 59112 | 40  | 
val implode_range: Position.range -> T list -> text * Position.range  | 
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
41  | 
val explode: text * Position.T -> T list  | 
| 50239 | 42  | 
val scan_ident: T list -> T list * T list  | 
43  | 
val is_identifier: string -> bool  | 
|
| 27763 | 44  | 
end;  | 
45  | 
||
| 30573 | 46  | 
structure Symbol_Pos: SYMBOL_POS =  | 
| 27763 | 47  | 
struct  | 
48  | 
||
49  | 
(* type T *)  | 
|
50  | 
||
51  | 
type T = Symbol.symbol * Position.T;  | 
|
52  | 
||
53  | 
fun symbol ((s, _): T) = s;  | 
|
| 27852 | 54  | 
|
| 27797 | 55  | 
val content = implode o map symbol;  | 
| 27763 | 56  | 
|
| 55033 | 57  | 
fun range (syms as (_, pos) :: _) =  | 
58  | 
let val pos' = List.last syms |-> Position.advance  | 
|
59  | 
in Position.range pos pos' end  | 
|
60  | 
| range [] = Position.no_range;  | 
|
61  | 
||
| 27763 | 62  | 
|
63  | 
(* stopper *)  | 
|
64  | 
||
65  | 
fun mk_eof pos = (Symbol.eof, pos);  | 
|
66  | 
val eof = mk_eof Position.none;  | 
|
67  | 
||
68  | 
val is_eof = Symbol.is_eof o symbol;  | 
|
69  | 
||
70  | 
val stopper =  | 
|
71  | 
Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.advance)) is_eof;  | 
|
72  | 
||
73  | 
||
74  | 
(* basic scanners *)  | 
|
75  | 
||
76  | 
fun !!! text scan =  | 
|
77  | 
let  | 
|
| 
48911
 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 
wenzelm 
parents: 
48770 
diff
changeset
 | 
78  | 
fun get_pos [] = " (end-of-input)"  | 
| 48992 | 79  | 
| get_pos ((_, pos) :: _) = Position.here pos;  | 
| 27763 | 80  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
81  | 
fun err (syms, msg) = fn () =>  | 
| 48770 | 82  | 
text () ^ get_pos syms ^  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
83  | 
      Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map symbol syms)) ^
 | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
84  | 
(case msg of NONE => "" | SOME m => "\n" ^ m ());  | 
| 27763 | 85  | 
in Scan.!! err scan end;  | 
86  | 
||
| 55033 | 87  | 
fun $$ s = Scan.one (fn x => symbol x = s);  | 
| 55107 | 88  | 
fun ~$$ s = Scan.one (fn x => symbol x <> s);  | 
89  | 
||
| 27763 | 90  | 
fun $$$ s = Scan.one (fn x => symbol x = s) >> single;  | 
91  | 
fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single;  | 
|
92  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
93  | 
val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos);  | 
| 27763 | 94  | 
|
95  | 
||
| 43773 | 96  | 
(* scan string literals *)  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
97  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
98  | 
local  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
99  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
100  | 
val char_code =  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
101  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
102  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
103  | 
Scan.one (Symbol.is_ascii_digit o symbol) :|--  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
104  | 
(fn (((a, pos), (b, _)), (c, _)) =>  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
105  | 
let val (n, _) = Library.read_int [a, b, c]  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
106  | 
in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
107  | 
|
| 48764 | 108  | 
fun scan_str q err_prefix =  | 
109  | 
$$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string")  | 
|
110  | 
($$$ q || $$$ "\\" || char_code) ||  | 
|
| 58854 | 111  | 
Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s) >> single;  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
112  | 
|
| 48764 | 113  | 
fun scan_strs q err_prefix =  | 
| 55103 | 114  | 
Scan.ahead ($$ q) |--  | 
115  | 
!!! (fn () => err_prefix ^ "unclosed string literal")  | 
|
116  | 
((scan_pos --| $$$ q) --  | 
|
| 58850 | 117  | 
((Scan.repeat (scan_str q err_prefix) >> flat) -- ($$$ q |-- scan_pos)));  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
118  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
119  | 
fun recover_strs q =  | 
| 48764 | 120  | 
$$$ q @@@ (Scan.repeat (Scan.permissive (scan_str q "")) >> flat);  | 
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
121  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
122  | 
in  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
123  | 
|
| 42503 | 124  | 
val scan_string_q = scan_strs "'";  | 
125  | 
val scan_string_qq = scan_strs "\"";  | 
|
126  | 
val scan_string_bq = scan_strs "`";  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
127  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
128  | 
val recover_string_q = recover_strs "'";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
129  | 
val recover_string_qq = recover_strs "\"";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
130  | 
val recover_string_bq = recover_strs "`";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
131  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
132  | 
end;  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
133  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
134  | 
|
| 43773 | 135  | 
(* quote string literals *)  | 
136  | 
||
137  | 
local  | 
|
138  | 
||
139  | 
fun char_code i =  | 
|
140  | 
(if i < 10 then "00" else if i < 100 then "0" else "") ^ string_of_int i;  | 
|
141  | 
||
142  | 
fun quote_str q s =  | 
|
143  | 
if Symbol.is_ascii_control s then "\\" ^ char_code (ord s)  | 
|
144  | 
else if s = q orelse s = "\\" then "\\" ^ s  | 
|
145  | 
else s;  | 
|
146  | 
||
147  | 
fun quote_string q = enclose q q o implode o map (quote_str q) o Symbol.explode;  | 
|
148  | 
||
149  | 
in  | 
|
150  | 
||
151  | 
val quote_string_q = quote_string "'";  | 
|
152  | 
val quote_string_qq = quote_string "\"";  | 
|
153  | 
val quote_string_bq = quote_string "`";  | 
|
154  | 
||
155  | 
end;  | 
|
156  | 
||
157  | 
||
| 55033 | 158  | 
(* nested text cartouches *)  | 
159  | 
||
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
160  | 
val scan_cartouche_depth =  | 
| 
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
161  | 
Scan.repeat1 (Scan.depend (fn (d: int) =>  | 
| 
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
162  | 
$$ "\\<open>" >> pair (d + 1) ||  | 
| 
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
163  | 
(if d > 0 then  | 
| 58854 | 164  | 
Scan.one (fn (s, _) => s <> "\\<close>" andalso Symbol.not_eof s) >> pair d ||  | 
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
165  | 
$$ "\\<close>" >> pair (d - 1)  | 
| 
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
166  | 
else Scan.fail)));  | 
| 55033 | 167  | 
|
| 55105 | 168  | 
fun scan_cartouche err_prefix =  | 
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
169  | 
Scan.ahead ($$ "\\<open>") |--  | 
| 55105 | 170  | 
!!! (fn () => err_prefix ^ "unclosed text cartouche")  | 
| 58850 | 171  | 
(Scan.provide (fn d => d = 0) 0 scan_cartouche_depth);  | 
| 55033 | 172  | 
|
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
173  | 
val recover_cartouche = Scan.pass 0 scan_cartouche_depth;  | 
| 55033 | 174  | 
|
175  | 
fun cartouche_content syms =  | 
|
176  | 
let  | 
|
177  | 
fun err () =  | 
|
| 
55709
 
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
178  | 
      error ("Malformed text cartouche: "
 | 
| 
 
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
179  | 
^ quote (content syms) ^ Position.here (#1 (range syms)));  | 
| 55033 | 180  | 
in  | 
181  | 
(case syms of  | 
|
182  | 
      ("\\<open>", _) :: rest =>
 | 
|
183  | 
(case rev rest of  | 
|
184  | 
          ("\\<close>", _) :: rrest => rev rrest
 | 
|
185  | 
| _ => err ())  | 
|
186  | 
| _ => err ())  | 
|
187  | 
end;  | 
|
188  | 
||
189  | 
||
| 27763 | 190  | 
(* ML-style comments *)  | 
191  | 
||
192  | 
local  | 
|
193  | 
||
194  | 
val scan_cmt =  | 
|
195  | 
  Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
 | 
|
196  | 
Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||  | 
|
197  | 
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||  | 
|
| 58854 | 198  | 
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s)) >> single;  | 
| 27763 | 199  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
200  | 
val scan_cmts = Scan.pass 0 (Scan.repeat scan_cmt >> flat);  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
201  | 
|
| 27763 | 202  | 
in  | 
203  | 
||
| 55105 | 204  | 
fun scan_comment err_prefix =  | 
| 55106 | 205  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
206  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 207  | 
      ($$$ "(" @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ ")");
 | 
| 27763 | 208  | 
|
| 55105 | 209  | 
fun scan_comment_body err_prefix =  | 
| 55106 | 210  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
211  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 212  | 
      ($$ "(" |-- $$ "*" |-- scan_cmts --| $$ "*" --| $$ ")");
 | 
| 27763 | 213  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
214  | 
val recover_comment =  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
215  | 
  $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
 | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
216  | 
|
| 27763 | 217  | 
end;  | 
218  | 
||
219  | 
||
220  | 
(* source *)  | 
|
221  | 
||
222  | 
fun source pos =  | 
|
223  | 
Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos =>  | 
|
| 58864 | 224  | 
Scan.one Symbol.not_eof >> (fn s => (Position.advance s pos, (s, pos))))));  | 
| 27763 | 225  | 
|
226  | 
||
227  | 
(* compact representation -- with Symbol.DEL padding *)  | 
|
228  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
229  | 
type text = string;  | 
| 
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
230  | 
|
| 27763 | 231  | 
fun pad [] = []  | 
232  | 
| pad [(s, _)] = [s]  | 
|
| 32784 | 233  | 
| pad ((s1, pos1) :: (rest as (_, pos2) :: _)) =  | 
| 27763 | 234  | 
let  | 
235  | 
val end_pos1 = Position.advance s1 pos1;  | 
|
| 27797 | 236  | 
val d = Int.max (0, Position.distance_of end_pos1 pos2);  | 
| 27763 | 237  | 
in s1 :: replicate d Symbol.DEL @ pad rest end;  | 
238  | 
||
| 27797 | 239  | 
val implode = implode o pad;  | 
| 27763 | 240  | 
|
| 59112 | 241  | 
fun implode_range (pos1, pos2) syms =  | 
| 27797 | 242  | 
  let val syms' = (("", pos1) :: syms @ [("", pos2)])
 | 
243  | 
in (implode syms', range syms') end;  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
244  | 
|
| 27763 | 245  | 
fun explode (str, pos) =  | 
| 41416 | 246  | 
let  | 
247  | 
val (res, _) =  | 
|
248  | 
fold (fn s => fn (res, p) => ((s, p) :: res, Position.advance s p))  | 
|
249  | 
(Symbol.explode str) ([], Position.reset_range pos);  | 
|
250  | 
in fold (fn (s, p) => if s = Symbol.DEL then I else cons (s, p)) res [] end;  | 
|
| 27763 | 251  | 
|
| 50239 | 252  | 
|
253  | 
(* identifiers *)  | 
|
254  | 
||
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
255  | 
local  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
256  | 
|
| 
52616
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
257  | 
val letter = Scan.one (symbol #> Symbol.is_letter);  | 
| 
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
258  | 
val letdigs1 = Scan.many1 (symbol #> Symbol.is_letdig);  | 
| 
53016
 
fa9c38891cf2
disable old identifier syntax by default, legacy_isub_isup := true may be used temporarily as fall-back;
 
wenzelm 
parents: 
52920 
diff
changeset
 | 
259  | 
|
| 54732 | 260  | 
val sub = Scan.one (symbol #> (fn s => s = "\\<^sub>"));  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
261  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
262  | 
in  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
263  | 
|
| 
52920
 
4539e4a06339
more strict identifier syntax: disallow superscripts, which tend to be used in notation such as \<^sup>\<omega>;
 
wenzelm 
parents: 
52616 
diff
changeset
 | 
264  | 
val scan_ident = letter ::: (Scan.repeat (letdigs1 || sub ::: letdigs1) >> flat);  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
265  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
266  | 
end;  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
267  | 
|
| 50239 | 268  | 
fun is_identifier s =  | 
| 
50295
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
269  | 
Symbol.is_ascii_identifier s orelse  | 
| 
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
270  | 
(case try (Scan.finite stopper scan_ident) (explode (s, Position.none)) of  | 
| 
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
271  | 
SOME (_, []) => true  | 
| 
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
272  | 
| _ => false);  | 
| 50239 | 273  | 
|
| 27763 | 274  | 
end;  | 
275  | 
||
| 36957 | 276  | 
structure Basic_Symbol_Pos = (*not open by default*)  | 
277  | 
struct  | 
|
| 55103 | 278  | 
val $$ = Symbol_Pos.$$;  | 
| 55107 | 279  | 
val ~$$ = Symbol_Pos.~$$;  | 
| 36957 | 280  | 
val $$$ = Symbol_Pos.$$$;  | 
281  | 
val ~$$$ = Symbol_Pos.~$$$;  | 
|
282  | 
end;  | 
|
| 27763 | 283  |