| author | wenzelm | 
| Tue, 02 Aug 2016 17:39:38 +0200 | |
| changeset 63580 | 7f06347a5013 | 
| parent 62800 | 7ac100f86863 | 
| child 67361 | f834d6f21c55 | 
| permissions | -rw-r--r-- | 
| 27763 | 1  | 
(* Title: Pure/General/symbol_pos.ML  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Symbols with explicit position information.  | 
|
5  | 
*)  | 
|
6  | 
||
| 36957 | 7  | 
signature SYMBOL_POS =  | 
| 27763 | 8  | 
sig  | 
9  | 
type T = Symbol.symbol * Position.T  | 
|
10  | 
val symbol: T -> Symbol.symbol  | 
|
| 55103 | 11  | 
val $$ : Symbol.symbol -> T list -> T * T list  | 
| 55107 | 12  | 
val ~$$ : Symbol.symbol -> T list -> T * T list  | 
| 27763 | 13  | 
val $$$ : Symbol.symbol -> T list -> T list * T list  | 
14  | 
val ~$$$ : Symbol.symbol -> T list -> T list * T list  | 
|
| 27797 | 15  | 
val content: T list -> string  | 
| 55033 | 16  | 
val range: T list -> Position.range  | 
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
17  | 
val split_lines: T list -> T list list  | 
| 61456 | 18  | 
val trim_blanks: T list -> T list  | 
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
19  | 
val trim_lines: T list -> T list  | 
| 27763 | 20  | 
val is_eof: T -> bool  | 
21  | 
val stopper: T Scan.stopper  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
22  | 
val !!! : Scan.message -> (T list -> 'a) -> T list -> 'a  | 
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
23  | 
val scan_pos: T list -> Position.T * T list  | 
| 48764 | 24  | 
val scan_string_q: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
25  | 
val scan_string_qq: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
|
26  | 
val scan_string_bq: string -> T list -> (Position.T * (T list * Position.T)) * T list  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
27  | 
val recover_string_q: T list -> T list * T list  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
28  | 
val recover_string_qq: T list -> T list * T list  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
29  | 
val recover_string_bq: T list -> T list * T list  | 
| 43773 | 30  | 
val quote_string_q: string -> string  | 
31  | 
val quote_string_qq: string -> string  | 
|
32  | 
val quote_string_bq: string -> string  | 
|
| 62781 | 33  | 
val cartouche_content: T list -> T list  | 
| 55105 | 34  | 
val scan_cartouche: string -> T list -> T list * T list  | 
| 62781 | 35  | 
val scan_cartouche_content: string -> T list -> T list * T list  | 
| 55033 | 36  | 
val recover_cartouche: T list -> T list * T list  | 
| 55105 | 37  | 
val scan_comment: string -> T list -> T list * T list  | 
38  | 
val scan_comment_body: string -> T list -> T list * T list  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
39  | 
val recover_comment: T list -> T list * T list  | 
| 27763 | 40  | 
val source: Position.T -> (Symbol.symbol, 'a) Source.source ->  | 
41  | 
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
42  | 
type text = string  | 
| 27797 | 43  | 
val implode: T list -> text  | 
| 59112 | 44  | 
val implode_range: Position.range -> T list -> text * Position.range  | 
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
45  | 
val explode: text * Position.T -> T list  | 
| 62751 | 46  | 
val explode0: string -> T list  | 
| 50239 | 47  | 
val scan_ident: T list -> T list * T list  | 
48  | 
val is_identifier: string -> bool  | 
|
| 62782 | 49  | 
val scan_nat: T list -> T list * T list  | 
50  | 
val scan_float: T list -> T list * T list  | 
|
| 27763 | 51  | 
end;  | 
52  | 
||
| 30573 | 53  | 
structure Symbol_Pos: SYMBOL_POS =  | 
| 27763 | 54  | 
struct  | 
55  | 
||
56  | 
(* type T *)  | 
|
57  | 
||
58  | 
type T = Symbol.symbol * Position.T;  | 
|
59  | 
||
60  | 
fun symbol ((s, _): T) = s;  | 
|
| 27852 | 61  | 
|
| 27797 | 62  | 
val content = implode o map symbol;  | 
| 27763 | 63  | 
|
| 55033 | 64  | 
fun range (syms as (_, pos) :: _) =  | 
65  | 
let val pos' = List.last syms |-> Position.advance  | 
|
| 62797 | 66  | 
in Position.range (pos, pos') end  | 
| 55033 | 67  | 
| range [] = Position.no_range;  | 
68  | 
||
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
69  | 
|
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
70  | 
(* lines and blanks *)  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
71  | 
|
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
72  | 
fun split_lines [] = []  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
73  | 
| split_lines (list: T list) =  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
74  | 
let  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
75  | 
fun split syms =  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
76  | 
(case take_prefix (fn (s, _) => s <> "\n") syms of  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
77  | 
(line, []) => [line]  | 
| 62239 | 78  | 
| (line, _ :: rest) => line :: split rest);  | 
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
79  | 
in split list end;  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
80  | 
|
| 61707 | 81  | 
val trim_blanks = trim (Symbol.is_blank o symbol);  | 
| 61456 | 82  | 
|
| 
61705
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
83  | 
val trim_lines =  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
84  | 
split_lines #> map trim_blanks #> separate [(Symbol.space, Position.none)] #> flat;  | 
| 
 
546e6494049f
trim lines for @{theory_text} similarly to @{text};
 
wenzelm 
parents: 
61502 
diff
changeset
 | 
85  | 
|
| 27763 | 86  | 
|
87  | 
(* stopper *)  | 
|
88  | 
||
89  | 
fun mk_eof pos = (Symbol.eof, pos);  | 
|
90  | 
val eof = mk_eof Position.none;  | 
|
91  | 
||
92  | 
val is_eof = Symbol.is_eof o symbol;  | 
|
93  | 
||
94  | 
val stopper =  | 
|
95  | 
Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.advance)) is_eof;  | 
|
96  | 
||
97  | 
||
98  | 
(* basic scanners *)  | 
|
99  | 
||
100  | 
fun !!! text scan =  | 
|
101  | 
let  | 
|
| 
48911
 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 
wenzelm 
parents: 
48770 
diff
changeset
 | 
102  | 
fun get_pos [] = " (end-of-input)"  | 
| 48992 | 103  | 
| get_pos ((_, pos) :: _) = Position.here pos;  | 
| 27763 | 104  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
105  | 
fun err (syms, msg) = fn () =>  | 
| 48770 | 106  | 
text () ^ get_pos syms ^  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
107  | 
      Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map symbol syms)) ^
 | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43773 
diff
changeset
 | 
108  | 
(case msg of NONE => "" | SOME m => "\n" ^ m ());  | 
| 27763 | 109  | 
in Scan.!! err scan end;  | 
110  | 
||
| 55033 | 111  | 
fun $$ s = Scan.one (fn x => symbol x = s);  | 
| 55107 | 112  | 
fun ~$$ s = Scan.one (fn x => symbol x <> s);  | 
113  | 
||
| 27763 | 114  | 
fun $$$ s = Scan.one (fn x => symbol x = s) >> single;  | 
115  | 
fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single;  | 
|
116  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
117  | 
val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos);  | 
| 27763 | 118  | 
|
119  | 
||
| 43773 | 120  | 
(* scan string literals *)  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
121  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
122  | 
local  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
123  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
124  | 
val char_code =  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
125  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
126  | 
Scan.one (Symbol.is_ascii_digit o symbol) --  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
127  | 
Scan.one (Symbol.is_ascii_digit o symbol) :|--  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
128  | 
(fn (((a, pos), (b, _)), (c, _)) =>  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
129  | 
let val (n, _) = Library.read_int [a, b, c]  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
130  | 
in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
131  | 
|
| 48764 | 132  | 
fun scan_str q err_prefix =  | 
133  | 
$$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string")  | 
|
134  | 
($$$ q || $$$ "\\" || char_code) ||  | 
|
| 58854 | 135  | 
Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s) >> single;  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
136  | 
|
| 48764 | 137  | 
fun scan_strs q err_prefix =  | 
| 55103 | 138  | 
Scan.ahead ($$ q) |--  | 
139  | 
!!! (fn () => err_prefix ^ "unclosed string literal")  | 
|
| 61476 | 140  | 
((scan_pos --| $$$ q) -- (Scan.repeats (scan_str q err_prefix) -- ($$$ q |-- scan_pos)));  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
141  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
142  | 
fun recover_strs q =  | 
| 61476 | 143  | 
$$$ q @@@ Scan.repeats (Scan.permissive (scan_str q ""));  | 
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
144  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
145  | 
in  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
146  | 
|
| 42503 | 147  | 
val scan_string_q = scan_strs "'";  | 
148  | 
val scan_string_qq = scan_strs "\"";  | 
|
149  | 
val scan_string_bq = scan_strs "`";  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
150  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
151  | 
val recover_string_q = recover_strs "'";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
152  | 
val recover_string_qq = recover_strs "\"";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
153  | 
val recover_string_bq = recover_strs "`";  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
154  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
155  | 
end;  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
156  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
157  | 
|
| 43773 | 158  | 
(* quote string literals *)  | 
159  | 
||
160  | 
local  | 
|
161  | 
||
162  | 
fun char_code i =  | 
|
163  | 
(if i < 10 then "00" else if i < 100 then "0" else "") ^ string_of_int i;  | 
|
164  | 
||
165  | 
fun quote_str q s =  | 
|
166  | 
if Symbol.is_ascii_control s then "\\" ^ char_code (ord s)  | 
|
167  | 
else if s = q orelse s = "\\" then "\\" ^ s  | 
|
168  | 
else s;  | 
|
169  | 
||
170  | 
fun quote_string q = enclose q q o implode o map (quote_str q) o Symbol.explode;  | 
|
171  | 
||
172  | 
in  | 
|
173  | 
||
174  | 
val quote_string_q = quote_string "'";  | 
|
175  | 
val quote_string_qq = quote_string "\"";  | 
|
176  | 
val quote_string_bq = quote_string "`";  | 
|
177  | 
||
178  | 
end;  | 
|
179  | 
||
180  | 
||
| 55033 | 181  | 
(* nested text cartouches *)  | 
182  | 
||
| 62781 | 183  | 
fun cartouche_content syms =  | 
184  | 
let  | 
|
185  | 
fun err () =  | 
|
186  | 
      error ("Malformed text cartouche: "
 | 
|
187  | 
^ quote (content syms) ^ Position.here (#1 (range syms)));  | 
|
188  | 
in  | 
|
189  | 
(case syms of  | 
|
190  | 
      ("\<open>", _) :: rest =>
 | 
|
191  | 
(case rev rest of  | 
|
192  | 
          ("\<close>", _) :: rrest => rev rrest
 | 
|
193  | 
| _ => err ())  | 
|
194  | 
| _ => err ())  | 
|
195  | 
end;  | 
|
196  | 
||
| 
55104
 
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
 
wenzelm 
parents: 
55103 
diff
changeset
 | 
197  | 
val scan_cartouche_depth =  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
198  | 
Scan.repeat1 (Scan.depend (fn (depth: int option) =>  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
199  | 
(case depth of  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
200  | 
SOME d =>  | 
| 62210 | 201  | 
$$ Symbol.open_ >> pair (SOME (d + 1)) ||  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
202  | 
(if d > 0 then  | 
| 62210 | 203  | 
Scan.one (fn (s, _) => s <> Symbol.close andalso Symbol.not_eof s) >> pair depth ||  | 
204  | 
$$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1))  | 
|
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
205  | 
else Scan.fail)  | 
| 
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
206  | 
| NONE => Scan.fail)));  | 
| 55033 | 207  | 
|
| 55105 | 208  | 
fun scan_cartouche err_prefix =  | 
| 62210 | 209  | 
Scan.ahead ($$ Symbol.open_) |--  | 
| 55105 | 210  | 
!!! (fn () => err_prefix ^ "unclosed text cartouche")  | 
| 
61502
 
760e21900b01
clarified scan_cartouche_depth (amending 8284c0d5bf52): finish after outermost cartouche;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
211  | 
(Scan.provide is_none (SOME 0) scan_cartouche_depth);  | 
| 55033 | 212  | 
|
| 62781 | 213  | 
fun scan_cartouche_content err_prefix =  | 
214  | 
scan_cartouche err_prefix >> cartouche_content;  | 
|
| 55033 | 215  | 
|
| 62781 | 216  | 
val recover_cartouche = Scan.pass (SOME 0) scan_cartouche_depth;  | 
| 55033 | 217  | 
|
218  | 
||
| 27763 | 219  | 
(* ML-style comments *)  | 
220  | 
||
221  | 
local  | 
|
222  | 
||
223  | 
val scan_cmt =  | 
|
224  | 
  Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
 | 
|
225  | 
Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||  | 
|
226  | 
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||  | 
|
| 58854 | 227  | 
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s)) >> single;  | 
| 27763 | 228  | 
|
| 61476 | 229  | 
val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt);  | 
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
230  | 
|
| 27763 | 231  | 
in  | 
232  | 
||
| 55105 | 233  | 
fun scan_comment err_prefix =  | 
| 55106 | 234  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
235  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 236  | 
      ($$$ "(" @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ ")");
 | 
| 27763 | 237  | 
|
| 55105 | 238  | 
fun scan_comment_body err_prefix =  | 
| 55106 | 239  | 
  Scan.ahead ($$ "(" -- $$ "*") |--
 | 
240  | 
!!! (fn () => err_prefix ^ "unclosed comment")  | 
|
| 58850 | 241  | 
      ($$ "(" |-- $$ "*" |-- scan_cmts --| $$ "*" --| $$ ")");
 | 
| 27763 | 242  | 
|
| 
48743
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
243  | 
val recover_comment =  | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
244  | 
  $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
 | 
| 
 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
245  | 
|
| 27763 | 246  | 
end;  | 
247  | 
||
248  | 
||
249  | 
(* source *)  | 
|
250  | 
||
251  | 
fun source pos =  | 
|
252  | 
Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos =>  | 
|
| 58864 | 253  | 
Scan.one Symbol.not_eof >> (fn s => (Position.advance s pos, (s, pos))))));  | 
| 27763 | 254  | 
|
255  | 
||
256  | 
(* compact representation -- with Symbol.DEL padding *)  | 
|
257  | 
||
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
258  | 
type text = string;  | 
| 
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
259  | 
|
| 27763 | 260  | 
fun pad [] = []  | 
261  | 
| pad [(s, _)] = [s]  | 
|
| 32784 | 262  | 
| pad ((s1, pos1) :: (rest as (_, pos2) :: _)) =  | 
| 27763 | 263  | 
let  | 
264  | 
val end_pos1 = Position.advance s1 pos1;  | 
|
| 27797 | 265  | 
val d = Int.max (0, Position.distance_of end_pos1 pos2);  | 
| 27763 | 266  | 
in s1 :: replicate d Symbol.DEL @ pad rest end;  | 
267  | 
||
| 27797 | 268  | 
val implode = implode o pad;  | 
| 27763 | 269  | 
|
| 59112 | 270  | 
fun implode_range (pos1, pos2) syms =  | 
| 27797 | 271  | 
  let val syms' = (("", pos1) :: syms @ [("", pos2)])
 | 
272  | 
in (implode syms', range syms') end;  | 
|
| 
27778
 
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
 
wenzelm 
parents: 
27763 
diff
changeset
 | 
273  | 
|
| 27763 | 274  | 
fun explode (str, pos) =  | 
| 41416 | 275  | 
let  | 
276  | 
val (res, _) =  | 
|
277  | 
fold (fn s => fn (res, p) => ((s, p) :: res, Position.advance s p))  | 
|
| 62800 | 278  | 
(Symbol.explode str) ([], Position.no_range_position pos);  | 
| 41416 | 279  | 
in fold (fn (s, p) => if s = Symbol.DEL then I else cons (s, p)) res [] end;  | 
| 27763 | 280  | 
|
| 62751 | 281  | 
fun explode0 str = explode (str, Position.none);  | 
282  | 
||
| 50239 | 283  | 
|
284  | 
(* identifiers *)  | 
|
285  | 
||
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
286  | 
local  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
287  | 
|
| 
52616
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
288  | 
val letter = Scan.one (symbol #> Symbol.is_letter);  | 
| 
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
50493 
diff
changeset
 | 
289  | 
val letdigs1 = Scan.many1 (symbol #> Symbol.is_letdig);  | 
| 
53016
 
fa9c38891cf2
disable old identifier syntax by default, legacy_isub_isup := true may be used temporarily as fall-back;
 
wenzelm 
parents: 
52920 
diff
changeset
 | 
290  | 
|
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62239 
diff
changeset
 | 
291  | 
val sub = Scan.one (symbol #> (fn s => s = "\<^sub>"));  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
292  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
293  | 
in  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
294  | 
|
| 61476 | 295  | 
val scan_ident = letter ::: Scan.repeats (letdigs1 || sub ::: letdigs1);  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
296  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
297  | 
end;  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
298  | 
|
| 50239 | 299  | 
fun is_identifier s =  | 
| 
50295
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
300  | 
Symbol.is_ascii_identifier s orelse  | 
| 62751 | 301  | 
(case try (Scan.finite stopper scan_ident) (explode0 s) of  | 
| 
50295
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
302  | 
SOME (_, []) => true  | 
| 
 
3d6a4135a54f
eliminated redundant is_ident -- more official is_identifier;
 
wenzelm 
parents: 
50253 
diff
changeset
 | 
303  | 
| _ => false);  | 
| 50239 | 304  | 
|
| 62782 | 305  | 
|
306  | 
(* numerals *)  | 
|
307  | 
||
308  | 
val scan_nat = Scan.many1 (Symbol.is_digit o symbol);  | 
|
309  | 
val scan_float = scan_nat @@@ $$$ "." @@@ scan_nat;  | 
|
310  | 
||
| 27763 | 311  | 
end;  | 
312  | 
||
| 36957 | 313  | 
structure Basic_Symbol_Pos = (*not open by default*)  | 
314  | 
struct  | 
|
| 55103 | 315  | 
val $$ = Symbol_Pos.$$;  | 
| 55107 | 316  | 
val ~$$ = Symbol_Pos.~$$;  | 
| 36957 | 317  | 
val $$$ = Symbol_Pos.$$$;  | 
318  | 
val ~$$$ = Symbol_Pos.~$$$;  | 
|
319  | 
end;  |