author | wenzelm |
Mon, 17 May 2010 15:02:44 +0200 | |
changeset 36957 | cdb9e83abfbe |
parent 32784 | 1a5dde5079ac |
child 40525 | 14a2e686bdac |
permissions | -rw-r--r-- |
27763 | 1 |
(* Title: Pure/General/symbol_pos.ML |
2 |
Author: Makarius |
|
3 |
||
4 |
Symbols with explicit position information. |
|
5 |
*) |
|
6 |
||
36957 | 7 |
signature SYMBOL_POS = |
27763 | 8 |
sig |
9 |
type T = Symbol.symbol * Position.T |
|
10 |
val symbol: T -> Symbol.symbol |
|
11 |
val $$$ : Symbol.symbol -> T list -> T list * T list |
|
12 |
val ~$$$ : Symbol.symbol -> T list -> T list * T list |
|
27797 | 13 |
val content: T list -> string |
27852 | 14 |
val untabify_content: T list -> string |
27763 | 15 |
val is_eof: T -> bool |
16 |
val stopper: T Scan.stopper |
|
17 |
val !!! : string -> (T list -> 'a) -> T list -> 'a |
|
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
18 |
val change_prompt: ('a -> 'b) -> 'a -> 'b |
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
19 |
val scan_pos: T list -> Position.T * T list |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
20 |
val scan_string: T list -> (Position.T * (T list * Position.T)) * T list |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
21 |
val scan_alt_string: T list -> (Position.T * (T list * Position.T)) * T list |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
22 |
val scan_quoted: T list -> T list * T list |
27763 | 23 |
val scan_comment: (string -> (T list -> T list * T list) -> T list -> T list * T list) -> |
24 |
T list -> T list * T list |
|
25 |
val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) -> |
|
26 |
T list -> T list * T list |
|
27 |
val source: Position.T -> (Symbol.symbol, 'a) Source.source -> |
|
28 |
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source |
|
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
29 |
type text = string |
27797 | 30 |
val implode: T list -> text |
31 |
val range: T list -> Position.range |
|
32 |
val implode_range: Position.T -> Position.T -> T list -> text * Position.range |
|
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
33 |
val explode: text * Position.T -> T list |
27763 | 34 |
end; |
35 |
||
30573 | 36 |
structure Symbol_Pos: SYMBOL_POS = |
27763 | 37 |
struct |
38 |
||
39 |
(* type T *) |
|
40 |
||
41 |
type T = Symbol.symbol * Position.T; |
|
42 |
||
43 |
fun symbol ((s, _): T) = s; |
|
27852 | 44 |
|
45 |
||
46 |
(* content *) |
|
47 |
||
27797 | 48 |
val content = implode o map symbol; |
27763 | 49 |
|
50 |
||
27864 | 51 |
val tab_width = (8: int); |
27852 | 52 |
|
53 |
fun untabify ("\t", pos) = |
|
54 |
(case Position.column_of pos of |
|
55 |
SOME n => Symbol.spaces (tab_width - ((n - 1) mod tab_width)) |
|
27984
b4dd58cff97c
untabify: silently turn tab into space if column information is unavailable;
wenzelm
parents:
27864
diff
changeset
|
56 |
| NONE => Symbol.space) |
27852 | 57 |
| untabify (s, _) = s; |
58 |
||
59 |
val untabify_content = implode o map untabify; |
|
60 |
||
61 |
||
27763 | 62 |
(* stopper *) |
63 |
||
64 |
fun mk_eof pos = (Symbol.eof, pos); |
|
65 |
val eof = mk_eof Position.none; |
|
66 |
||
67 |
val is_eof = Symbol.is_eof o symbol; |
|
68 |
||
69 |
val stopper = |
|
70 |
Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.advance)) is_eof; |
|
71 |
||
72 |
||
73 |
(* basic scanners *) |
|
74 |
||
75 |
fun !!! text scan = |
|
76 |
let |
|
77 |
fun get_pos [] = " (past end-of-text!)" |
|
78 |
| get_pos ((_, pos) :: _) = Position.str_of pos; |
|
79 |
||
80 |
fun err (syms, msg) = |
|
81 |
text ^ get_pos syms ^ " at " ^ Symbol.beginning 10 (map symbol syms) ^ |
|
82 |
(case msg of NONE => "" | SOME s => "\n" ^ s); |
|
83 |
in Scan.!! err scan end; |
|
84 |
||
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
85 |
fun change_prompt scan = Scan.prompt "# " scan; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
86 |
|
27763 | 87 |
fun $$$ s = Scan.one (fn x => symbol x = s) >> single; |
88 |
fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single; |
|
89 |
||
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
90 |
val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos); |
27763 | 91 |
|
92 |
||
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
93 |
(* Isabelle strings *) |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
94 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
95 |
local |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
96 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
97 |
val char_code = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
98 |
Scan.one (Symbol.is_ascii_digit o symbol) -- |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
99 |
Scan.one (Symbol.is_ascii_digit o symbol) -- |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
100 |
Scan.one (Symbol.is_ascii_digit o symbol) :|-- |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
101 |
(fn (((a, pos), (b, _)), (c, _)) => |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
102 |
let val (n, _) = Library.read_int [a, b, c] |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
103 |
in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end); |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
104 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
105 |
fun scan_str q = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
106 |
$$$ "\\" |-- !!! "bad escape character in string" ($$$ q || $$$ "\\" || char_code) || |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
107 |
Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
108 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
109 |
fun scan_strs q = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
110 |
(scan_pos --| $$$ q) -- !!! "missing quote at end of string" |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
111 |
(change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- scan_pos))); |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
112 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
113 |
in |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
114 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
115 |
val scan_string = scan_strs "\""; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
116 |
val scan_alt_string = scan_strs "`"; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
117 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
118 |
val scan_quoted = Scan.trace (scan_string || scan_alt_string) >> #2; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
119 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
120 |
end; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
121 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
122 |
|
27763 | 123 |
(* ML-style comments *) |
124 |
||
125 |
local |
|
126 |
||
127 |
val scan_cmt = |
|
128 |
Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) || |
|
129 |
Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) || |
|
130 |
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) || |
|
131 |
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single; |
|
132 |
||
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
133 |
val scan_body = change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat)); |
27763 | 134 |
|
135 |
in |
|
136 |
||
137 |
fun scan_comment cut = |
|
138 |
$$$ "(" @@@ $$$ "*" @@@ cut "missing end of comment" (scan_body @@@ $$$ "*" @@@ $$$ ")"); |
|
139 |
||
140 |
fun scan_comment_body cut = |
|
141 |
$$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")"); |
|
142 |
||
143 |
end; |
|
144 |
||
145 |
||
146 |
(* source *) |
|
147 |
||
148 |
fun source pos = |
|
149 |
Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos => |
|
150 |
Scan.one Symbol.not_eof >> (fn s => (Position.advance s pos, (s, pos)))))) NONE; |
|
151 |
||
152 |
||
153 |
(* compact representation -- with Symbol.DEL padding *) |
|
154 |
||
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
155 |
type text = string; |
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
156 |
|
27763 | 157 |
fun pad [] = [] |
158 |
| pad [(s, _)] = [s] |
|
32784 | 159 |
| pad ((s1, pos1) :: (rest as (_, pos2) :: _)) = |
27763 | 160 |
let |
161 |
val end_pos1 = Position.advance s1 pos1; |
|
27797 | 162 |
val d = Int.max (0, Position.distance_of end_pos1 pos2); |
27763 | 163 |
in s1 :: replicate d Symbol.DEL @ pad rest end; |
164 |
||
27797 | 165 |
val implode = implode o pad; |
27763 | 166 |
|
27797 | 167 |
fun range (syms as (_, pos) :: _) = |
27763 | 168 |
let val pos' = List.last syms |-> Position.advance |
27797 | 169 |
in Position.range pos pos' end |
170 |
| range [] = Position.no_range; |
|
27763 | 171 |
|
27797 | 172 |
fun implode_range pos1 pos2 syms = |
173 |
let val syms' = (("", pos1) :: syms @ [("", pos2)]) |
|
174 |
in (implode syms', range syms') end; |
|
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
175 |
|
27763 | 176 |
fun explode (str, pos) = |
27778
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
177 |
fold_map (fn s => fn p => ((s, p), (Position.advance s p))) |
3ec7a4d9ef18
renamed SymbolPos.scan_position to SymbolPos.scan_pos;
wenzelm
parents:
27763
diff
changeset
|
178 |
(Symbol.explode str) (Position.reset_range pos) |
27763 | 179 |
|> #1 |> filter_out (fn (s, _) => s = Symbol.DEL); |
180 |
||
181 |
end; |
|
182 |
||
36957 | 183 |
structure Basic_Symbol_Pos = (*not open by default*) |
184 |
struct |
|
185 |
val symbol = Symbol_Pos.symbol; |
|
186 |
val $$$ = Symbol_Pos.$$$; |
|
187 |
val ~$$$ = Symbol_Pos.~$$$; |
|
188 |
end; |
|
27763 | 189 |