19 val stopper: symbol * (symbol -> bool) |
19 val stopper: symbol * (symbol -> bool) |
20 val sync: symbol |
20 val sync: symbol |
21 val is_sync: symbol -> bool |
21 val is_sync: symbol -> bool |
22 val not_sync: symbol -> bool |
22 val not_sync: symbol -> bool |
23 val malformed: symbol |
23 val malformed: symbol |
|
24 val end_malformed: symbol |
24 val is_ascii: symbol -> bool |
25 val is_ascii: symbol -> bool |
25 val is_ascii_letter: symbol -> bool |
26 val is_ascii_letter: symbol -> bool |
26 val is_hex_letter: symbol -> bool |
27 val is_hex_letter: symbol -> bool |
27 val is_ascii_digit: symbol -> bool |
28 val is_ascii_digit: symbol -> bool |
28 val is_ascii_quasi: symbol -> bool |
29 val is_ascii_quasi: symbol -> bool |
46 val is_letdig: symbol -> bool |
47 val is_letdig: symbol -> bool |
47 val is_ident: symbol list -> bool |
48 val is_ident: symbol list -> bool |
48 val beginning: int -> symbol list -> string |
49 val beginning: int -> symbol list -> string |
49 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
50 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
50 val scan_id: string list -> string * string list |
51 val scan_id: string list -> string * string list |
51 val scan: string list -> symbol * string list |
|
52 val source: bool -> (string, 'a) Source.source -> |
52 val source: bool -> (string, 'a) Source.source -> |
53 (symbol, (string, 'a) Source.source) Source.source |
53 (symbol, (string, 'a) Source.source) Source.source |
54 val explode: string -> symbol list |
54 val explode: string -> symbol list |
55 val chars_only: string -> bool |
|
56 val escape: string -> string |
55 val escape: string -> string |
57 val strip_blanks: string -> string |
56 val strip_blanks: string -> string |
58 val bump_init: string -> string |
57 val bump_init: string -> string |
59 val bump_string: string -> string |
58 val bump_string: string -> string |
60 val length: symbol list -> int |
59 val length: symbol list -> int |
111 |
110 |
112 val sync = "\\<^sync>"; |
111 val sync = "\\<^sync>"; |
113 fun is_sync s = s = sync; |
112 fun is_sync s = s = sync; |
114 fun not_sync s = s <> sync; |
113 fun not_sync s = s <> sync; |
115 |
114 |
116 val malformed = "\\<^malformed>"; |
115 val malformed = "[["; |
|
116 val end_malformed = "]]"; |
|
117 |
|
118 fun malformed_msg s = (*Output.escape avoids looping errors*) |
|
119 "Malformed symbolic character: " ^ quote (Output.escape s); |
117 |
120 |
118 |
121 |
119 (* ascii symbols *) |
122 (* ascii symbols *) |
120 |
123 |
121 fun is_ascii s = is_char s andalso ord s < 128; |
124 fun is_ascii s = is_char s andalso ord s < 128; |
154 ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">" |
157 ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">" |
155 orelse ord c >= 128; |
158 orelse ord c >= 128; |
156 |
159 |
157 fun encode_raw str = |
160 fun encode_raw str = |
158 let |
161 let |
159 val raw1 = enclose "\\<^raw:" ">" o implode; |
162 val raw0 = enclose "\\<^raw:" ">"; |
|
163 val raw1 = raw0 o implode; |
160 val raw2 = enclose "\\<^raw" ">" o string_of_int o ord; |
164 val raw2 = enclose "\\<^raw" ">" o string_of_int o ord; |
161 |
165 |
162 fun encode cs = enc (Library.take_prefix raw_chr cs) |
166 fun encode cs = enc (Library.take_prefix raw_chr cs) |
163 and enc ([], []) = [] |
167 and enc ([], []) = [] |
164 | enc (cs, []) = [raw1 cs] |
168 | enc (cs, []) = [raw1 cs] |
165 | enc ([], d :: ds) = raw2 d :: encode ds |
169 | enc ([], d :: ds) = raw2 d :: encode ds |
166 | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds; |
170 | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds; |
167 in |
171 in |
168 if exists_string (not o raw_chr) str then implode (encode (explode str)) |
172 if exists_string (not o raw_chr) str then implode (encode (explode str)) |
169 else enclose "\\<^raw:" ">" str |
173 else raw0 str |
170 end; |
174 end; |
171 |
175 |
172 |
176 |
173 (* diagnostics *) |
177 (* diagnostics *) |
174 |
178 |
181 (drop_blanks (Library.take (n, all_cs)) |
185 (drop_blanks (Library.take (n, all_cs)) |
182 |> map (fn c => if is_ascii_blank c then space else c) |
186 |> map (fn c => if is_ascii_blank c then space else c) |
183 |> implode) ^ dots |
187 |> implode) ^ dots |
184 end; |
188 end; |
185 |
189 |
186 (*raw encoding avoids looping errors!*) |
|
187 fun malformed_symbol s = |
|
188 "Malformed symbolic character specification: " ^ quote (Output.escape s); |
|
189 |
|
190 |
190 |
191 (* decode_raw *) |
191 (* decode_raw *) |
192 |
192 |
193 fun is_raw s = |
193 fun is_raw s = |
194 String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s; |
194 String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s; |
195 |
195 |
196 fun decode_raw s = |
196 fun decode_raw s = |
197 if not (is_raw s) then error (malformed_symbol s) |
197 if not (is_raw s) then error (malformed_msg s) |
198 else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8) |
198 else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8) |
199 else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7))))); |
199 else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7))))); |
200 |
200 |
201 |
201 |
202 (* symbol variants *) |
202 (* symbol variants *) |
206 fun decode s = |
206 fun decode s = |
207 if is_char s then Char s |
207 if is_char s then Char s |
208 else if is_raw s then Raw (decode_raw s) |
208 else if is_raw s then Raw (decode_raw s) |
209 else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4)) |
209 else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4)) |
210 else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3)) |
210 else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3)) |
211 else error (malformed_symbol s); |
211 else error (malformed_msg s); |
212 |
212 |
213 |
213 |
214 (* standard symbol kinds *) |
214 (* standard symbol kinds *) |
215 |
215 |
216 datatype kind = Letter | Digit | Quasi | Blank | Other; |
216 datatype kind = Letter | Digit | Quasi | Blank | Other; |
393 (case fin_scan chs of |
393 (case fin_scan chs of |
394 (result, []) => result |
394 (result, []) => result |
395 | (_, rest) => error (message (rest, NONE))) |
395 | (_, rest) => error (message (rest, NONE))) |
396 end; |
396 end; |
397 |
397 |
398 |
|
399 (* scan *) |
|
400 |
|
401 val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode); |
398 val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode); |
402 |
399 |
|
400 |
|
401 (* source *) |
|
402 |
403 local |
403 local |
|
404 |
|
405 fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s; |
404 |
406 |
405 val scan_encoded_newline = |
407 val scan_encoded_newline = |
406 $$ "\^M" -- $$ "\n" >> K "\n" || |
408 $$ "\^M" -- $$ "\n" >> K "\n" || |
407 $$ "\^M" >> K "\n" || |
409 $$ "\^M" >> K "\n" || |
408 $$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n"; |
410 $$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n"; |
409 |
411 |
410 val scan_raw = |
412 val scan_raw = |
411 Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) || |
413 Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) || |
412 Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode); |
414 Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode); |
413 |
415 |
414 in |
|
415 |
|
416 val scan = |
416 val scan = |
|
417 Scan.one is_plain || |
417 scan_encoded_newline || |
418 scan_encoded_newline || |
418 (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^ |
419 (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^ |
419 !! (fn (cs, _) => malformed_symbol (quote (beginning 10 ("\\" :: "<" :: cs)))) |
420 !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs))) |
420 (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) || |
421 (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) || |
421 Scan.one not_eof; |
422 Scan.one not_eof; |
422 |
423 |
|
424 val recover = |
|
425 Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s) |
|
426 >> (fn ss => malformed :: ss @ [end_malformed]); |
|
427 |
|
428 in |
|
429 |
|
430 fun source do_recover src = |
|
431 Source.source stopper (Scan.bulk scan) (if do_recover then SOME (K recover) else NONE) src; |
|
432 |
423 end; |
433 end; |
424 |
434 |
425 |
435 |
426 (* source *) |
|
427 |
|
428 val recover = |
|
429 Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s) >> K [malformed]; |
|
430 |
|
431 fun source do_recover src = |
|
432 Source.source stopper (Scan.bulk scan) (if do_recover then SOME recover else NONE) src; |
|
433 |
|
434 |
|
435 (* explode *) |
436 (* explode *) |
|
437 |
|
438 local |
436 |
439 |
437 fun no_explode [] = true |
440 fun no_explode [] = true |
438 | no_explode ("\\" :: "<" :: _) = false |
441 | no_explode ("\\" :: "<" :: _) = false |
439 | no_explode ("\^M" :: _) = false |
442 | no_explode ("\^M" :: _) = false |
440 | no_explode (_ :: cs) = no_explode cs; |
443 | no_explode (_ :: cs) = no_explode cs; |
441 |
444 |
|
445 in |
|
446 |
442 fun sym_explode str = |
447 fun sym_explode str = |
443 let val chs = explode str in |
448 let val chs = explode str in |
444 if no_explode chs then chs |
449 if no_explode chs then chs |
445 else the (Scan.read stopper (Scan.repeat scan) chs) |
450 else Source.exhaust (source false (Source.of_list chs)) |
446 end; |
451 end; |
447 |
452 |
448 val chars_only = not o exists_string (fn s => s = "\\"); |
453 end; |
449 |
454 |
450 |
455 |
451 (* escape *) |
456 (* escape *) |
452 |
457 |
453 val escape = implode o map (fn s => if is_char s then s else "\\" ^ s) o sym_explode; |
458 val escape = implode o map (fn s => if is_char s then s else "\\" ^ s) o sym_explode; |