52 val is_block_ctrl: symbol -> bool |
52 val is_block_ctrl: symbol -> bool |
53 val is_quasi_letter: symbol -> bool |
53 val is_quasi_letter: symbol -> bool |
54 val is_letdig: symbol -> bool |
54 val is_letdig: symbol -> bool |
55 val is_ident: symbol list -> bool |
55 val is_ident: symbol list -> bool |
56 val beginning: int -> symbol list -> string |
56 val beginning: int -> symbol list -> string |
57 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
|
58 val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source |
57 val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source |
59 val explode: string -> symbol list |
58 val explode: string -> symbol list |
|
59 val esc: symbol -> string |
|
60 val escape: string -> string |
|
61 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
60 val split_words: symbol list -> string list |
62 val split_words: symbol list -> string list |
61 val explode_words: string -> string list |
63 val explode_words: string -> string list |
62 val esc: symbol -> string |
|
63 val escape: string -> string |
|
64 val strip_blanks: string -> string |
64 val strip_blanks: string -> string |
65 val bump_init: string -> string |
65 val bump_init: string -> string |
66 val bump_string: string -> string |
66 val bump_string: string -> string |
67 val length: symbol list -> int |
67 val length: symbol list -> int |
68 val xsymbolsN: string |
68 val xsymbolsN: string |
397 val is_block_ctrl = member (op =) ["\\<^bsub>", "\\<^esub>", "\\<^bsup>", "\\<^esup>"]; |
397 val is_block_ctrl = member (op =) ["\\<^bsub>", "\\<^esub>", "\\<^bsup>", "\\<^esup>"]; |
398 |
398 |
399 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end; |
399 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end; |
400 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end; |
400 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end; |
401 |
401 |
402 fun is_ident [] = false |
|
403 | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs; |
|
404 |
|
405 |
402 |
406 |
403 |
407 (** symbol input **) |
404 (** symbol input **) |
408 |
405 |
409 (* scanning through symbols *) |
406 (* source *) |
|
407 |
|
408 local |
|
409 |
|
410 fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\"; |
|
411 |
|
412 fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192; |
|
413 |
|
414 fun implode_pseudo_utf8 (cs as ["\192", c]) = |
|
415 if ord c < 160 then chr (ord c - 128) else implode cs |
|
416 | implode_pseudo_utf8 cs = implode cs; |
|
417 |
|
418 val scan_encoded_newline = |
|
419 $$ "\^M" -- $$ "\n" >> K "\n" || |
|
420 $$ "\^M" >> K "\n" || |
|
421 Scan.this_string "\\<^newline>" >> K "\n"; |
|
422 |
|
423 val scan_raw = |
|
424 Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) || |
|
425 Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode); |
|
426 |
|
427 val scan_total = |
|
428 Scan.one is_plain || |
|
429 Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 || |
|
430 scan_encoded_newline || |
|
431 ($$ "\\" ^^ $$ "<" ^^ |
|
432 (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^ |
|
433 Scan.optional ($$ ">") "")) || |
|
434 Scan.one not_eof; |
|
435 |
|
436 in |
|
437 |
|
438 fun source src = Source.source stopper (Scan.bulk scan_total) NONE src; |
|
439 |
|
440 end; |
|
441 |
|
442 |
|
443 (* explode *) |
|
444 |
|
445 local |
|
446 |
|
447 fun no_explode [] = true |
|
448 | no_explode ("\\" :: "<" :: _) = false |
|
449 | no_explode ("\^M" :: _) = false |
|
450 | no_explode (c :: cs) = is_ascii c andalso no_explode cs; |
|
451 |
|
452 in |
|
453 |
|
454 fun sym_explode str = |
|
455 let val chs = raw_explode str in |
|
456 if no_explode chs then chs |
|
457 else Source.exhaust (source (Source.of_list chs)) |
|
458 end; |
|
459 |
|
460 end; |
|
461 |
|
462 |
|
463 (* escape *) |
|
464 |
|
465 val esc = fn s => |
|
466 if is_char s then s |
|
467 else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s |
|
468 else "\\" ^ s; |
|
469 |
|
470 val escape = implode o map esc o sym_explode; |
|
471 |
|
472 |
|
473 |
|
474 (** scanning through symbols **) |
|
475 |
|
476 (* scanner *) |
410 |
477 |
411 fun scanner msg scan syms = |
478 fun scanner msg scan syms = |
412 let |
479 let |
413 fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss)) |
480 fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss)) |
414 | message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss)); |
481 | message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss)); |
418 (result, []) => result |
485 (result, []) => result |
419 | (_, rest) => error (message (rest, NONE) ())) |
486 | (_, rest) => error (message (rest, NONE) ())) |
420 end; |
487 end; |
421 |
488 |
422 |
489 |
423 (* source *) |
|
424 |
|
425 local |
|
426 |
|
427 fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\"; |
|
428 |
|
429 fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192; |
|
430 |
|
431 fun implode_pseudo_utf8 (cs as ["\192", c]) = |
|
432 if ord c < 160 then chr (ord c - 128) else implode cs |
|
433 | implode_pseudo_utf8 cs = implode cs; |
|
434 |
|
435 val scan_encoded_newline = |
|
436 $$ "\^M" -- $$ "\n" >> K "\n" || |
|
437 $$ "\^M" >> K "\n" || |
|
438 Scan.this_string "\\<^newline>" >> K "\n"; |
|
439 |
|
440 val scan_raw = |
|
441 Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) || |
|
442 Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode); |
|
443 |
|
444 val scan_total = |
|
445 Scan.one is_plain || |
|
446 Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 || |
|
447 scan_encoded_newline || |
|
448 ($$ "\\" ^^ $$ "<" ^^ |
|
449 (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^ |
|
450 Scan.optional ($$ ">") "")) || |
|
451 Scan.one not_eof; |
|
452 |
|
453 in |
|
454 |
|
455 fun source src = Source.source stopper (Scan.bulk scan_total) NONE src; |
|
456 |
|
457 end; |
|
458 |
|
459 |
|
460 (* explode *) |
|
461 |
|
462 local |
|
463 |
|
464 fun no_explode [] = true |
|
465 | no_explode ("\\" :: "<" :: _) = false |
|
466 | no_explode ("\^M" :: _) = false |
|
467 | no_explode (c :: cs) = is_ascii c andalso no_explode cs; |
|
468 |
|
469 in |
|
470 |
|
471 fun sym_explode str = |
|
472 let val chs = raw_explode str in |
|
473 if no_explode chs then chs |
|
474 else Source.exhaust (source (Source.of_list chs)) |
|
475 end; |
|
476 |
|
477 end; |
|
478 |
|
479 |
|
480 (* space-separated words *) |
490 (* space-separated words *) |
481 |
491 |
482 val scan_word = |
492 val scan_word = |
483 Scan.many1 is_ascii_blank >> K NONE || |
493 Scan.many1 is_ascii_blank >> K NONE || |
484 Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode); |
494 Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode); |
485 |
495 |
486 val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I); |
496 val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I); |
487 |
497 |
488 val explode_words = split_words o sym_explode; |
498 val explode_words = split_words o sym_explode; |
489 |
|
490 |
|
491 (* escape *) |
|
492 |
|
493 val esc = fn s => |
|
494 if is_char s then s |
|
495 else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s |
|
496 else "\\" ^ s; |
|
497 |
|
498 val escape = implode o map esc o sym_explode; |
|
499 |
499 |
500 |
500 |
501 (* blanks *) |
501 (* blanks *) |
502 |
502 |
503 fun strip_blanks s = |
503 fun strip_blanks s = |
504 sym_explode s |
504 sym_explode s |
505 |> take_prefix is_blank |> #2 |
505 |> take_prefix is_blank |> #2 |
506 |> take_suffix is_blank |> #1 |
506 |> take_suffix is_blank |> #1 |
507 |> implode; |
507 |> implode; |
|
508 |
|
509 |
|
510 (* identifiers *) |
|
511 |
|
512 fun is_ident [] = false |
|
513 | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs; |
508 |
514 |
509 |
515 |
510 (* bump string -- treat as base 26 or base 1 numbers *) |
516 (* bump string -- treat as base 26 or base 1 numbers *) |
511 |
517 |
512 fun symbolic_end (_ :: "\\<^isub>" :: _) = true |
518 fun symbolic_end (_ :: "\\<^isub>" :: _) = true |