| author | wenzelm | 
| Fri, 18 Oct 2024 14:20:09 +0200 | |
| changeset 81182 | fc5066122e68 | 
| parent 77847 | 3bb6468d202e | 
| permissions | -rw-r--r-- | 
| 63936 | 1 | (* Title: Pure/General/symbol_explode.ML | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 2 | Author: Makarius | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 3 | |
| 63936 | 4 | String explode operation for Isabelle symbols (see also symbol.ML). | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 5 | *) | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 6 | |
| 63936 | 7 | structure Symbol: sig val explode: string -> string list end = | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 8 | struct | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 9 | |
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 10 | fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z"; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 11 | fun is_ascii_letdig c = | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 12 | is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'"; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 13 | |
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 14 | fun is_utf8 c = c >= #"\128"; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 15 | fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192"; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 16 | fun is_utf8_control c = #"\128" <= c andalso c < #"\160"; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 17 | |
| 63936 | 18 | fun explode string = | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 19 | let | 
| 77847 
3bb6468d202e
more operations, following Isabelle/ML conventions;
 wenzelm parents: 
74170diff
changeset | 20 | val char = String.nth string; | 
| 74170 | 21 | fun substring i j = String.substring (string, i, j - i); | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 22 | |
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 23 | val n = size string; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 24 | fun test pred i = i < n andalso pred (char i); | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 25 | fun many pred i = if test pred i then many pred (i + 1) else i; | 
| 74170 | 26 | fun maybe_char c i = if test (fn c' => c = c') i then i + 1 else i; | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 27 | fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i; | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 28 | |
| 63936 | 29 | fun scan i = | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 30 | if i < n then | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 31 | let val ch = char i in | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 32 | (*encoded newline*) | 
| 63936 | 33 | if ch = #"\^M" then "\n" :: scan (maybe_char #"\n" (i + 1)) | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 34 | (*pseudo utf8: encoded ascii control*) | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 35 | else if ch = #"\192" andalso | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 36 | test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2)) | 
| 63936 | 37 | then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2) | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 38 | (*utf8*) | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 39 | else if is_utf8 ch then | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 40 | let val j = many is_utf8_trailer (i + 1) | 
| 74170 | 41 | in substring i j :: scan j end | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 42 | (*named symbol*) | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 43 | else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 44 | let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">" | 
| 74170 | 45 | in substring i j :: scan j end | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 46 | (*single character*) | 
| 63936 | 47 | else String.str ch :: scan (i + 1) | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 48 | end | 
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 49 | else []; | 
| 63936 | 50 | in scan 0 end; | 
| 63932 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 51 | |
| 
6040db6b929d
more tight implementation of symbol explode operation (without support for raw symbols);
 wenzelm parents: diff
changeset | 52 | end; |