src/Pure/General/symbol_explode.ML
author wenzelm
Thu, 22 Sep 2016 11:25:27 +0200
changeset 63936 b87784e19a77
parent 63932 6040db6b929d
child 74170 09d4175f473e
permissions -rw-r--r--
discontinued raw symbols; discontinued Symbol.source; use initial Symbol.explode;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
     1
(*  Title:      Pure/General/symbol_explode.ML
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     3
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
     4
String explode operation for Isabelle symbols (see also symbol.ML).
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     5
*)
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     6
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
     7
structure Symbol: sig val explode: string -> string list end =
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     8
struct
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
     9
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    10
fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z";
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    11
fun is_ascii_letdig c =
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    12
  is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'";
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    13
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    14
fun is_utf8 c = c >= #"\128";
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    15
fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192";
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    16
fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    17
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    18
fun explode string =
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    19
  let
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    20
    fun char i = String.sub (string, i);
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    21
    fun string_range i j = String.substring (string, i, j - i);
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    22
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    23
    val n = size string;
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    24
    fun test pred i = i < n andalso pred (char i);
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    25
    fun many pred i = if test pred i then many pred (i + 1) else i;
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    26
    fun maybe pred i = if test pred i then i + 1 else i;
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    27
    fun maybe_char c = maybe (fn c' => c = c');
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    28
    fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    29
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    30
    fun scan i =
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    31
      if i < n then
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    32
        let val ch = char i in
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    33
          (*encoded newline*)
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    34
          if ch = #"\^M" then "\n" :: scan (maybe_char #"\n" (i + 1))
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    35
          (*pseudo utf8: encoded ascii control*)
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    36
          else if ch = #"\192" andalso
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    37
            test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    38
          then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2)
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    39
          (*utf8*)
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    40
          else if is_utf8 ch then
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    41
            let val j = many is_utf8_trailer (i + 1)
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    42
            in string_range i j :: scan j end
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    43
          (*named symbol*)
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    44
          else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    45
            let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    46
            in string_range i j :: scan j end
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    47
          (*single character*)
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    48
          else String.str ch :: scan (i + 1)
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    49
        end
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    50
      else [];
63936
b87784e19a77 discontinued raw symbols;
wenzelm
parents: 63932
diff changeset
    51
  in scan 0 end;
63932
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    52
6040db6b929d more tight implementation of symbol explode operation (without support for raw symbols);
wenzelm
parents:
diff changeset
    53
end;