src/Pure/General/symbol.ML
changeset 6116 8ba2f25610f7
child 6118 caa439435666
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/General/symbol.ML	Wed Jan 13 12:44:33 1999 +0100
     1.3 @@ -0,0 +1,274 @@
     1.4 +(*  Title:      Pure/Syntax/symbol.ML
     1.5 +    ID:         $Id$
     1.6 +    Author:     Markus Wenzel, TU Muenchen
     1.7 +
     1.8 +Generalized characters.
     1.9 +*)
    1.10 +
    1.11 +signature SYMBOL =
    1.12 +sig
    1.13 +  type symbol
    1.14 +  val space: symbol
    1.15 +  val eof: symbol
    1.16 +  val is_eof: symbol -> bool
    1.17 +  val not_eof: symbol -> bool
    1.18 +  val stopper: symbol * (symbol -> bool)
    1.19 +  val is_ascii: symbol -> bool
    1.20 +  val is_letter: symbol -> bool
    1.21 +  val is_digit: symbol -> bool
    1.22 +  val is_quasi_letter: symbol -> bool
    1.23 +  val is_letdig: symbol -> bool
    1.24 +  val is_blank: symbol -> bool
    1.25 +  val is_printable: symbol -> bool
    1.26 +  val beginning: symbol list -> string
    1.27 +  val scan: string list -> symbol * string list
    1.28 +  val explode: string -> symbol list
    1.29 +  val length: symbol list -> int
    1.30 +  val size: string -> int
    1.31 +  val input: string -> string
    1.32 +  val output: string -> string
    1.33 +  val source: bool -> (string, 'a) Source.source ->
    1.34 +    (symbol, (string, 'a) Source.source) Source.source
    1.35 +end;
    1.36 +
    1.37 +structure Symbol: SYMBOL =
    1.38 +struct
    1.39 +
    1.40 +
    1.41 +(** encoding table (isabelle-0) **)
    1.42 +
    1.43 +val enc_start = 160;
    1.44 +val enc_end = 255;
    1.45 +
    1.46 +val enc_vector =
    1.47 +[
    1.48 +(* GENERATED TEXT FOLLOWS - Do not edit! *)
    1.49 +  "\\<space2>",
    1.50 +  "\\<Gamma>",
    1.51 +  "\\<Delta>",
    1.52 +  "\\<Theta>",
    1.53 +  "\\<Lambda>",
    1.54 +  "\\<Pi>",
    1.55 +  "\\<Sigma>",
    1.56 +  "\\<Phi>",
    1.57 +  "\\<Psi>",
    1.58 +  "\\<Omega>",
    1.59 +  "\\<alpha>",
    1.60 +  "\\<beta>",
    1.61 +  "\\<gamma>",
    1.62 +  "\\<delta>",
    1.63 +  "\\<epsilon>",
    1.64 +  "\\<zeta>",
    1.65 +  "\\<eta>",
    1.66 +  "\\<theta>",
    1.67 +  "\\<kappa>",
    1.68 +  "\\<lambda>",
    1.69 +  "\\<mu>",
    1.70 +  "\\<nu>",
    1.71 +  "\\<xi>",
    1.72 +  "\\<pi>",
    1.73 +  "\\<rho>",
    1.74 +  "\\<sigma>",
    1.75 +  "\\<tau>",
    1.76 +  "\\<phi>",
    1.77 +  "\\<chi>",
    1.78 +  "\\<psi>",
    1.79 +  "\\<omega>",
    1.80 +  "\\<not>",
    1.81 +  "\\<and>",
    1.82 +  "\\<or>",
    1.83 +  "\\<forall>",
    1.84 +  "\\<exists>",
    1.85 +  "\\<And>",
    1.86 +  "\\<lceil>",
    1.87 +  "\\<rceil>",
    1.88 +  "\\<lfloor>",
    1.89 +  "\\<rfloor>",
    1.90 +  "\\<turnstile>",
    1.91 +  "\\<Turnstile>",
    1.92 +  "\\<lbrakk>",
    1.93 +  "\\<rbrakk>",
    1.94 +  "\\<cdot>",
    1.95 +  "\\<in>",
    1.96 +  "\\<subseteq>",
    1.97 +  "\\<inter>",
    1.98 +  "\\<union>",
    1.99 +  "\\<Inter>",
   1.100 +  "\\<Union>",
   1.101 +  "\\<sqinter>",
   1.102 +  "\\<squnion>",
   1.103 +  "\\<Sqinter>",
   1.104 +  "\\<Squnion>",
   1.105 +  "\\<bottom>",
   1.106 +  "\\<doteq>",
   1.107 +  "\\<equiv>",
   1.108 +  "\\<noteq>",
   1.109 +  "\\<sqsubset>",
   1.110 +  "\\<sqsubseteq>",
   1.111 +  "\\<prec>",
   1.112 +  "\\<preceq>",
   1.113 +  "\\<succ>",
   1.114 +  "\\<approx>",
   1.115 +  "\\<sim>",
   1.116 +  "\\<simeq>",
   1.117 +  "\\<le>",
   1.118 +  "\\<Colon>",
   1.119 +  "\\<leftarrow>",
   1.120 +  "\\<midarrow>",
   1.121 +  "\\<rightarrow>",
   1.122 +  "\\<Leftarrow>",
   1.123 +  "\\<Midarrow>",
   1.124 +  "\\<Rightarrow>",
   1.125 +  "\\<bow>",
   1.126 +  "\\<mapsto>",
   1.127 +  "\\<leadsto>",
   1.128 +  "\\<up>",
   1.129 +  "\\<down>",
   1.130 +  "\\<notin>",
   1.131 +  "\\<times>",
   1.132 +  "\\<oplus>",
   1.133 +  "\\<ominus>",
   1.134 +  "\\<otimes>",
   1.135 +  "\\<oslash>",
   1.136 +  "\\<subset>",
   1.137 +  "\\<infinity>",
   1.138 +  "\\<box>",
   1.139 +  "\\<diamond>",
   1.140 +  "\\<circ>",
   1.141 +  "\\<bullet>",
   1.142 +  "\\<parallel>",
   1.143 +  "\\<surd>",
   1.144 +  "\\<copyright>"
   1.145 +(* END OF GENERATED TEXT *)
   1.146 +];
   1.147 +
   1.148 +val enc_rel = enc_vector ~~ map chr (enc_start upto enc_end);
   1.149 +
   1.150 +val char_tab = Symtab.make enc_rel;
   1.151 +val symbol_tab = Symtab.make (map swap enc_rel);
   1.152 +
   1.153 +fun lookup_symbol c =
   1.154 +  if ord c < enc_start then None
   1.155 +  else Symtab.lookup (symbol_tab, c);
   1.156 +
   1.157 +
   1.158 +(* encode / decode *)
   1.159 +
   1.160 +fun char s = if_none (Symtab.lookup (char_tab, s)) s;
   1.161 +fun symbol c = if_none (lookup_symbol c) c;
   1.162 +
   1.163 +fun symbol' c =
   1.164 +  (case lookup_symbol c of
   1.165 +    None => c
   1.166 +  | Some s => "\\" ^ s);
   1.167 +
   1.168 +
   1.169 +
   1.170 +(** type symbol **)
   1.171 +
   1.172 +type symbol = string;
   1.173 +
   1.174 +val space = " ";
   1.175 +val eof = "";
   1.176 +
   1.177 +
   1.178 +(* kinds *)
   1.179 +
   1.180 +fun is_eof s = s = eof;
   1.181 +fun not_eof s = s <> eof;
   1.182 +val stopper = (eof, is_eof);
   1.183 +
   1.184 +fun is_ascii s = size s = 1 andalso ord s < 128;
   1.185 +
   1.186 +fun is_letter s =
   1.187 +  size s = 1 andalso
   1.188 +   (ord "A" <= ord s andalso ord s <= ord "Z" orelse
   1.189 +    ord "a" <= ord s andalso ord s <= ord "z");
   1.190 +
   1.191 +fun is_digit s =
   1.192 +  size s = 1 andalso ord "0" <= ord s andalso ord s <= ord "9";
   1.193 +
   1.194 +fun is_quasi_letter "_" = true
   1.195 +  | is_quasi_letter "'" = true
   1.196 +  | is_quasi_letter s = is_letter s;
   1.197 +
   1.198 +val is_blank =
   1.199 +  fn " " => true | "\t" => true | "\n" => true | "\^L" => true
   1.200 +    | "\160" => true | "\\<space2>" => true
   1.201 +    | _ => false;
   1.202 +
   1.203 +val is_letdig = is_quasi_letter orf is_digit;
   1.204 +
   1.205 +fun is_printable s =
   1.206 +  size s = 1 andalso ord space <= ord s andalso ord s <= ord "~" orelse
   1.207 +  size s > 2 andalso nth_elem (2, explode s) <> "^";
   1.208 +
   1.209 +
   1.210 +(* beginning *)
   1.211 +
   1.212 +val smash_blanks = map (fn s => if is_blank s then space else s);
   1.213 +
   1.214 +fun beginning raw_ss =
   1.215 +  let
   1.216 +    val (all_ss, _) = take_suffix is_blank raw_ss;
   1.217 +    val dots = if length all_ss > 10 then " ..." else "";
   1.218 +    val (ss, _) = take_suffix is_blank (take (10, all_ss));
   1.219 +  in implode (smash_blanks ss) ^ dots end;
   1.220 +
   1.221 +
   1.222 +(* scan *)
   1.223 +
   1.224 +val scan_id = Scan.one is_letter ^^ (Scan.any is_letdig >> implode);
   1.225 +
   1.226 +val scan =
   1.227 +  ($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   1.228 +    !! (fn (cs, _) => "Malformed symbolic character specification: \\" ^ "<" ^ beginning cs)
   1.229 +      (Scan.optional ($$ "^") "" ^^ scan_id ^^ $$ ">") ||
   1.230 +  Scan.one not_eof;
   1.231 +
   1.232 +
   1.233 +(* source *)
   1.234 +
   1.235 +val recover = Scan.any1 ((not o is_blank) andf not_eof);
   1.236 +
   1.237 +fun source do_recover src =
   1.238 +  Source.source stopper (Scan.bulk scan) (if do_recover then Some recover else None) src;
   1.239 +
   1.240 +
   1.241 +(* explode *)
   1.242 +
   1.243 +fun no_syms [] = true
   1.244 +  | no_syms ("\\" :: "<" :: _) = false
   1.245 +  | no_syms (c :: cs) = ord c < enc_start andalso no_syms cs;
   1.246 +
   1.247 +fun sym_explode str =
   1.248 +  let val chs = explode str in
   1.249 +    if no_syms chs then chs     (*tune trivial case*)
   1.250 +    else map symbol (the (Scan.read stopper (Scan.repeat scan) chs))
   1.251 +  end;
   1.252 +
   1.253 +
   1.254 +(* printable length *)
   1.255 +
   1.256 +fun sym_length ss = foldl (fn (n, c) => if is_printable c then n + 1 else n) (0, ss);
   1.257 +val sym_size = sym_length o sym_explode;
   1.258 +
   1.259 +
   1.260 +(* input / output *)
   1.261 +
   1.262 +fun input str =
   1.263 +  let val chs = explode str in
   1.264 +    if forall (fn c => ord c < enc_start) chs then str
   1.265 +    else implode (map symbol' chs)
   1.266 +  end;
   1.267 +
   1.268 +val output = implode o map char o sym_explode;
   1.269 +
   1.270 +
   1.271 +(*final declarations of this structure!*)
   1.272 +val explode = sym_explode;
   1.273 +val length = sym_length;
   1.274 +val size = sym_size;
   1.275 +
   1.276 +
   1.277 +end;