src/Pure/Syntax/lexicon.ML
changeset 0 a5a9c433f639
child 18 c9ec452ff08f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Syntax/lexicon.ML	Thu Sep 16 12:20:38 1993 +0200
@@ -0,0 +1,252 @@
+(*  Title:      Lexicon
+    ID:         $Id$
+    Author:     Tobias Nipkow, TU Muenchen
+    Copyright   1993  TU Muenchen
+
+The Isabelle Lexer
+
+Changes:
+TODO:
+  Lexicon -> lexicon, Token -> token
+  end_token -> EndToken ?
+*)
+
+signature LEXICON0 =
+sig
+  val is_identifier: string -> bool
+  val scan_varname: string list -> indexname * string list
+  val string_of_vname: indexname -> string
+end;
+
+signature LEXICON =
+sig
+  type Lexicon
+  datatype Token = Token of string
+                 | IdentSy of string
+                 | VarSy of string * int
+                 | TFreeSy of string
+                 | TVarSy of string * int
+                 | end_token;
+  val empty: Lexicon
+  val extend: Lexicon * string list -> Lexicon
+  val matching_tokens: Token * Token -> bool
+  val mk_lexicon: string list -> Lexicon
+  val name_of_token: Token -> string
+  val predef_term: string -> Token
+  val is_terminal: string -> bool
+  val tokenize: Lexicon -> string -> Token list
+  val token_to_string: Token -> string
+  val valued_token: Token -> bool
+  type 'b TokenMap
+  val mkTokenMap: ('b * Token list) list -> 'b TokenMap
+  val applyTokenMap: 'b TokenMap * Token -> 'b list
+  include LEXICON0
+end;
+
+functor LexiconFun(Extension: EXTENSION): LEXICON =
+struct
+
+open Extension;
+
+
+datatype Token = Token of string
+               | IdentSy of string
+               | VarSy of string * int
+               | TFreeSy of string
+               | TVarSy of string * int
+               | end_token;
+val no_token = "";
+
+datatype dfa = Tip | Branch of string * string * dfa * dfa * dfa;
+
+type Lexicon = dfa;
+
+fun is_id(c::cs) = is_letter(c) andalso forall is_letdig cs
+  | is_id([])    = false;
+
+fun is_identifier s = is_id(explode s);
+
+val empty = Tip;
+
+fun extend1(dfa, s) =
+    let fun add(Branch(c, a, less, eq, gr), cs as (d::ds)) =
+              if c>d then Branch(c, a, add(less, cs), eq, gr) else
+              if c<d then Branch(c, a, less, eq, add(gr, cs)) else
+              Branch(c, if null ds then s else a, less, add(eq, ds), gr)
+          | add(Tip, c::cs) =
+              Branch(c, if null cs then s else no_token, Tip, add(Tip, cs), Tip)
+          | add(dfa, []) = dfa
+
+    in add(dfa, explode s) end;
+
+val extend = foldl extend1;
+
+fun mk_lexicon ss = extend(empty, ss);
+
+fun next_dfa (Tip, _) = None
+  | next_dfa (Branch(d, a, less, eq, gr), c) =
+      if c<d then next_dfa(less, c) else
+      if c>d then next_dfa(gr, c)   else Some(a, eq);
+
+exception ID of string * string list;
+
+val eof_id = "End of input - identifier expected.\n";
+
+(*A string of letters, digits, or ' _ *)
+fun xscan_ident exn =
+let fun scan []  =  raise exn(eof_id, [])
+      | scan(c::cs) =
+        if  is_letter c
+        then let val (ds, tail) = take_prefix is_letdig cs
+             in  (implode(c::ds), tail)  end
+        else raise exn("Identifier expected: ", c::cs)
+in scan end;
+
+(*Scan the offset of a Var, if present; otherwise ~1 *)
+fun scan_offset cs = case cs of
+    ("."::[]) => (~1, cs)
+  | ("."::(ds as c::cs')) => if is_digit c then scan_int ds else (~1, cs)
+  | _ => (~1, cs);
+
+fun split_varname s =
+    let val (rpost, rpref) = take_prefix is_digit (rev(explode s))
+        val (i, _) = scan_int(rev rpost)
+    in (implode(rev rpref), i) end;
+
+fun xscan_varname exn cs : (string*int) * string list =
+let val (a, ds) = xscan_ident exn cs;
+    val (i, es) = scan_offset ds
+in if i = ~1 then (split_varname a, es) else ((a, i), es) end;
+
+fun scan_varname s = xscan_varname ID s
+        handle ID(err, cs) => error(err^(implode cs));
+
+fun tokenize (dfa) (s:string) : Token list =
+let exception LEX_ERR;
+    exception FAIL of string * string list;
+    val lexerr = "Lexical error: ";
+
+    fun tokenize1 (_:dfa, []:string list) : Token * string list =
+          raise LEX_ERR
+      | tokenize1(dfa, c::cs) =
+          case next_dfa(dfa, c) of
+            None => raise LEX_ERR
+          | Some(a, dfa') =>
+             if a=no_token then tokenize1(dfa', cs)
+             else (tokenize1(dfa', cs) handle LEX_ERR =>
+                   if is_identifier a andalso not(null cs) andalso
+                      is_letdig(hd cs)
+                   then raise LEX_ERR else (Token(a), cs));
+
+    fun token(cs) = tokenize1(dfa, cs) handle LEX_ERR => raise FAIL(lexerr, cs);
+
+    fun id([]) = raise FAIL(eof_id, [])
+      | id(cs as c::cs') =
+        if is_letter(c)
+        then let val (id, cs'') = xscan_ident FAIL cs
+             in (IdentSy(id), cs'') end
+        else
+        if c = "?"
+        then case cs' of
+                "'"::xs => let val ((a, i), ys) = xscan_varname FAIL xs
+                           in (TVarSy("'"^a, i), ys) end
+             | _ => let val ((a, i), ys) = xscan_varname FAIL cs'
+                    in (VarSy(a, i), ys) end
+        else
+        if c = "'"
+        then let val (a, cs'') = xscan_ident FAIL cs'
+             in (TFreeSy("'" ^ a), cs'') end
+        else raise FAIL(lexerr, cs);
+
+    fun tknize([], ts) = rev(ts)
+      | tknize(cs as c::cs', ts) =
+        if is_blank(c) then tknize(cs', ts) else
+        let val (t, cs'') =
+                if c="?" then id(cs) handle FAIL _ => token(cs)
+                else (token(cs) handle FAIL _ => id(cs))
+        in tknize(cs'', t::ts) end
+
+in tknize(explode s, []) handle FAIL(s, cs) => error(s^(implode cs)) end;
+
+(*Variables have the form ?nameidx, or ?name.idx if "name" ends with a digit*)
+fun string_of_vname (a, idx) = case rev(explode a) of
+        []   => "?NULL_VARIABLE_NAME"
+      | c::_ => "?" ^
+                (if is_digit c then a ^ "." ^ string_of_int idx
+                 else if idx = 0 then a
+                 else a ^ string_of_int idx);
+
+fun token_to_string (Token(s)) = s
+  | token_to_string (IdentSy(s)) = s
+  | token_to_string (VarSy v) = string_of_vname v
+  | token_to_string (TFreeSy a) = a
+  | token_to_string (TVarSy v) = string_of_vname v
+  | token_to_string end_token = "\n";
+
+(* MMW *)
+fun name_of_token (Token s) = quote s
+  | name_of_token (IdentSy _) = id
+  | name_of_token (VarSy _) = var
+  | name_of_token (TFreeSy _) = tfree
+  | name_of_token (TVarSy _) = tvar;
+
+fun matching_tokens(Token(i), Token(j)) = (i=j) |
+    matching_tokens(IdentSy(_), IdentSy(_)) = true |
+    matching_tokens(VarSy _, VarSy _) = true |
+    matching_tokens(TFreeSy _, TFreeSy _) = true |
+    matching_tokens(TVarSy _, TVarSy _) = true |
+    matching_tokens(end_token, end_token) = true |
+    matching_tokens(_, _) = false;
+
+fun valued_token(end_token) = false
+  | valued_token(Token _) = false
+  | valued_token(IdentSy _) = true
+  | valued_token(VarSy _) = true
+  | valued_token(TFreeSy _) = true
+  | valued_token(TVarSy _) = true;
+
+(* MMW *)
+fun predef_term name =
+  if name = id then IdentSy name
+  else if name = var then VarSy (name, 0)
+  else if name = tfree then TFreeSy name
+  else if name = tvar then TVarSy (name, 0)
+  else end_token;
+
+(* MMW *)
+fun is_terminal s = s mem [id, var, tfree, tvar];
+
+
+
+type 'b TokenMap = (Token * 'b list) list * 'b list;
+val first_token = 0;
+
+fun int_of_token(Token(tk)) = first_token |
+    int_of_token(IdentSy _) = first_token - 1 |
+    int_of_token(VarSy _) = first_token - 2 |
+    int_of_token(TFreeSy _) = first_token - 3 |
+    int_of_token(TVarSy _) = first_token - 4 |
+    int_of_token(end_token) = first_token - 5;
+
+fun lesstk(s, t) = int_of_token(s) < int_of_token(t) orelse
+                  (case (s, t) of (Token(a), Token(b)) => a<b | _ => false);
+
+fun mkTokenMap(atll) =
+    let val aill = atll;
+        val dom = sort lesstk (distinct(flat(map snd aill)));
+        val mt = map fst (filter (null o snd) atll);
+        fun mktm(i) =
+            let fun add(l, (a, il)) = if i mem il then a::l else l
+            in (i, foldl add ([], aill)) end;
+    in (map mktm dom, mt) end;
+
+fun find_al (i) =
+    let fun find((j, al)::l) = if lesstk(i, j) then [] else
+                              if matching_tokens(i, j) then al else find l |
+            find [] = [];
+    in find end;
+fun applyTokenMap((l, mt), tk:Token) = mt@(find_al tk l);
+
+
+end;
+