(* Title:      Pure/Syntax/lexicon.ML
   ID:         $Id$
   Author:     Tobias Nipkow, TU Muenchen
   Copyright   1993  TU Muenchen

The Isabelle Lexer.

Provenance: recovered from changeset a5a9c433f639 (Thu Sep 16 1993); the diff
wrapper and line structure were damaged in extraction.  Spans marked
NOTE(review) below were lost (text between '<' and '>' was stripped) and have
been reconstructed from the surviving fragments -- confirm against the
Isabelle repository history.

Changes:
TODO:
  Lexicon -> lexicon, Token -> token
  end_token -> EndToken ?
*)

(*Minimal lexical operations, also usable on their own.*)
signature LEXICON0 =
sig
  val is_identifier: string -> bool
  val scan_varname: string list -> indexname * string list
  val string_of_vname: indexname -> string
end;

signature LEXICON =
sig
  type Lexicon
  datatype Token = Token of string
                 | IdentSy of string
                 | VarSy of string * int
                 | TFreeSy of string
                 | TVarSy of string * int
                 | end_token;
  val empty: Lexicon
  val extend: Lexicon * string list -> Lexicon
  val matching_tokens: Token * Token -> bool
  val mk_lexicon: string list -> Lexicon
  val name_of_token: Token -> string
  val predef_term: string -> Token
  val is_terminal: string -> bool
  val tokenize: Lexicon -> string -> Token list
  val token_to_string: Token -> string
  val valued_token: Token -> bool
  type 'b TokenMap
  val mkTokenMap: ('b * Token list) list -> 'b TokenMap
  val applyTokenMap: 'b TokenMap * Token -> 'b list
  include LEXICON0
end;

functor LexiconFun(Extension: EXTENSION): LEXICON =
struct

open Extension;


(*Tokens: literal delimiters (Token), identifiers, schematic variables,
  type free/schematic variables, and the end-of-input marker.*)
datatype Token = Token of string
               | IdentSy of string
               | VarSy of string * int
               | TFreeSy of string
               | TVarSy of string * int
               | end_token;

(*Branch annotation meaning "no complete delimiter ends at this node".*)
val no_token = "";

(*Ternary search tree over single characters (less / equal / greater);
  each Branch carries its character and the delimiter ending there, if any.*)
datatype dfa = Tip | Branch of string * string * dfa * dfa * dfa;

type Lexicon = dfa;

(*An identifier is a letter followed by letters, digits, ' or _ .*)
fun is_id(c::cs) = is_letter(c) andalso forall is_letdig cs
  | is_id([]) = false;

fun is_identifier s = is_id(explode s);

val empty = Tip;

(*Insert delimiter s into the dfa.
  NOTE(review): the body below was partially lost in extraction; the first
  Branch clause is original, the remaining clauses are reconstructed.*)
fun extend1(dfa, s) =
  let fun add(Branch(c, a, less, eq, gr), cs as (d::ds)) =
            if c>d then Branch(c, a, add(less, cs), eq, gr) else
            if c<d then Branch(c, a, less, eq, add(gr, cs)) else
            Branch(c, if null ds then s else a, less, add(eq, ds), gr)
        | add(Tip, [d]) = Branch(d, s, Tip, Tip, Tip)
        | add(Tip, d::ds) = Branch(d, no_token, Tip, add(Tip, ds), Tip)
        | add(dfa, []) = dfa
  in add(dfa, explode s) end;

(*NOTE(review): extend/mk_lexicon were lost in extraction; reconstructed to
  match the signatures declared in LEXICON above.*)
fun extend(dfa, ss) = foldl extend1 (dfa, ss);

fun mk_lexicon ss = extend(empty, ss);

(*Follow one character c in the dfa: None if no transition, otherwise
  Some(delimiter-ending-here-or-no_token, successor dfa).
  NOTE(review): head clauses reconstructed; the final
  "d then next_dfa(gr, c) else Some(a, eq)" fragment is original.*)
fun next_dfa(Tip, _) = None
  | next_dfa(Branch(d, a, less, eq, gr), c) =
      if c<d then next_dfa(less, c) else
      if c>d then next_dfa(gr, c) else Some(a, eq);

exception ID of string * string list;

val eof_id = "End of input - identifier expected.\n";

(*A string of letters, digits, or ' _ *)
fun xscan_ident exn =
  let fun scan [] = raise exn(eof_id, [])
        | scan(c::cs) =
            if is_letter c
            then let val (ds, tail) = take_prefix is_letdig cs
                 in (implode(c::ds), tail) end
            else raise exn("Identifier expected: ", c::cs)
  in scan end;

(*Scan the offset of a Var, if present; otherwise ~1 *)
fun scan_offset cs = case cs of
    ("."::[]) => (~1, cs)
  | ("."::(ds as c::cs')) => if is_digit c then scan_int ds else (~1, cs)
  | _ => (~1, cs);

(*Split a name into its alphabetic prefix and trailing-digit index.*)
fun split_varname s =
  let val (rpost, rpref) = take_prefix is_digit (rev(explode s))
      val (i, _) = scan_int(rev rpost)
  in (implode(rev rpref), i) end;

(*Scan name with optional ".idx" offset; without one, trailing digits of
  the name itself become the index.*)
fun xscan_varname exn cs : (string*int) * string list =
  let val (a, ds) = xscan_ident exn cs;
      val (i, es) = scan_offset ds
  in if i = ~1 then (split_varname a, es) else ((a, i), es) end;

fun scan_varname s = xscan_varname ID s
      handle ID(err, cs) => error(err^(implode cs));

(*Turn a string into a token list using the given delimiter dfa; maximal
  munch with backtracking via LEX_ERR; blanks are skipped.*)
fun tokenize (dfa) (s:string) : Token list =
  let exception LEX_ERR;
      exception FAIL of string * string list;
      val lexerr = "Lexical error: ";

      (*longest delimiter match, refusing a match that would split an
        identifier (delimiter is an identifier and a letdig follows)*)
      fun tokenize1 (_:dfa, []:string list) : Token * string list =
            raise LEX_ERR
        | tokenize1(dfa, c::cs) =
            case next_dfa(dfa, c) of
              None => raise LEX_ERR
            | Some(a, dfa') =>
                if a=no_token then tokenize1(dfa', cs)
                else (tokenize1(dfa', cs) handle LEX_ERR =>
                  if is_identifier a andalso not(null cs) andalso
                     is_letdig(hd cs)
                  then raise LEX_ERR else (Token(a), cs));

      fun token(cs) = tokenize1(dfa, cs) handle LEX_ERR => raise FAIL(lexerr, cs);

      (*identifiers, ?vars, ?'tvars and 'tfrees*)
      fun id([]) = raise FAIL(eof_id, [])
        | id(cs as c::cs') =
            if is_letter(c)
            then let val (id, cs'') = xscan_ident FAIL cs
                 in (IdentSy(id), cs'') end
            else
            if c = "?"
            then case cs' of
                   "'"::xs => let val ((a, i), ys) = xscan_varname FAIL xs
                              in (TVarSy("'"^a, i), ys) end
                 | _ => let val ((a, i), ys) = xscan_varname FAIL cs'
                        in (VarSy(a, i), ys) end
            else
            if c = "'"
            then let val (a, cs'') = xscan_ident FAIL cs'
                 in (TFreeSy("'" ^ a), cs'') end
            else raise FAIL(lexerr, cs);

      (*"?" prefers a Var but falls back to a delimiter; otherwise the
        delimiter dfa is tried first*)
      fun tknize([], ts) = rev(ts)
        | tknize(cs as c::cs', ts) =
            if is_blank(c) then tknize(cs', ts) else
            let val (t, cs'') =
                  if c="?" then id(cs) handle FAIL _ => token(cs)
                  else (token(cs) handle FAIL _ => id(cs))
            in tknize(cs'', t::ts) end

  in tknize(explode s, []) handle FAIL(s, cs) => error(s^(implode cs)) end;

(*Variables have the form ?nameidx, or ?name.idx if "name" ends with a digit*)
fun string_of_vname (a, idx) = case rev(explode a) of
    [] => "?NULL_VARIABLE_NAME"
  | c::_ => "?" ^
      (if is_digit c then a ^ "." ^ string_of_int idx
       else if idx = 0 then a
       else a ^ string_of_int idx);

fun token_to_string (Token(s)) = s
  | token_to_string (IdentSy(s)) = s
  | token_to_string (VarSy v) = string_of_vname v
  | token_to_string (TFreeSy a) = a
  | token_to_string (TVarSy v) = string_of_vname v
  | token_to_string end_token = "\n";

(* MMW *)
(*grammar-level name of a token class; id/var/tfree/tvar come from Extension*)
fun name_of_token (Token s) = quote s
  | name_of_token (IdentSy _) = id
  | name_of_token (VarSy _) = var
  | name_of_token (TFreeSy _) = tfree
  | name_of_token (TVarSy _) = tvar;

fun matching_tokens(Token(i), Token(j)) = (i=j) |
    matching_tokens(IdentSy(_), IdentSy(_)) = true |
    matching_tokens(VarSy _, VarSy _) = true |
    matching_tokens(TFreeSy _, TFreeSy _) = true |
    matching_tokens(TVarSy _, TVarSy _) = true |
    matching_tokens(end_token, end_token) = true |
    matching_tokens(_, _) = false;

(*does the token carry a value (in contrast to a literal delimiter)?*)
fun valued_token(end_token) = false
  | valued_token(Token _) = false
  | valued_token(IdentSy _) = true
  | valued_token(VarSy _) = true
  | valued_token(TFreeSy _) = true
  | valued_token(TVarSy _) = true;

(* MMW *)
fun predef_term name =
  if name = id then IdentSy name
  else if name = var then VarSy (name, 0)
  else if name = tfree then TFreeSy name
  else if name = tvar then TVarSy (name, 0)
  else end_token;

(* MMW *)
fun is_terminal s = s mem [id, var, tfree, tvar];



(*association from tokens to values, plus the values matched by no token*)
type 'b TokenMap = (Token * 'b list) list * 'b list;
val first_token = 0;

(*total order on token classes for sorting the map's domain*)
fun int_of_token(Token(tk)) = first_token |
    int_of_token(IdentSy _) = first_token - 1 |
    int_of_token(VarSy _) = first_token - 2 |
    int_of_token(TFreeSy _) = first_token - 3 |
    int_of_token(TVarSy _) = first_token - 4 |
    int_of_token(end_token) = first_token - 5;

(*NOTE(review): the Token/Token comparison "a<b | (_, _) =>" was lost in
  extraction and has been reconstructed; the surrounding text is original.*)
fun lesstk(s, t) = int_of_token(s) < int_of_token(t) orelse
      (case (s, t) of (Token(a), Token(b)) => a<b | (_, _) => false);

fun mkTokenMap(atll) =
  let val aill = atll;
      val dom = sort lesstk (distinct(flat(map snd aill)));
      (*values whose token list is empty match any token*)
      val mt = map fst (filter (null o snd) atll);
      fun mktm(i) =
        let fun add(l, (a, il)) = if i mem il then a::l else l
        in (i, foldl add ([], aill)) end;
  in (map mktm dom, mt) end;

(*lookup in the sorted association list, empty on miss*)
fun find_al (i) =
  let fun find((j, al)::l) = if lesstk(i, j) then [] else
            if matching_tokens(i, j) then al else find l |
      find [] = [];
  in find end;

fun applyTokenMap((l, mt), tk:Token) = mt@(find_al tk l);


end;