# HG changeset patch # User wenzelm # Date 889456359 -3600 # Node ID a50ab39756db5652b0c45d9a4ccbebb8928fd195 # Parent ffbaf431665d33c4d4d35d3f3355547bde2c60c5 adapted to symbols, scan; diff -r ffbaf431665d -r a50ab39756db src/Pure/Syntax/lexicon.ML --- a/src/Pure/Syntax/lexicon.ML Mon Mar 09 16:12:19 1998 +0100 +++ b/src/Pure/Syntax/lexicon.ML Mon Mar 09 16:12:39 1998 +0100 @@ -2,62 +2,31 @@ ID: $Id$ Author: Tobias Nipkow and Markus Wenzel, TU Muenchen -Scanner combinators and the main lexer (for terms and types). +Lexer for the inner Isabelle syntax (terms and types). *) -infix 5 -- ^^; -infix 3 >>; -infix 0 ||; - -signature SCANNER = -sig - exception LEXICAL_ERROR - val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c - val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b - val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e - val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c - val $$ : ''a -> ''a list -> ''a * ''a list - val scan_empty: 'a list -> 'b list * 'a list - val scan_one: ('a -> bool) -> 'a list -> 'a * 'a list - val scan_any: ('a -> bool) -> 'a list -> 'a list * 'a list - val scan_any1: ('a -> bool) -> 'a list -> 'a list * 'a list - val scan_end: 'a list -> 'b list * 'a list - val optional: ('a -> 'b * 'a) -> 'a -> 'b option * 'a - val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a - val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a - val max_of: ('a -> ('b * string) * 'a) -> ('a -> ('b * string) * 'a) - -> 'a -> ('b * string) option * 'a - val scan_id: string list -> string * string list - val scan_longid: string list -> string * string list - val scan_tid: string list -> string * string list - val scan_nat: string list -> string * string list - type lexicon - val dest_lexicon: lexicon -> string list - val empty_lexicon: lexicon - val extend_lexicon: lexicon -> string list -> lexicon - val make_lexicon: string list -> lexicon - val merge_lexicons: lexicon -> lexicon -> lexicon - val scan_literal: lexicon -> string list -> string * string list -end; - signature LEXICON0 = sig val is_identifier: string -> bool val implode_xstr: string list -> string val explode_xstr: string -> string list + val scan_id: string list -> string * string list + val scan_longid: string list -> string * string list + val scan_var: string list -> string * string list + val scan_tid: string list -> string * string list + val scan_nat: string list -> string * string list + val scan_int: string list -> string * string list val string_of_vname: indexname -> string val string_of_vname': indexname -> string - val scan_varname: string list -> indexname * string list - val scan_var: string -> term + val indexname: string list -> indexname + val read_var: string -> term val const: string -> term val free: string -> term val var: indexname -> term - val read_var: string -> string * int end; signature LEXICON = sig - include SCANNER include LEXICON0 val is_xid: string -> bool val is_tid: string -> bool @@ -84,7 +53,7 @@ val token_assoc: (token option * 'a list) list * token -> 'a list val valued_token: token -> bool val predef_term: string -> token option - val tokenize: lexicon -> bool -> string list -> token list + val tokenize: Scan.lexicon -> bool -> string list -> token list end; structure Lexicon : LEXICON = @@ -94,36 +63,53 @@ (** is_identifier etc. **) fun is_ident [] = false - | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs; + | is_ident (c :: cs) = Symbol.is_letter c andalso forall Symbol.is_letdig cs; -val is_identifier = is_ident o explode; +val is_identifier = is_ident o Symbol.explode; fun is_xid s = - (case explode s of + (case Symbol.explode s of "_" :: cs => is_ident cs | cs => is_ident cs); fun is_tid s = - (case explode s of + (case Symbol.explode s of "'" :: cs => is_ident cs | _ => false); +(** basic scanners **) + +val scan_letter_letdigs = Scan.one Symbol.is_letter -- Scan.any Symbol.is_letdig >> op ::; +val scan_digits1 = Scan.any1 Symbol.is_digit; + +val scan_id = scan_letter_letdigs >> implode; +val scan_longid = scan_id ^^ (Scan.repeat1 ($$ "." ^^ scan_id) >> implode); +val scan_tid = $$ "'" ^^ scan_id; + +val scan_nat = scan_digits1 >> implode; +val scan_int = $$ "~" ^^ scan_nat || scan_nat; + +val scan_id_nat = scan_id ^^ Scan.optional ($$ "." ^^ scan_nat) ""; +val scan_var = $$ "?" ^^ scan_id_nat; + + + (** string_of_vname **) fun string_of_vname (x, i) = let val si = string_of_int i; + val dot = Symbol.is_digit (last_elem (Symbol.explode x)) handle _ => true; in - (if is_digit (last_elem (explode x)) then "?" ^ x ^ "." ^ si - else if i = 0 then "?" ^ x - else "?" ^ x ^ si) - handle LIST _ => "?NULL_VARIABLE." ^ si + if dot then "?" ^ x ^ "." ^ si + else if i = 0 then "?" ^ x + else "?" ^ x ^ si end; -fun string_of_vname' (xi as (x, i)) = - if i < 0 then x else string_of_vname xi; +fun string_of_vname' (x, ~1) = x + | string_of_vname' xi = string_of_vname xi; @@ -231,170 +217,23 @@ | predef_term _ = None; -(* xstr tokens *) - -fun implode_xstr cs = enclose "''" "''" (implode cs); - -fun explode_xstr str = - rev (tl (tl (rev (tl (tl (explode str)))))); - - - -(** datatype lexicon **) - -datatype lexicon = - Empty | - Branch of string * string * lexicon * lexicon * lexicon; - -val no_token = ""; - - -(* dest_lexicon *) - -fun dest_lexicon Empty = [] - | dest_lexicon (Branch (_, "", lt, eq, gt)) = - dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt - | dest_lexicon (Branch (_, str, lt, eq, gt)) = - str :: (dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt); - - -(* empty, extend, make, merge lexicons *) - -val empty_lexicon = Empty; +(* xstr tokens *) -fun extend_lexicon lexicon strs = - let - fun ext (lex, s) = - let - fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) = - if c < d then Branch (d, a, add lt chs, eq, gt) - else if c > d then Branch (d, a, lt, eq, add gt chs) - else Branch (d, if null cs then s else a, lt, add eq cs, gt) - | add Empty [c] = - Branch (c, s, Empty, Empty, Empty) - | add Empty (c :: cs) = - Branch (c, no_token, Empty, add Empty cs, Empty) - | add lex [] = lex; +val scan_chr = + $$ "\\" |-- Scan.one Symbol.not_eof || + Scan.one (not_equal "'" andf Symbol.not_eof) || + $$ "'" --| Scan.ahead (Scan.one (not_equal "'")); - val cs = explode s; - in - if exists is_blank cs then - sys_error ("extend_lexicon: blank in delimiter " ^ quote s) - else add lex cs - end; - in - foldl ext (lexicon, strs \\ dest_lexicon lexicon) - end; - -val make_lexicon = extend_lexicon empty_lexicon; - -fun merge_lexicons lex1 lex2 = - let - val strs1 = dest_lexicon lex1; - val strs2 = dest_lexicon lex2; - in - if strs2 subset strs1 then lex1 - else if strs1 subset strs2 then lex2 - else extend_lexicon lex1 strs2 - end; - - - -(** scanners **) - -exception LEXICAL_ERROR; +val scan_str = + $$ "'" |-- $$ "'" |-- + !! (fn cs => "Inner lexical error: malformed literal string at " ^ quote ("''" ^ beginning cs)) + (Scan.repeat scan_chr --| $$ "'" --| $$ "'"); -(* scanner combinators *) - -fun (scan >> f) cs = apfst f (scan cs); - -fun (scan1 || scan2) cs = scan1 cs handle LEXICAL_ERROR => scan2 cs; - -fun (scan1 -- scan2) cs = - let - val (x, cs') = scan1 cs; - val (y, cs'') = scan2 cs'; - in - ((x, y), cs'') - end; - -fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^; - - -(* generic scanners *) - -fun $$ _ [] = raise LEXICAL_ERROR - | $$ a (c :: cs) = - if a = c then (c, cs) else raise LEXICAL_ERROR; - -fun scan_empty cs = ([], cs); - -fun scan_one _ [] = raise LEXICAL_ERROR - | scan_one pred (c :: cs) = - if pred c then (c, cs) else raise LEXICAL_ERROR; - -fun scan_any _ [] = ([], []) - | scan_any pred (chs as c :: cs) = - if pred c then apfst (cons c) (scan_any pred cs) - else ([], chs); - -fun scan_any1 pred = scan_one pred -- scan_any pred >> op ::; - -fun scan_rest cs = (cs, []); - -fun scan_end [] = ([], []) - | scan_end _ = raise LEXICAL_ERROR; - -fun optional scan = scan >> Some || scan_empty >> K None; - -fun repeat scan cs = (scan -- repeat scan >> op :: || scan_empty) cs; +fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); -fun repeat1 scan = scan -- repeat scan >> op ::; - -fun max_of scan1 scan2 cs = - (case (optional scan1 cs, optional scan2 cs) of - (tok1, (None, _)) => tok1 - | ((None, _), tok2) => tok2 - | (tok1 as (Some (_, s1), _), tok2 as (Some (_, s2), _)) => - if size s1 >= size s2 then tok1 else tok2); - - -(* other scanners *) - -val scan_letter_letdigs = scan_one is_letter -- scan_any is_letdig >> op ::; - -val scan_digits1 = scan_any1 is_digit; - -val scan_id = scan_letter_letdigs >> implode; - -val scan_longid = scan_id ^^ (repeat1 ($$ "." ^^ scan_id) >> implode); - -val scan_id_nat = - scan_id ^^ ($$ "." ^^ (scan_digits1 >> implode) || scan_empty >> K ""); - -val scan_tid = $$ "'" ^^ scan_id; - -val scan_nat = scan_digits1 >> implode; - -val scan_int = $$ "~" ^^ scan_nat || scan_nat; - - -(* scan_literal *) - -fun scan_literal lex chrs = - let - fun scan_lit _ s_cs [] = s_cs - | scan_lit Empty s_cs _ = s_cs - | scan_lit (Branch (d, a, lt, eq, gt)) s_cs (chs as c :: cs) = - if c < d then scan_lit lt s_cs chs - else if c > d then scan_lit gt s_cs chs - else scan_lit eq (if a = no_token then s_cs else Some (a, cs)) cs; - in - (case scan_lit lex None chrs of - None => raise LEXICAL_ERROR - | Some s_cs => s_cs) - end; +fun explode_xstr str = + #1 (Scan.error (Scan.finite Symbol.eof scan_str) (Symbol.explode str)); @@ -406,39 +245,24 @@ if xids then $$ "_" ^^ scan_id || scan_id else scan_id; - val scan_lit = scan_literal lex >> pair Token; - val scan_val = $$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy || - $$ "?" ^^ scan_id_nat >> pair VarSy || - $$ "'" ^^ scan_id >> pair TFreeSy || - $$ "#" ^^ scan_int >> pair NumSy || + scan_var >> pair VarSy || + scan_tid >> pair TFreeSy || + $$ "#" ^^ scan_int >> pair NumSy || (* FIXME remove "#" *) scan_longid >> pair LongIdentSy || scan_xid >> pair IdentSy; - fun scan_str ("'" :: "'" :: cs) = ([], cs) - | scan_str ("\\" :: c :: cs) = apfst (cons c) (scan_str cs) - | scan_str (c :: cs) = apfst (cons c) (scan_str cs) - | scan_str [] = raise ERROR; - + val scan_lit = Scan.literal lex >> (pair Token o implode); - fun scan (rev_toks, []) = rev (EndToken :: rev_toks) - | scan (rev_toks, chs as "'" :: "'" :: cs) = - let - val (cs', cs'') = scan_str cs handle ERROR => - error ("Lexical error: missing quotes at end of string " ^ - quote (implode chs)); - in - scan (StrSy (implode_xstr cs') :: rev_toks, cs'') - end - | scan (rev_toks, chs as c :: cs) = - if is_blank c then scan (rev_toks, cs) - else - (case max_of scan_lit scan_val chs of - (None, _) => error ("Lexical error at: " ^ quote (implode chs)) - | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs')); + val scan_token = + Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => Some (tk s)) || + scan_str >> (Some o StrSy o implode_xstr) || + Scan.one Symbol.is_blank >> K None; in - scan ([], chs) + (case Scan.error (Scan.finite Symbol.eof (Scan.repeat scan_token)) chs of + (toks, []) => mapfilter I toks @ [EndToken] + | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs))) end; @@ -452,15 +276,14 @@ fun nat_of_chs n [] = n | nat_of_chs n (c :: cs) = nat_of_chs (n * 10 + (ord c - ord "0")) cs; - val nat_of = nat_of_chs 0; + val nat = nat_of_chs 0; fun split_vname chs = - let val (cs, ds) = take_suffix is_digit chs - in (implode cs, nat_of ds) end + let val (cs, ds) = take_suffix Symbol.is_digit chs + in (implode cs, nat ds) end val scan = - scan_letter_letdigs -- - ($$ "." -- scan_digits1 >> (nat_of o #2) || scan_empty >> K ~1); + scan_letter_letdigs -- Scan.optional ($$ "." |-- scan_digits1 >> nat) ~1; in (case scan chrs of ((cs, ~1), cs') => (split_vname cs, cs') @@ -468,38 +291,30 @@ end; -(* scan_varname *) +(* indexname *) -fun scan_varname chs = - scan_vname chs handle LEXICAL_ERROR - => error ("scan_varname: bad varname " ^ quote (implode chs)); +fun indexname cs = + (case Scan.error (Scan.finite Symbol.eof (Scan.option scan_vname)) cs of + (Some xi, []) => xi + | _ => error ("Lexical error in variable name: " ^ quote (implode cs))); -(* scan_var *) +(* read_var *) fun const c = Const (c, dummyT); fun free x = Free (x, dummyT); fun var xi = Var (xi, dummyT); -fun scan_var str = +fun read_var str = let fun tvar (x, i) = var ("'" ^ x, i); val scan = - $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) || - $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) || - scan_rest >> (free o implode); + $$ "?" |-- $$ "'" |-- scan_vname >> tvar || + $$ "?" |-- scan_vname >> var || + Scan.any Symbol.not_eof >> (free o implode); in - #1 (scan (explode str)) - end; - - -(* read_var *) - -fun read_var str = - let val scan = $$ "?" -- scan_vname -- scan_end >> (#2 o #1) in - #1 (scan (explode str)) handle LEXICAL_ERROR - => error ("Bad variable " ^ quote str) + #1 (Scan.error (Scan.finite Symbol.eof scan) (Symbol.explode str)) end; diff -r ffbaf431665d -r a50ab39756db src/Pure/Syntax/syntax.ML --- a/src/Pure/Syntax/syntax.ML Mon Mar 09 16:12:19 1998 +0100 +++ b/src/Pure/Syntax/syntax.ML Mon Mar 09 16:12:39 1998 +0100 @@ -104,7 +104,7 @@ let fun eq_tr ((c1, (_, s1)), (c2, (_, s2))) = c1 = c2 andalso s1 = s2; - fun name (s, _) = implode (tl (explode s)); + fun name (s, _) = implode (tl (Symbol.explode s)); fun merge mode = let @@ -162,7 +162,7 @@ datatype syntax = Syntax of { - lexicon: lexicon, + lexicon: Scan.lexicon, logtypes: string list, gram: gram, consts: string list, @@ -181,7 +181,7 @@ val empty_syntax = Syntax { - lexicon = empty_lexicon, + lexicon = Scan.empty_lexicon, logtypes = [], gram = empty_gram, consts = [], @@ -208,7 +208,7 @@ print_ast_translation, token_translation} = syn_ext; in Syntax { - lexicon = if inout then extend_lexicon lexicon (delims_of xprods) else lexicon, + lexicon = if inout then Scan.extend_lexicon lexicon (delims_of xprods) else lexicon, logtypes = extend_list logtypes1 logtypes2, gram = if inout then extend_gram gram xprods else gram, consts = consts2 union consts1, @@ -243,7 +243,7 @@ tokentrtab = tokentrtab2, prtabs = prtabs2} = tabs2; in Syntax { - lexicon = merge_lexicons lexicon1 lexicon2, + lexicon = Scan.merge_lexicons lexicon1 lexicon2, logtypes = merge_lists logtypes1 logtypes2, gram = merge_grams gram1 gram2, consts = merge_lists consts1 consts2, @@ -282,7 +282,7 @@ val {lexicon, logtypes, prmodes, gram, prtabs, ...} = tabs; val prmodes' = sort_strings (filter_out (equal "") prmodes); in - Pretty.writeln (pretty_strs_qs "lexicon:" (dest_lexicon lexicon)); + Pretty.writeln (pretty_strs_qs "lexicon:" (map implode (Scan.dest_lexicon lexicon))); Pretty.writeln (Pretty.strs ("logtypes:" :: logtypes)); Pretty.writeln (Pretty.big_list "prods:" (pretty_gram gram)); Pretty.writeln (pretty_strs_qs "print modes:" prmodes') @@ -299,8 +299,10 @@ fun pretty_ruletab name tab = Pretty.big_list name (map pretty_rule (dest_ruletab tab)); + fun pretty_tokentr (mode, trs) = Pretty.strs (quote mode ^ ":" :: map fst trs); + val {consts, parse_ast_trtab, parse_ruletab, parse_trtab, print_trtab, - print_ruletab, print_ast_trtab, ...} = tabs; + print_ruletab, print_ast_trtab, tokentrtab, ...} = tabs; in Pretty.writeln (pretty_strs_qs "consts:" consts); Pretty.writeln (pretty_trtab "parse_ast_translation:" parse_ast_trtab); @@ -308,7 +310,8 @@ Pretty.writeln (pretty_trtab "parse_translation:" parse_trtab); Pretty.writeln (pretty_trtab "print_translation:" print_trtab); Pretty.writeln (pretty_ruletab "print_rules:" print_ruletab); - Pretty.writeln (pretty_trtab "print_ast_translation:" print_ast_trtab) + Pretty.writeln (pretty_trtab "print_ast_translation:" print_ast_trtab); + Pretty.writeln (Pretty.big_list "token_translation:" (map pretty_tokentr tokentrtab)) end; @@ -326,7 +329,7 @@ let val {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs; - val chars = SymbolFont.read_charnames (explode str); + val chars = Symbol.explode str; val toks = tokenize lexicon false chars; val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks)); @@ -350,7 +353,7 @@ let val {lexicon, gram, parse_ast_trtab, logtypes, ...} = tabs; val root' = if root mem logtypes then logic else root; - val chars = SymbolFont.read_charnames (explode str); + val chars = Symbol.explode str; val pts = parse gram root' (tokenize lexicon xids chars); fun show_pt pt =