--- a/src/Pure/Syntax/lexicon.ML Mon Mar 09 16:12:19 1998 +0100
+++ b/src/Pure/Syntax/lexicon.ML Mon Mar 09 16:12:39 1998 +0100
@@ -2,62 +2,31 @@
ID: $Id$
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen
-Scanner combinators and the main lexer (for terms and types).
+Lexer for the inner Isabelle syntax (terms and types).
*)
-infix 5 -- ^^;
-infix 3 >>;
-infix 0 ||;
-
-signature SCANNER =
-sig
- exception LEXICAL_ERROR
- val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
- val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
- val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
- val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
- val $$ : ''a -> ''a list -> ''a * ''a list
- val scan_empty: 'a list -> 'b list * 'a list
- val scan_one: ('a -> bool) -> 'a list -> 'a * 'a list
- val scan_any: ('a -> bool) -> 'a list -> 'a list * 'a list
- val scan_any1: ('a -> bool) -> 'a list -> 'a list * 'a list
- val scan_end: 'a list -> 'b list * 'a list
- val optional: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
- val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
- val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
- val max_of: ('a -> ('b * string) * 'a) -> ('a -> ('b * string) * 'a)
- -> 'a -> ('b * string) option * 'a
- val scan_id: string list -> string * string list
- val scan_longid: string list -> string * string list
- val scan_tid: string list -> string * string list
- val scan_nat: string list -> string * string list
- type lexicon
- val dest_lexicon: lexicon -> string list
- val empty_lexicon: lexicon
- val extend_lexicon: lexicon -> string list -> lexicon
- val make_lexicon: string list -> lexicon
- val merge_lexicons: lexicon -> lexicon -> lexicon
- val scan_literal: lexicon -> string list -> string * string list
-end;
-
signature LEXICON0 =
sig
val is_identifier: string -> bool
val implode_xstr: string list -> string
val explode_xstr: string -> string list
+ val scan_id: string list -> string * string list
+ val scan_longid: string list -> string * string list
+ val scan_var: string list -> string * string list
+ val scan_tid: string list -> string * string list
+ val scan_nat: string list -> string * string list
+ val scan_int: string list -> string * string list
val string_of_vname: indexname -> string
val string_of_vname': indexname -> string
- val scan_varname: string list -> indexname * string list
- val scan_var: string -> term
+ val indexname: string list -> indexname
+ val read_var: string -> term
val const: string -> term
val free: string -> term
val var: indexname -> term
- val read_var: string -> string * int
end;
signature LEXICON =
sig
- include SCANNER
include LEXICON0
val is_xid: string -> bool
val is_tid: string -> bool
@@ -84,7 +53,7 @@
val token_assoc: (token option * 'a list) list * token -> 'a list
val valued_token: token -> bool
val predef_term: string -> token option
- val tokenize: lexicon -> bool -> string list -> token list
+ val tokenize: Scan.lexicon -> bool -> string list -> token list
end;
structure Lexicon : LEXICON =
@@ -94,36 +63,53 @@
(** is_identifier etc. **)
fun is_ident [] = false
- | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
+ | is_ident (c :: cs) = Symbol.is_letter c andalso forall Symbol.is_letdig cs;
-val is_identifier = is_ident o explode;
+val is_identifier = is_ident o Symbol.explode;
fun is_xid s =
- (case explode s of
+ (case Symbol.explode s of
"_" :: cs => is_ident cs
| cs => is_ident cs);
fun is_tid s =
- (case explode s of
+ (case Symbol.explode s of
"'" :: cs => is_ident cs
| _ => false);
+(** basic scanners **)
+
+val scan_letter_letdigs = Scan.one Symbol.is_letter -- Scan.any Symbol.is_letdig >> op ::;
+val scan_digits1 = Scan.any1 Symbol.is_digit;
+
+val scan_id = scan_letter_letdigs >> implode;
+val scan_longid = scan_id ^^ (Scan.repeat1 ($$ "." ^^ scan_id) >> implode);
+val scan_tid = $$ "'" ^^ scan_id;
+
+val scan_nat = scan_digits1 >> implode;
+val scan_int = $$ "~" ^^ scan_nat || scan_nat;
+
+val scan_id_nat = scan_id ^^ Scan.optional ($$ "." ^^ scan_nat) "";
+val scan_var = $$ "?" ^^ scan_id_nat;
+
+
+
(** string_of_vname **)
fun string_of_vname (x, i) =
let
val si = string_of_int i;
+ val dot = Symbol.is_digit (last_elem (Symbol.explode x)) handle _ => true;
in
- (if is_digit (last_elem (explode x)) then "?" ^ x ^ "." ^ si
- else if i = 0 then "?" ^ x
- else "?" ^ x ^ si)
- handle LIST _ => "?NULL_VARIABLE." ^ si
+ if dot then "?" ^ x ^ "." ^ si
+ else if i = 0 then "?" ^ x
+ else "?" ^ x ^ si
end;
-fun string_of_vname' (xi as (x, i)) =
- if i < 0 then x else string_of_vname xi;
+fun string_of_vname' (x, ~1) = x
+ | string_of_vname' xi = string_of_vname xi;
@@ -231,170 +217,23 @@
| predef_term _ = None;
-(* xstr tokens *)
-
-fun implode_xstr cs = enclose "''" "''" (implode cs);
-
-fun explode_xstr str =
- rev (tl (tl (rev (tl (tl (explode str))))));
-
-
-
-(** datatype lexicon **)
-
-datatype lexicon =
- Empty |
- Branch of string * string * lexicon * lexicon * lexicon;
-
-val no_token = "";
-
-
-(* dest_lexicon *)
-
-fun dest_lexicon Empty = []
- | dest_lexicon (Branch (_, "", lt, eq, gt)) =
- dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt
- | dest_lexicon (Branch (_, str, lt, eq, gt)) =
- str :: (dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt);
-
-
-(* empty, extend, make, merge lexicons *)
-
-val empty_lexicon = Empty;
+(* xstr tokens *)
-fun extend_lexicon lexicon strs =
- let
- fun ext (lex, s) =
- let
- fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
- if c < d then Branch (d, a, add lt chs, eq, gt)
- else if c > d then Branch (d, a, lt, eq, add gt chs)
- else Branch (d, if null cs then s else a, lt, add eq cs, gt)
- | add Empty [c] =
- Branch (c, s, Empty, Empty, Empty)
- | add Empty (c :: cs) =
- Branch (c, no_token, Empty, add Empty cs, Empty)
- | add lex [] = lex;
+val scan_chr =
+ $$ "\\" |-- Scan.one Symbol.not_eof ||
+ Scan.one (not_equal "'" andf Symbol.not_eof) ||
+ $$ "'" --| Scan.ahead (Scan.one (not_equal "'"));
- val cs = explode s;
- in
- if exists is_blank cs then
- sys_error ("extend_lexicon: blank in delimiter " ^ quote s)
- else add lex cs
- end;
- in
- foldl ext (lexicon, strs \\ dest_lexicon lexicon)
- end;
-
-val make_lexicon = extend_lexicon empty_lexicon;
-
-fun merge_lexicons lex1 lex2 =
- let
- val strs1 = dest_lexicon lex1;
- val strs2 = dest_lexicon lex2;
- in
- if strs2 subset strs1 then lex1
- else if strs1 subset strs2 then lex2
- else extend_lexicon lex1 strs2
- end;
-
-
-
-(** scanners **)
-
-exception LEXICAL_ERROR;
+val scan_str =
+ $$ "'" |-- $$ "'" |--
+ !! (fn cs => "Inner lexical error: malformed literal string at " ^ quote ("''" ^ beginning cs))
+ (Scan.repeat scan_chr --| $$ "'" --| $$ "'");
-(* scanner combinators *)
-
-fun (scan >> f) cs = apfst f (scan cs);
-
-fun (scan1 || scan2) cs = scan1 cs handle LEXICAL_ERROR => scan2 cs;
-
-fun (scan1 -- scan2) cs =
- let
- val (x, cs') = scan1 cs;
- val (y, cs'') = scan2 cs';
- in
- ((x, y), cs'')
- end;
-
-fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
-
-
-(* generic scanners *)
-
-fun $$ _ [] = raise LEXICAL_ERROR
- | $$ a (c :: cs) =
- if a = c then (c, cs) else raise LEXICAL_ERROR;
-
-fun scan_empty cs = ([], cs);
-
-fun scan_one _ [] = raise LEXICAL_ERROR
- | scan_one pred (c :: cs) =
- if pred c then (c, cs) else raise LEXICAL_ERROR;
-
-fun scan_any _ [] = ([], [])
- | scan_any pred (chs as c :: cs) =
- if pred c then apfst (cons c) (scan_any pred cs)
- else ([], chs);
-
-fun scan_any1 pred = scan_one pred -- scan_any pred >> op ::;
-
-fun scan_rest cs = (cs, []);
-
-fun scan_end [] = ([], [])
- | scan_end _ = raise LEXICAL_ERROR;
-
-fun optional scan = scan >> Some || scan_empty >> K None;
-
-fun repeat scan cs = (scan -- repeat scan >> op :: || scan_empty) cs;
+fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
-fun repeat1 scan = scan -- repeat scan >> op ::;
-
-fun max_of scan1 scan2 cs =
- (case (optional scan1 cs, optional scan2 cs) of
- (tok1, (None, _)) => tok1
- | ((None, _), tok2) => tok2
- | (tok1 as (Some (_, s1), _), tok2 as (Some (_, s2), _)) =>
- if size s1 >= size s2 then tok1 else tok2);
-
-
-(* other scanners *)
-
-val scan_letter_letdigs = scan_one is_letter -- scan_any is_letdig >> op ::;
-
-val scan_digits1 = scan_any1 is_digit;
-
-val scan_id = scan_letter_letdigs >> implode;
-
-val scan_longid = scan_id ^^ (repeat1 ($$ "." ^^ scan_id) >> implode);
-
-val scan_id_nat =
- scan_id ^^ ($$ "." ^^ (scan_digits1 >> implode) || scan_empty >> K "");
-
-val scan_tid = $$ "'" ^^ scan_id;
-
-val scan_nat = scan_digits1 >> implode;
-
-val scan_int = $$ "~" ^^ scan_nat || scan_nat;
-
-
-(* scan_literal *)
-
-fun scan_literal lex chrs =
- let
- fun scan_lit _ s_cs [] = s_cs
- | scan_lit Empty s_cs _ = s_cs
- | scan_lit (Branch (d, a, lt, eq, gt)) s_cs (chs as c :: cs) =
- if c < d then scan_lit lt s_cs chs
- else if c > d then scan_lit gt s_cs chs
- else scan_lit eq (if a = no_token then s_cs else Some (a, cs)) cs;
- in
- (case scan_lit lex None chrs of
- None => raise LEXICAL_ERROR
- | Some s_cs => s_cs)
- end;
+fun explode_xstr str =
+ #1 (Scan.error (Scan.finite Symbol.eof scan_str) (Symbol.explode str));
@@ -406,39 +245,24 @@
if xids then $$ "_" ^^ scan_id || scan_id
else scan_id;
- val scan_lit = scan_literal lex >> pair Token;
-
val scan_val =
$$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy ||
- $$ "?" ^^ scan_id_nat >> pair VarSy ||
- $$ "'" ^^ scan_id >> pair TFreeSy ||
- $$ "#" ^^ scan_int >> pair NumSy ||
+ scan_var >> pair VarSy ||
+ scan_tid >> pair TFreeSy ||
+ $$ "#" ^^ scan_int >> pair NumSy || (* FIXME remove "#" *)
scan_longid >> pair LongIdentSy ||
scan_xid >> pair IdentSy;
- fun scan_str ("'" :: "'" :: cs) = ([], cs)
- | scan_str ("\\" :: c :: cs) = apfst (cons c) (scan_str cs)
- | scan_str (c :: cs) = apfst (cons c) (scan_str cs)
- | scan_str [] = raise ERROR;
-
+ val scan_lit = Scan.literal lex >> (pair Token o implode);
- fun scan (rev_toks, []) = rev (EndToken :: rev_toks)
- | scan (rev_toks, chs as "'" :: "'" :: cs) =
- let
- val (cs', cs'') = scan_str cs handle ERROR =>
- error ("Lexical error: missing quotes at end of string " ^
- quote (implode chs));
- in
- scan (StrSy (implode_xstr cs') :: rev_toks, cs'')
- end
- | scan (rev_toks, chs as c :: cs) =
- if is_blank c then scan (rev_toks, cs)
- else
- (case max_of scan_lit scan_val chs of
- (None, _) => error ("Lexical error at: " ^ quote (implode chs))
- | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs'));
+ val scan_token =
+ Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => Some (tk s)) ||
+ scan_str >> (Some o StrSy o implode_xstr) ||
+ Scan.one Symbol.is_blank >> K None;
in
- scan ([], chs)
+ (case Scan.error (Scan.finite Symbol.eof (Scan.repeat scan_token)) chs of
+ (toks, []) => mapfilter I toks @ [EndToken]
+ | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs)))
end;
@@ -452,15 +276,14 @@
fun nat_of_chs n [] = n
| nat_of_chs n (c :: cs) = nat_of_chs (n * 10 + (ord c - ord "0")) cs;
- val nat_of = nat_of_chs 0;
+ val nat = nat_of_chs 0;
fun split_vname chs =
- let val (cs, ds) = take_suffix is_digit chs
- in (implode cs, nat_of ds) end
+ let val (cs, ds) = take_suffix Symbol.is_digit chs
+ in (implode cs, nat ds) end
val scan =
- scan_letter_letdigs --
- ($$ "." -- scan_digits1 >> (nat_of o #2) || scan_empty >> K ~1);
+ scan_letter_letdigs -- Scan.optional ($$ "." |-- scan_digits1 >> nat) ~1;
in
(case scan chrs of
((cs, ~1), cs') => (split_vname cs, cs')
@@ -468,38 +291,30 @@
end;
-(* scan_varname *)
+(* indexname *)
-fun scan_varname chs =
- scan_vname chs handle LEXICAL_ERROR
- => error ("scan_varname: bad varname " ^ quote (implode chs));
+fun indexname cs =
+ (case Scan.error (Scan.finite Symbol.eof (Scan.option scan_vname)) cs of
+ (Some xi, []) => xi
+ | _ => error ("Lexical error in variable name: " ^ quote (implode cs)));
-(* scan_var *)
+(* read_var *)
fun const c = Const (c, dummyT);
fun free x = Free (x, dummyT);
fun var xi = Var (xi, dummyT);
-fun scan_var str =
+fun read_var str =
let
fun tvar (x, i) = var ("'" ^ x, i);
val scan =
- $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) ||
- $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) ||
- scan_rest >> (free o implode);
+ $$ "?" |-- $$ "'" |-- scan_vname >> tvar ||
+ $$ "?" |-- scan_vname >> var ||
+ Scan.any Symbol.not_eof >> (free o implode);
in
- #1 (scan (explode str))
- end;
-
-
-(* read_var *)
-
-fun read_var str =
- let val scan = $$ "?" -- scan_vname -- scan_end >> (#2 o #1) in
- #1 (scan (explode str)) handle LEXICAL_ERROR
- => error ("Bad variable " ^ quote str)
+ #1 (Scan.error (Scan.finite Symbol.eof scan) (Symbol.explode str))
end;
--- a/src/Pure/Syntax/syntax.ML Mon Mar 09 16:12:19 1998 +0100
+++ b/src/Pure/Syntax/syntax.ML Mon Mar 09 16:12:39 1998 +0100
@@ -104,7 +104,7 @@
let
fun eq_tr ((c1, (_, s1)), (c2, (_, s2))) = c1 = c2 andalso s1 = s2;
- fun name (s, _) = implode (tl (explode s));
+ fun name (s, _) = implode (tl (Symbol.explode s));
fun merge mode =
let
@@ -162,7 +162,7 @@
datatype syntax =
Syntax of {
- lexicon: lexicon,
+ lexicon: Scan.lexicon,
logtypes: string list,
gram: gram,
consts: string list,
@@ -181,7 +181,7 @@
val empty_syntax =
Syntax {
- lexicon = empty_lexicon,
+ lexicon = Scan.empty_lexicon,
logtypes = [],
gram = empty_gram,
consts = [],
@@ -208,7 +208,7 @@
print_ast_translation, token_translation} = syn_ext;
in
Syntax {
- lexicon = if inout then extend_lexicon lexicon (delims_of xprods) else lexicon,
+ lexicon = if inout then Scan.extend_lexicon lexicon (delims_of xprods) else lexicon,
logtypes = extend_list logtypes1 logtypes2,
gram = if inout then extend_gram gram xprods else gram,
consts = consts2 union consts1,
@@ -243,7 +243,7 @@
tokentrtab = tokentrtab2, prtabs = prtabs2} = tabs2;
in
Syntax {
- lexicon = merge_lexicons lexicon1 lexicon2,
+ lexicon = Scan.merge_lexicons lexicon1 lexicon2,
logtypes = merge_lists logtypes1 logtypes2,
gram = merge_grams gram1 gram2,
consts = merge_lists consts1 consts2,
@@ -282,7 +282,7 @@
val {lexicon, logtypes, prmodes, gram, prtabs, ...} = tabs;
val prmodes' = sort_strings (filter_out (equal "") prmodes);
in
- Pretty.writeln (pretty_strs_qs "lexicon:" (dest_lexicon lexicon));
+ Pretty.writeln (pretty_strs_qs "lexicon:" (map implode (Scan.dest_lexicon lexicon)));
Pretty.writeln (Pretty.strs ("logtypes:" :: logtypes));
Pretty.writeln (Pretty.big_list "prods:" (pretty_gram gram));
Pretty.writeln (pretty_strs_qs "print modes:" prmodes')
@@ -299,8 +299,10 @@
fun pretty_ruletab name tab =
Pretty.big_list name (map pretty_rule (dest_ruletab tab));
+ fun pretty_tokentr (mode, trs) = Pretty.strs (quote mode ^ ":" :: map fst trs);
+
val {consts, parse_ast_trtab, parse_ruletab, parse_trtab, print_trtab,
- print_ruletab, print_ast_trtab, ...} = tabs;
+ print_ruletab, print_ast_trtab, tokentrtab, ...} = tabs;
in
Pretty.writeln (pretty_strs_qs "consts:" consts);
Pretty.writeln (pretty_trtab "parse_ast_translation:" parse_ast_trtab);
@@ -308,7 +310,8 @@
Pretty.writeln (pretty_trtab "parse_translation:" parse_trtab);
Pretty.writeln (pretty_trtab "print_translation:" print_trtab);
Pretty.writeln (pretty_ruletab "print_rules:" print_ruletab);
- Pretty.writeln (pretty_trtab "print_ast_translation:" print_ast_trtab)
+ Pretty.writeln (pretty_trtab "print_ast_translation:" print_ast_trtab);
+ Pretty.writeln (Pretty.big_list "token_translation:" (map pretty_tokentr tokentrtab))
end;
@@ -326,7 +329,7 @@
let
val {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs;
- val chars = SymbolFont.read_charnames (explode str);
+ val chars = Symbol.explode str;
val toks = tokenize lexicon false chars;
val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks));
@@ -350,7 +353,7 @@
let
val {lexicon, gram, parse_ast_trtab, logtypes, ...} = tabs;
val root' = if root mem logtypes then logic else root;
- val chars = SymbolFont.read_charnames (explode str);
+ val chars = Symbol.explode str;
val pts = parse gram root' (tokenize lexicon xids chars);
fun show_pt pt =