src/Pure/Syntax/lexicon.ML
changeset 4247 9bba9251bb4d
parent 3828 f6a7ca242dc2
child 4587 6bce9ef27d7e
equal deleted inserted replaced
4246:c539e702e1d2 4247:9bba9251bb4d
     1 (*  Title:      Pure/Syntax/lexicon.ML
     1 (*  Title:      Pure/Syntax/lexicon.ML
     2     ID:         $Id$
     2     ID:         $Id$
     3     Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
     3     Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
     4 
     4 
     5 Scanner combinators and Isabelle's main lexer (used for terms and types).
     5 Scanner combinators and the main lexer (for terms and types).
     6 *)
     6 *)
     7 
     7 
     8 infix 5 -- ^^;
     8 infix 5 -- ^^;
     9 infix 3 >>;
     9 infix 3 >>;
    10 infix 0 ||;
    10 infix 0 ||;
    11 
    11 
    12 signature SCANNER =
    12 signature SCANNER =
    13   sig
    13 sig
    14   exception LEXICAL_ERROR
    14   exception LEXICAL_ERROR
    15   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    15   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    16   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    16   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    17   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    17   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    18   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    18   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    36   val empty_lexicon: lexicon
    36   val empty_lexicon: lexicon
    37   val extend_lexicon: lexicon -> string list -> lexicon
    37   val extend_lexicon: lexicon -> string list -> lexicon
    38   val make_lexicon: string list -> lexicon
    38   val make_lexicon: string list -> lexicon
    39   val merge_lexicons: lexicon -> lexicon -> lexicon
    39   val merge_lexicons: lexicon -> lexicon -> lexicon
    40   val scan_literal: lexicon -> string list -> string * string list
    40   val scan_literal: lexicon -> string list -> string * string list
    41   end;
    41 end;
    42 
    42 
    43 signature LEXICON0 =
    43 signature LEXICON0 =
    44   sig
    44 sig
    45   val is_identifier: string -> bool
    45   val is_identifier: string -> bool
       
    46   val implode_xstr: string list -> string
       
    47   val explode_xstr: string -> string list
    46   val string_of_vname: indexname -> string
    48   val string_of_vname: indexname -> string
    47   val string_of_vname': indexname -> string
    49   val string_of_vname': indexname -> string
    48   val scan_varname: string list -> indexname * string list
    50   val scan_varname: string list -> indexname * string list
    49   val scan_var: string -> term
    51   val scan_var: string -> term
    50   val const: string -> term
    52   val const: string -> term
    51   val free: string -> term
    53   val free: string -> term
    52   val var: indexname -> term
    54   val var: indexname -> term
    53   end;
    55 end;
    54 
    56 
    55 signature LEXICON =
    57 signature LEXICON =
    56   sig
    58 sig
    57   include SCANNER
    59   include SCANNER
    58   include LEXICON0
    60   include LEXICON0
    59   val is_xid: string -> bool
    61   val is_xid: string -> bool
    60   val is_tid: string -> bool
    62   val is_tid: string -> bool
    61   datatype token =
    63   datatype token =
    80   val matching_tokens: token * token -> bool
    82   val matching_tokens: token * token -> bool
    81   val token_assoc: (token option * 'a list) list * token -> 'a list
    83   val token_assoc: (token option * 'a list) list * token -> 'a list
    82   val valued_token: token -> bool
    84   val valued_token: token -> bool
    83   val predef_term: string -> token option
    85   val predef_term: string -> token option
    84   val tokenize: lexicon -> bool -> string list -> token list
    86   val tokenize: lexicon -> bool -> string list -> token list
    85   end;
    87 end;
    86 
    88 
    87 structure Lexicon : LEXICON =
    89 structure Lexicon : LEXICON =
    88 struct
    90 struct
       
    91 
    89 
    92 
    90 (** is_identifier etc. **)
    93 (** is_identifier etc. **)
    91 
    94 
    92 fun is_ident [] = false
    95 fun is_ident [] = false
    93   | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
    96   | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
   223   | predef_term "tid" = Some (TFreeSy "tid")
   226   | predef_term "tid" = Some (TFreeSy "tid")
   224   | predef_term "tvar" = Some (TVarSy "tvar")
   227   | predef_term "tvar" = Some (TVarSy "tvar")
   225   | predef_term "xnum" = Some (NumSy "xnum")
   228   | predef_term "xnum" = Some (NumSy "xnum")
   226   | predef_term "xstr" = Some (StrSy "xstr")
   229   | predef_term "xstr" = Some (StrSy "xstr")
   227   | predef_term _ = None;
   230   | predef_term _ = None;
       
   231 
       
   232 
       
   233 (*  xstr tokens *)
       
   234 
       
   235 fun implode_xstr cs = enclose "''" "''" (implode cs);
       
   236 
       
   237 fun explode_xstr str =
       
   238   rev (tl (tl (rev (tl (tl (explode str))))));
   228 
   239 
   229 
   240 
   230 
   241 
   231 (** datatype lexicon **)
   242 (** datatype lexicon **)
   232 
   243 
   415           let
   426           let
   416             val (cs', cs'') = scan_str cs handle ERROR =>
   427             val (cs', cs'') = scan_str cs handle ERROR =>
   417               error ("Lexical error: missing quotes at end of string " ^
   428               error ("Lexical error: missing quotes at end of string " ^
   418                 quote (implode chs));
   429                 quote (implode chs));
   419           in
   430           in
   420             scan (StrSy (implode cs') :: rev_toks, cs'')
   431             scan (StrSy (implode_xstr cs') :: rev_toks, cs'')
   421           end
   432           end
   422       | scan (rev_toks, chs as c :: cs) =
   433       | scan (rev_toks, chs as c :: cs) =
   423           if is_blank c then scan (rev_toks, cs)
   434           if is_blank c then scan (rev_toks, cs)
   424           else
   435           else
   425             (case max_of scan_lit scan_val chs of
   436             (case max_of scan_lit scan_val chs of
   479       scan_rest >> (free o implode);
   490       scan_rest >> (free o implode);
   480   in
   491   in
   481     #1 (scan (explode str))
   492     #1 (scan (explode str))
   482   end;
   493   end;
   483 
   494 
       
   495 
   484 end;
   496 end;
   485