src/Pure/Syntax/lexicon.ML
changeset 550 353eea6ec232
parent 376 d3d01131470f
child 1143 0dfb8b437f5d
equal deleted inserted replaced
549:5d904be18774 550:353eea6ec232
    43 sig
    43 sig
    44   val is_identifier: string -> bool
    44   val is_identifier: string -> bool
    45   val string_of_vname: indexname -> string
    45   val string_of_vname: indexname -> string
    46   val scan_varname: string list -> indexname * string list
    46   val scan_varname: string list -> indexname * string list
    47   val scan_var: string -> term
    47   val scan_var: string -> term
       
    48   val const: string -> term
       
    49   val free: string -> term
       
    50   val var: indexname -> term
    48 end;
    51 end;
    49 
    52 
    50 signature LEXICON =
    53 signature LEXICON =
    51 sig
    54 sig
    52   include SCANNER
    55   include SCANNER
    57     Token of string |
    60     Token of string |
    58     IdentSy of string |
    61     IdentSy of string |
    59     VarSy of string |
    62     VarSy of string |
    60     TFreeSy of string |
    63     TFreeSy of string |
    61     TVarSy of string |
    64     TVarSy of string |
       
    65     NumSy of string |
       
    66     StrSy of string |
    62     EndToken
    67     EndToken
    63   val id: string
    68   val idT: typ
    64   val var: string
    69   val varT: typ
    65   val tid: string
    70   val tidT: typ
    66   val tvar: string
    71   val tvarT: typ
    67   val terminals: string list
    72   val terminals: string list
    68   val is_terminal: string -> bool
    73   val is_terminal: string -> bool
    69   val str_of_token: token -> string
    74   val str_of_token: token -> string
    70   val display_token: token -> string
    75   val display_token: token -> string
    71   val matching_tokens: token * token -> bool
    76   val matching_tokens: token * token -> bool
   117   Token of string |
   122   Token of string |
   118   IdentSy of string |
   123   IdentSy of string |
   119   VarSy of string |
   124   VarSy of string |
   120   TFreeSy of string |
   125   TFreeSy of string |
   121   TVarSy of string |
   126   TVarSy of string |
       
   127   NumSy of string |
       
   128   StrSy of string |
   122   EndToken;
   129   EndToken;
   123 
   130 
   124 
   131 
   125 (* terminal arguments *)
   132 (* terminal arguments *)
   126 
   133 
   127 val id = "id";
   134 val idT = Type ("id", []);
   128 val var = "var";
   135 val varT = Type ("var", []);
   129 val tid = "tid";
   136 val tidT = Type ("tid", []);
   130 val tvar = "tvar";
   137 val tvarT = Type ("tvar", []);
   131 
   138 
   132 val terminals = [id, var, tid, tvar];
   139 val terminals = ["id", "var", "tid", "tvar", "xnum", "xstr"];
   133 
   140 
   134 fun is_terminal s = s mem terminals;
   141 fun is_terminal s = s mem terminals;
   135 
   142 
   136 
   143 
   137 (* str_of_token *)
   144 (* str_of_token *)
   139 fun str_of_token (Token s) = s
   146 fun str_of_token (Token s) = s
   140   | str_of_token (IdentSy s) = s
   147   | str_of_token (IdentSy s) = s
   141   | str_of_token (VarSy s) = s
   148   | str_of_token (VarSy s) = s
   142   | str_of_token (TFreeSy s) = s
   149   | str_of_token (TFreeSy s) = s
   143   | str_of_token (TVarSy s) = s
   150   | str_of_token (TVarSy s) = s
       
   151   | str_of_token (NumSy s) = s
       
   152   | str_of_token (StrSy s) = s
   144   | str_of_token EndToken = "EOF";
   153   | str_of_token EndToken = "EOF";
   145 
   154 
   146 
   155 
   147 (* display_token *)
   156 (* display_token *)
   148 
   157 
   149 fun display_token (Token s) = quote s
   158 fun display_token (Token s) = quote s
   150   | display_token (IdentSy s) = "id(" ^ s ^ ")"
   159   | display_token (IdentSy s) = "id(" ^ s ^ ")"
   151   | display_token (VarSy s) = "var(" ^ s ^ ")"
   160   | display_token (VarSy s) = "var(" ^ s ^ ")"
   152   | display_token (TFreeSy s) = "tid(" ^ s ^ ")"
   161   | display_token (TFreeSy s) = "tid(" ^ s ^ ")"
   153   | display_token (TVarSy s) = "tvar(" ^ s ^ ")"
   162   | display_token (TVarSy s) = "tvar(" ^ s ^ ")"
       
   163   | display_token (NumSy s) = "xnum(" ^ s ^ ")"
       
   164   | display_token (StrSy s) = "xstr(" ^ s ^ ")"
   154   | display_token EndToken = "";
   165   | display_token EndToken = "";
   155 
   166 
   156 
   167 
   157 (* matching_tokens *)
   168 (* matching_tokens *)
   158 
   169 
   159 fun matching_tokens (Token x, Token y) = (x = y)
   170 fun matching_tokens (Token x, Token y) = (x = y)
   160   | matching_tokens (IdentSy _, IdentSy _) = true
   171   | matching_tokens (IdentSy _, IdentSy _) = true
   161   | matching_tokens (VarSy _, VarSy _) = true
   172   | matching_tokens (VarSy _, VarSy _) = true
   162   | matching_tokens (TFreeSy _, TFreeSy _) = true
   173   | matching_tokens (TFreeSy _, TFreeSy _) = true
   163   | matching_tokens (TVarSy _, TVarSy _) = true
   174   | matching_tokens (TVarSy _, TVarSy _) = true
       
   175   | matching_tokens (NumSy _, NumSy _) = true
       
   176   | matching_tokens (StrSy _, StrSy _) = true
   164   | matching_tokens (EndToken, EndToken) = true
   177   | matching_tokens (EndToken, EndToken) = true
   165   | matching_tokens _ = false;
   178   | matching_tokens _ = false;
   166 
   179 
   167 
   180 
   168 (* token_assoc *)
   181 (* token_assoc *)
   182 fun valued_token (Token _) = false
   195 fun valued_token (Token _) = false
   183   | valued_token (IdentSy _) = true
   196   | valued_token (IdentSy _) = true
   184   | valued_token (VarSy _) = true
   197   | valued_token (VarSy _) = true
   185   | valued_token (TFreeSy _) = true
   198   | valued_token (TFreeSy _) = true
   186   | valued_token (TVarSy _) = true
   199   | valued_token (TVarSy _) = true
       
   200   | valued_token (NumSy _) = true
       
   201   | valued_token (StrSy _) = true
   187   | valued_token EndToken = false;
   202   | valued_token EndToken = false;
   188 
   203 
   189 
   204 
   190 (* predef_term *)
   205 (* predef_term *)
   191 
   206 
   192 fun predef_term name =
   207 fun predef_term "id" = Some (IdentSy "id")
   193   if name = id then Some (IdentSy name)
   208   | predef_term "var" = Some (VarSy "var")
   194   else if name = var then Some (VarSy name)
   209   | predef_term "tid" = Some (TFreeSy "tid")
   195   else if name = tid then Some (TFreeSy name)
   210   | predef_term "tvar" = Some (TVarSy "tvar")
   196   else if name = tvar then Some (TVarSy name)
   211   | predef_term "xnum" = Some (NumSy "xnum")
   197   else None;
   212   | predef_term "xstr" = Some (StrSy "xstr")
       
   213   | predef_term _ = None;
   198 
   214 
   199 
   215 
   200 
   216 
   201 (** datatype lexicon **)
   217 (** datatype lexicon **)
   202 
   218 
   331 
   347 
   332 val scan_tid = $$ "'" ^^ scan_id;
   348 val scan_tid = $$ "'" ^^ scan_id;
   333 
   349 
   334 val scan_nat = scan_digits1 >> implode;
   350 val scan_nat = scan_digits1 >> implode;
   335 
   351 
       
   352 val scan_int = $$ "~" ^^ scan_nat || scan_nat;
       
   353 
   336 
   354 
   337 (* scan_literal *)
   355 (* scan_literal *)
   338 
   356 
   339 fun scan_literal lex chrs =
   357 fun scan_literal lex chrs =
   340   let
   358   let
   360       if xids then $$ "_" ^^ scan_id || scan_id
   378       if xids then $$ "_" ^^ scan_id || scan_id
   361       else scan_id;
   379       else scan_id;
   362 
   380 
   363     val scan_lit = scan_literal lex >> pair Token;
   381     val scan_lit = scan_literal lex >> pair Token;
   364 
   382 
   365     val scan_ident =
   383     val scan_val =
   366       $$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy ||
   384       $$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy ||
   367       $$ "?" ^^ scan_id_nat >> pair VarSy ||
   385       $$ "?" ^^ scan_id_nat >> pair VarSy ||
   368       $$ "'" ^^ scan_id >> pair TFreeSy ||
   386       $$ "'" ^^ scan_id >> pair TFreeSy ||
       
   387       $$ "#" ^^ scan_int >> pair NumSy ||
   369       scan_xid >> pair IdentSy;
   388       scan_xid >> pair IdentSy;
   370 
   389 
   371     fun scan_tokens [] rev_toks = rev (EndToken :: rev_toks)
   390     fun scan_str ("'" :: "'" :: cs) = ([], cs)
   372       | scan_tokens (chs as c :: cs) rev_toks =
   391       | scan_str ("\\" :: c :: cs) = apfst (cons c) (scan_str cs)
   373           if is_blank c then scan_tokens cs rev_toks
   392       | scan_str (c :: cs) = apfst (cons c) (scan_str cs)
       
   393       | scan_str [] = raise ERROR;
       
   394 
       
   395 
       
   396     fun scan (rev_toks, []) = rev (EndToken :: rev_toks)
       
   397       | scan (rev_toks, chs as "'" :: "'" :: cs) =
       
   398           let
       
   399             val (cs', cs'') = scan_str cs handle ERROR =>
       
   400               error ("Lexical error: missing quotes at end of string " ^
       
   401                 quote (implode chs));
       
   402           in
       
   403             scan (StrSy (implode cs') :: rev_toks, cs'')
       
   404           end
       
   405       | scan (rev_toks, chs as c :: cs) =
       
   406           if is_blank c then scan (rev_toks, cs)
   374           else
   407           else
   375             (case max_of scan_lit scan_ident chs of
   408             (case max_of scan_lit scan_val chs of
   376               (None, _) => error ("Lexical error at: " ^ quote (implode chs))
   409               (None, _) => error ("Lexical error at: " ^ quote (implode chs))
   377             | (Some (tk, s), chs') => scan_tokens chs' (tk s :: rev_toks));
   410             | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs'));
   378   in
   411   in
   379     scan_tokens (explode str) []
   412     scan ([], explode str)
   380   end;
   413   end;
   381 
   414 
   382 
   415 
   383 
   416 
   384 (** scan variables **)
   417 (** scan variables **)
   413     => error ("scan_varname: bad varname " ^ quote (implode chs));
   446     => error ("scan_varname: bad varname " ^ quote (implode chs));
   414 
   447 
   415 
   448 
   416 (* scan_var *)
   449 (* scan_var *)
   417 
   450 
       
   451 fun const c = Const (c, dummyT);
       
   452 fun free x = Free (x, dummyT);
       
   453 fun var xi = Var (xi, dummyT);
       
   454 
   418 fun scan_var str =
   455 fun scan_var str =
   419   let
   456   let
   420     fun tvar (x, i) = Var (("'" ^ x, i), dummyT);
   457     fun tvar (x, i) = var ("'" ^ x, i);
   421     fun var x_i = Var (x_i, dummyT);
       
   422     fun free x = Free (x, dummyT);
       
   423 
   458 
   424     val scan =
   459     val scan =
   425       $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) ||
   460       $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) ||
   426       $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) ||
   461       $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) ||
   427       scan_rest >> (free o implode);
   462       scan_rest >> (free o implode);