src/Pure/Syntax/lexicon.ML
changeset 50239 fb579401dc26
parent 50238 98d35a7368bd
child 50242 56b9c792a98b
equal deleted inserted replaced
50238:98d35a7368bd 50239:fb579401dc26
    10   sig
    10   sig
    11     val const: string -> term
    11     val const: string -> term
    12     val free: string -> term
    12     val free: string -> term
    13     val var: indexname -> term
    13     val var: indexname -> term
    14   end
    14   end
    15   val is_identifier: string -> bool
       
    16   val is_xid: string -> bool
       
    17   val is_tid: string -> bool
       
    18   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    15   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    16   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    20   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    17   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    18   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    22   val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    23   val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    20   val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    24   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    25   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    22   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    26   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    23   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    27   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    24   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
       
    25   val is_tid: string -> bool
    28   datatype token_kind =
    26   datatype token_kind =
    29     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
    27     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
    30     NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF
    28     NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF
    31   datatype token = Token of token_kind * string * Position.range
    29   datatype token = Token of token_kind * string * Position.range
    32   val str_of_token: token -> string
    30   val str_of_token: token -> string
    88 
    86 
    89 end;
    87 end;
    90 
    88 
    91 
    89 
    92 
    90 
    93 (** is_identifier etc. **)
       
    94 
       
    95 val is_identifier = Symbol.is_ident o Symbol.explode;
       
    96 
       
    97 fun is_xid s =
       
    98   (case Symbol.explode s of
       
    99     "_" :: cs => Symbol.is_ident cs
       
   100   | cs => Symbol.is_ident cs);
       
   101 
       
   102 fun is_tid s =
       
   103   (case Symbol.explode s of
       
   104     "'" :: cs => Symbol.is_ident cs
       
   105   | _ => false);
       
   106 
       
   107 
       
   108 
       
   109 (** basic scanners **)
    91 (** basic scanners **)
   110 
    92 
   111 open Basic_Symbol_Pos;
    93 open Basic_Symbol_Pos;
   112 
    94 
   113 fun !!! msg = Symbol_Pos.!!! (fn () => "Inner lexical error: " ^ msg);
    95 fun !!! msg = Symbol_Pos.!!! (fn () => "Inner lexical error: " ^ msg);
   114 
    96 
   115 val scan_id =
    97 val scan_id = Symbol_Pos.scan_ident;
   116   Scan.one (Symbol.is_letter o Symbol_Pos.symbol) :::
       
   117   Scan.many (Symbol.is_letdig o Symbol_Pos.symbol);
       
   118 
       
   119 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    98 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
   120 val scan_tid = $$$ "'" @@@ scan_id;
    99 val scan_tid = $$$ "'" @@@ scan_id;
   121 
   100 
   122 val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
   101 val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
   123 val scan_int = $$$ "-" @@@ scan_nat || scan_nat;
   102 val scan_int = $$$ "-" @@@ scan_nat || scan_nat;
   127 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
   106 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
   128 
   107 
   129 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
   108 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
   130 val scan_var = $$$ "?" @@@ scan_id_nat;
   109 val scan_var = $$$ "?" @@@ scan_id_nat;
   131 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
   110 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
       
   111 
       
   112 fun is_tid s =
       
   113   (case try (unprefix "'") s of
       
   114     SOME s' => Symbol_Pos.is_identifier s'
       
   115   | NONE => false);
   132 
   116 
   133 
   117 
   134 
   118 
   135 (** datatype token **)
   119 (** datatype token **)
   136 
   120 
   313       | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds
   297       | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds
   314       | chop_idx (c :: cs) ds =
   298       | chop_idx (c :: cs) ds =
   315           if Symbol.is_digit c then chop_idx cs (c :: ds)
   299           if Symbol.is_digit c then chop_idx cs (c :: ds)
   316           else idxname (c :: cs) ds;
   300           else idxname (c :: cs) ds;
   317 
   301 
   318     val scan = (scan_id >> map Symbol_Pos.symbol) --
   302     val scan =
       
   303       (scan_id >> map Symbol_Pos.symbol) --
   319       Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;
   304       Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;
   320   in
   305   in
   321     scan >>
   306     scan >>
   322       (fn (cs, ~1) => chop_idx (rev cs) []
   307       (fn (cs, ~1) => chop_idx (rev cs) []
   323         | (cs, i) => (implode cs, i))
   308         | (cs, i) => (implode cs, i))