src/Pure/Syntax/lexicon.ML
changeset 30573 49899f26fbd1
parent 29565 3f8b24fcfbd6
child 35262 9ea4445d2ccf
equal deleted inserted replaced
30572:8fbc355100f2 30573:49899f26fbd1
     7 signature LEXICON0 =
     7 signature LEXICON0 =
     8 sig
     8 sig
     9   val is_identifier: string -> bool
     9   val is_identifier: string -> bool
    10   val is_ascii_identifier: string -> bool
    10   val is_ascii_identifier: string -> bool
    11   val is_tid: string -> bool
    11   val is_tid: string -> bool
    12   val scan_id: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    12   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    13   val scan_longid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    13   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    14   val scan_tid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    14   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    15   val scan_nat: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    15   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    16   val scan_int: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    16   val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    17   val scan_hex: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    17   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    18   val scan_bin: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    18   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_var: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    19   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    20   val scan_tvar: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    20   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val implode_xstr: string list -> string
    21   val implode_xstr: string list -> string
    22   val explode_xstr: string -> string list
    22   val explode_xstr: string -> string list
    23   val read_indexname: string -> indexname
    23   val read_indexname: string -> indexname
    24   val read_var: string -> term
    24   val read_var: string -> term
    25   val read_variable: string -> indexname option
    25   val read_variable: string -> indexname option
    58   val is_terminal: string -> bool
    58   val is_terminal: string -> bool
    59   val report_token: token -> unit
    59   val report_token: token -> unit
    60   val matching_tokens: token * token -> bool
    60   val matching_tokens: token * token -> bool
    61   val valued_token: token -> bool
    61   val valued_token: token -> bool
    62   val predef_term: string -> token option
    62   val predef_term: string -> token option
    63   val tokenize: Scan.lexicon -> bool -> SymbolPos.T list -> token list
    63   val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
    64 end;
    64 end;
    65 
    65 
    66 structure Lexicon: LEXICON =
    66 structure Lexicon: LEXICON =
    67 struct
    67 struct
    68 
    68 
    86 
    86 
    87 
    87 
    88 
    88 
    89 (** basic scanners **)
    89 (** basic scanners **)
    90 
    90 
    91 open BasicSymbolPos;
    91 open Basic_Symbol_Pos;
    92 
    92 
    93 fun !!! msg = SymbolPos.!!! ("Inner lexical error: " ^ msg);
    93 fun !!! msg = Symbol_Pos.!!! ("Inner lexical error: " ^ msg);
    94 
    94 
    95 val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol);
    95 val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol);
    96 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    96 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    97 val scan_tid = $$$ "'" @@@ scan_id;
    97 val scan_tid = $$$ "'" @@@ scan_id;
    98 
    98 
   218 
   218 
   219 
   219 
   220 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
   220 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
   221 
   221 
   222 fun explode_xstr str =
   222 fun explode_xstr str =
   223   (case Scan.read SymbolPos.stopper scan_str_body (SymbolPos.explode (str, Position.none)) of
   223   (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
   224     SOME cs => map symbol cs
   224     SOME cs => map symbol cs
   225   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
   225   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
   226 
   226 
   227 
   227 
   228 
   228 
   229 (** tokenize **)
   229 (** tokenize **)
   230 
   230 
   231 fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;
   231 fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;
   232 fun token kind ss = Token (kind, SymbolPos.content ss, SymbolPos.range ss);
   232 fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);
   233 
   233 
   234 fun tokenize lex xids syms =
   234 fun tokenize lex xids syms =
   235   let
   235   let
   236     val scan_xid =
   236     val scan_xid =
   237       if xids then $$$ "_" @@@ scan_id || scan_id
   237       if xids then $$$ "_" @@@ scan_id || scan_id
   250       scan_xid >> token IdentSy;
   250       scan_xid >> token IdentSy;
   251 
   251 
   252     val scan_lit = Scan.literal lex >> token Literal;
   252     val scan_lit = Scan.literal lex >> token Literal;
   253 
   253 
   254     val scan_token =
   254     val scan_token =
   255       SymbolPos.scan_comment !!! >> token Comment ||
   255       Symbol_Pos.scan_comment !!! >> token Comment ||
   256       Scan.max token_leq scan_lit scan_val ||
   256       Scan.max token_leq scan_lit scan_val ||
   257       scan_str >> token StrSy ||
   257       scan_str >> token StrSy ||
   258       Scan.many1 (Symbol.is_blank o symbol) >> token Space;
   258       Scan.many1 (Symbol.is_blank o symbol) >> token Space;
   259   in
   259   in
   260     (case Scan.error
   260     (case Scan.error
   261         (Scan.finite SymbolPos.stopper (Scan.repeat scan_token)) syms of
   261         (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
   262       (toks, []) => toks
   262       (toks, []) => toks
   263     | (_, ss) => error ("Inner lexical error at: " ^ SymbolPos.content ss ^
   263     | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
   264         Position.str_of (#1 (SymbolPos.range ss))))
   264         Position.str_of (#1 (Symbol_Pos.range ss))))
   265   end;
   265   end;
   266 
   266 
   267 
   267 
   268 
   268 
   269 (** scan variables **)
   269 (** scan variables **)
   301 
   301 
   302 
   302 
   303 (* indexname *)
   303 (* indexname *)
   304 
   304 
   305 fun read_indexname s =
   305 fun read_indexname s =
   306   (case Scan.read SymbolPos.stopper scan_indexname (SymbolPos.explode (s, Position.none)) of
   306   (case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode (s, Position.none)) of
   307     SOME xi => xi
   307     SOME xi => xi
   308   | _ => error ("Lexical error in variable name: " ^ quote s));
   308   | _ => error ("Lexical error in variable name: " ^ quote s));
   309 
   309 
   310 
   310 
   311 (* read_var *)
   311 (* read_var *)
   315 fun var xi = Var (xi, dummyT);
   315 fun var xi = Var (xi, dummyT);
   316 
   316 
   317 fun read_var str =
   317 fun read_var str =
   318   let
   318   let
   319     val scan =
   319     val scan =
   320       $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one SymbolPos.is_eof) >> var ||
   320       $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof) >> var ||
   321       Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol);
   321       Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol);
   322   in the (Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none))) end;
   322   in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none))) end;
   323 
   323 
   324 
   324 
   325 (* read_variable *)
   325 (* read_variable *)
   326 
   326 
   327 fun read_variable str =
   327 fun read_variable str =
   328   let val scan = $$$ "?" |-- scan_indexname || scan_indexname
   328   let val scan = $$$ "?" |-- scan_indexname || scan_indexname
   329   in Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none)) end;
   329   in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none)) end;
   330 
   330 
   331 
   331 
   332 (* specific identifiers *)
   332 (* specific identifiers *)
   333 
   333 
   334 val constN = "\\<^const>";
   334 val constN = "\\<^const>";
   339 
   339 
   340 local
   340 local
   341 
   341 
   342 fun nat cs =
   342 fun nat cs =
   343   Option.map (#1 o Library.read_int o map symbol)
   343   Option.map (#1 o Library.read_int o map symbol)
   344     (Scan.read SymbolPos.stopper scan_nat cs);
   344     (Scan.read Symbol_Pos.stopper scan_nat cs);
   345 
   345 
   346 in
   346 in
   347 
   347 
   348 fun read_nat s = nat (SymbolPos.explode (s, Position.none));
   348 fun read_nat s = nat (Symbol_Pos.explode (s, Position.none));
   349 
   349 
   350 fun read_int s =
   350 fun read_int s =
   351   (case SymbolPos.explode (s, Position.none) of
   351   (case Symbol_Pos.explode (s, Position.none) of
   352     ("-", _) :: cs => Option.map ~ (nat cs)
   352     ("-", _) :: cs => Option.map ~ (nat cs)
   353   | cs => nat cs);
   353   | cs => nat cs);
   354 
   354 
   355 end;
   355 end;
   356 
   356