src/Pure/Syntax/lexicon.ML
changeset 58421 37cbbd8eb460
parent 58410 6d46ad54a2ab
child 58854 b979c781c2db
equal deleted inserted replaced
58420:00bf84d3f526 58421:37cbbd8eb460
    14   end
    14   end
    15   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    15   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    16   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    16   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    17   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    17   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    18   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    18   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
       
    20   val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    20   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    22   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    23   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    22   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    24   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    23   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    25   val is_tid: string -> bool
    24   val is_tid: string -> bool
    26   datatype token_kind =
    25   datatype token_kind =
    27     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
    26     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
    28     FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF
    27     FloatSy | StrSy | StringSy | Cartouche | Space | Comment | EOF
    29   datatype token = Token of token_kind * string * Position.range
    28   datatype token = Token of token_kind * string * Position.range
    30   val str_of_token: token -> string
    29   val str_of_token: token -> string
    31   val pos_of_token: token -> Position.T
    30   val pos_of_token: token -> Position.T
    32   val is_proper: token -> bool
    31   val is_proper: token -> bool
    33   val mk_eof: Position.T -> token
    32   val mk_eof: Position.T -> token
    50   val read_indexname: string -> indexname
    49   val read_indexname: string -> indexname
    51   val read_var: string -> term
    50   val read_var: string -> term
    52   val read_variable: string -> indexname option
    51   val read_variable: string -> indexname option
    53   val read_nat: string -> int option
    52   val read_nat: string -> int option
    54   val read_int: string -> int option
    53   val read_int: string -> int option
    55   val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
    54   val read_num: string -> {radix: int, leading_zeros: int, value: int}
    56   val read_float: string -> {mant: int, exp: int}
    55   val read_float: string -> {mant: int, exp: int}
    57   val mark_class: string -> string val unmark_class: string -> string
    56   val mark_class: string -> string val unmark_class: string -> string
    58   val mark_type: string -> string val unmark_type: string -> string
    57   val mark_type: string -> string val unmark_type: string -> string
    59   val mark_const: string -> string val unmark_const: string -> string
    58   val mark_const: string -> string val unmark_const: string -> string
    60   val mark_fixed: string -> string val unmark_fixed: string -> string
    59   val mark_fixed: string -> string val unmark_fixed: string -> string
    97 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    96 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    98 val scan_tid = $$$ "'" @@@ scan_id;
    97 val scan_tid = $$$ "'" @@@ scan_id;
    99 
    98 
   100 val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
    99 val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
   101 val scan_float = scan_nat @@@ $$$ "." @@@ scan_nat;
   100 val scan_float = scan_nat @@@ $$$ "." @@@ scan_nat;
   102 val scan_int = $$$ "-" @@@ scan_nat || scan_nat;
       
   103 val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);
   101 val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);
   104 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
   102 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
       
   103 val scan_num = scan_hex || scan_bin || scan_nat;
   105 
   104 
   106 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
   105 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
   107 val scan_var = $$$ "?" @@@ scan_id_nat;
   106 val scan_var = $$$ "?" @@@ scan_id_nat;
   108 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
   107 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
   109 
   108 
   116 
   115 
   117 (** datatype token **)
   116 (** datatype token **)
   118 
   117 
   119 datatype token_kind =
   118 datatype token_kind =
   120   Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
   119   Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
   121   FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;
   120   FloatSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;
   122 
   121 
   123 datatype token = Token of token_kind * string * Position.range;
   122 datatype token = Token of token_kind * string * Position.range;
   124 
   123 
   125 fun str_of_token (Token (_, s, _)) = s;
   124 fun str_of_token (Token (_, s, _)) = s;
   126 fun pos_of_token (Token (_, _, (pos, _))) = pos;
   125 fun pos_of_token (Token (_, _, (pos, _))) = pos;
   149   ("var", VarSy),
   148   ("var", VarSy),
   150   ("tid", TFreeSy),
   149   ("tid", TFreeSy),
   151   ("tvar", TVarSy),
   150   ("tvar", TVarSy),
   152   ("num_token", NumSy),
   151   ("num_token", NumSy),
   153   ("float_token", FloatSy),
   152   ("float_token", FloatSy),
   154   ("xnum_token", XNumSy),
       
   155   ("str_token", StrSy),
   153   ("str_token", StrSy),
   156   ("string_token", StringSy),
   154   ("string_token", StringSy),
   157   ("cartouche", Cartouche)];
   155   ("cartouche", Cartouche)];
   158 
   156 
   159 val terminals = map #1 terminal_kinds;
   157 val terminals = map #1 terminal_kinds;
   170 val token_kind_markup =
   168 val token_kind_markup =
   171  fn TFreeSy => Markup.tfree
   169  fn TFreeSy => Markup.tfree
   172   | TVarSy => Markup.tvar
   170   | TVarSy => Markup.tvar
   173   | NumSy => Markup.numeral
   171   | NumSy => Markup.numeral
   174   | FloatSy => Markup.numeral
   172   | FloatSy => Markup.numeral
   175   | XNumSy => Markup.numeral
       
   176   | StrSy => Markup.inner_string
   173   | StrSy => Markup.inner_string
   177   | StringSy => Markup.inner_string
   174   | StringSy => Markup.inner_string
   178   | Cartouche => Markup.inner_cartouche
   175   | Cartouche => Markup.inner_cartouche
   179   | Comment => Markup.inner_comment
   176   | Comment => Markup.inner_comment
   180   | _ => Markup.empty;
   177   | _ => Markup.empty;
   261   let
   258   let
   262     val scan_xid =
   259     val scan_xid =
   263       (if xids then $$$ "_" @@@ scan_id || scan_id else scan_id) ||
   260       (if xids then $$$ "_" @@@ scan_id || scan_id else scan_id) ||
   264       $$$ "_" @@@ $$$ "_";
   261       $$$ "_" @@@ $$$ "_";
   265 
   262 
   266     val scan_num_unsigned = scan_hex || scan_bin || scan_nat;
       
   267     val scan_num_signed = scan_hex || scan_bin || scan_int;
       
   268 
       
   269     val scan_val =
   263     val scan_val =
   270       scan_tvar >> token TVarSy ||
   264       scan_tvar >> token TVarSy ||
   271       scan_var >> token VarSy ||
   265       scan_var >> token VarSy ||
   272       scan_tid >> token TFreeSy ||
   266       scan_tid >> token TFreeSy ||
   273       scan_float >> token FloatSy ||
   267       scan_float >> token FloatSy ||
   274       scan_num_unsigned >> token NumSy ||
   268       scan_num >> token NumSy ||
   275       $$$ "#" @@@ scan_num_signed >> token XNumSy ||
       
   276       scan_longid >> token LongIdentSy ||
   269       scan_longid >> token LongIdentSy ||
   277       scan_xid >> token IdentSy;
   270       scan_xid >> token IdentSy;
   278 
   271 
   279     val scan_lit = Scan.literal lex >> token Literal;
   272     val scan_lit = Scan.literal lex >> token Literal;
   280 
   273 
   376   | cs => nat cs);
   369   | cs => nat cs);
   377 
   370 
   378 end;
   371 end;
   379 
   372 
   380 
   373 
   381 (* read_xnum: hex/bin/decimal *)
   374 (* read_num: hex/bin/decimal *)
   382 
   375 
   383 local
   376 local
   384 
   377 
   385 val ten = ord "0" + 10;
   378 val ten = ord "0" + 10;
   386 val a = ord "a";
   379 val a = ord "a";
   398   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
   391   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
   399   | leading_zeros _ = 0;
   392   | leading_zeros _ = 0;
   400 
   393 
   401 in
   394 in
   402 
   395 
   403 fun read_xnum str =
   396 fun read_num str =
   404   let
   397   let
   405     val (sign, radix, digs) =
   398     val (radix, digs) =
   406       (case Symbol.explode (perhaps (try (unprefix "#")) str) of
   399       (case Symbol.explode str of
   407         "0" :: "x" :: cs => (1, 16, map remap_hex cs)
   400         "0" :: "x" :: cs => (16, map remap_hex cs)
   408       | "0" :: "b" :: cs => (1, 2, cs)
   401       | "0" :: "b" :: cs => (2, cs)
   409       | "-" :: cs => (~1, 10, cs)
   402       | cs => (10, cs));
   410       | cs => (1, 10, cs));
       
   411   in
   403   in
   412    {radix = radix,
   404    {radix = radix,
   413     leading_zeros = leading_zeros digs,
   405     leading_zeros = leading_zeros digs,
   414     value = sign * #1 (Library.read_radix_int radix digs)}
   406     value = #1 (Library.read_radix_int radix digs)}
   415   end;
   407   end;
   416 
   408 
   417 end;
   409 end;
   418 
   410 
   419 fun read_float str =
   411 fun read_float str =
   420   let
   412   let
   421     val (sign, cs) =
   413     val cs = Symbol.explode str;
   422       (case Symbol.explode str of
       
   423         "-" :: cs => (~1, cs)
       
   424       | cs => (1, cs));
       
   425     val (intpart, fracpart) =
   414     val (intpart, fracpart) =
   426       (case take_prefix Symbol.is_digit cs of
   415       (case take_prefix Symbol.is_digit cs of
   427         (intpart, "." :: fracpart) => (intpart, fracpart)
   416         (intpart, "." :: fracpart) => (intpart, fracpart)
   428       | _ => raise Fail "read_float");
   417       | _ => raise Fail "read_float");
   429   in
   418   in
   430    {mant = sign * #1 (Library.read_int (intpart @ fracpart)),
   419    {mant = #1 (Library.read_int (intpart @ fracpart)),
   431     exp = length fracpart}
   420     exp = length fracpart}
   432   end;
   421   end;
   433 
   422 
   434 
   423 
   435 (* marked logical entities *)
   424 (* marked logical entities *)