src/Pure/Syntax/lexicon.ML
author wenzelm
Wed Dec 12 23:36:07 2012 +0100 (2012-12-12 ago)
changeset 50499 f496b2b7bafb
parent 50242 56b9c792a98b
child 51612 6a1e40f9dd55
permissions -rw-r--r--
rendering of selected dialog_result as active_result_color, depending on dynamic command status in output panel, but not static popups etc.;
     1 (*  Title:      Pure/Syntax/lexicon.ML
     2     Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
     3 
     4 Lexer for the inner Isabelle syntax (terms and types).
     5 *)
     6 
     7 signature LEXICON =
     8 sig
     9   structure Syntax:
    10   sig
    11     val const: string -> term
    12     val free: string -> term
    13     val var: indexname -> term
    14   end
    15   val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    16   val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    17   val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    18   val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    19   val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    20   val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    21   val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    22   val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    23   val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    24   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
    25   val is_tid: string -> bool
    26   datatype token_kind =
    27     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
    28     NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF
    29   datatype token = Token of token_kind * string * Position.range
    30   val str_of_token: token -> string
    31   val pos_of_token: token -> Position.T
    32   val is_proper: token -> bool
    33   val mk_eof: Position.T -> token
    34   val eof: token
    35   val is_eof: token -> bool
    36   val stopper: token Scan.stopper
    37   val idT: typ
    38   val longidT: typ
    39   val varT: typ
    40   val tidT: typ
    41   val tvarT: typ
    42   val terminals: string list
    43   val is_terminal: string -> bool
    44   val literal_markup: string -> Markup.T
    45   val report_of_token: token -> Position.report
    46   val reported_token_range: Proof.context -> token -> string
    47   val matching_tokens: token * token -> bool
    48   val valued_token: token -> bool
    49   val predef_term: string -> token option
    50   val implode_str: string list -> string
    51   val explode_str: string -> string list
    52   val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
    53   val read_indexname: string -> indexname
    54   val read_var: string -> term
    55   val read_variable: string -> indexname option
    56   val read_nat: string -> int option
    57   val read_int: string -> int option
    58   val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
    59   val read_float: string -> {mant: int, exp: int}
    60   val mark_class: string -> string val unmark_class: string -> string
    61   val mark_type: string -> string val unmark_type: string -> string
    62   val mark_const: string -> string val unmark_const: string -> string
    63   val mark_fixed: string -> string val unmark_fixed: string -> string
    64   val unmark:
    65    {case_class: string -> 'a,
    66     case_type: string -> 'a,
    67     case_const: string -> 'a,
    68     case_fixed: string -> 'a,
    69     case_default: string -> 'a} -> string -> 'a
    70   val is_marked: string -> bool
    71   val dummy_type: term
    72   val fun_type: term
    73 end;
    74 
    75 structure Lexicon: LEXICON =
    76 struct
    77 
    78 (** syntaxtic terms **)
    79 
    80 structure Syntax =
    81 struct
    82 
    83 fun const c = Const (c, dummyT);
    84 fun free x = Free (x, dummyT);
    85 fun var xi = Var (xi, dummyT);
    86 
    87 end;
    88 
    89 
    90 
    91 (** basic scanners **)
    92 
    93 open Basic_Symbol_Pos;
    94 
    95 fun !!! msg = Symbol_Pos.!!! (fn () => "Inner lexical error: " ^ msg);
    96 
    97 val scan_id = Symbol_Pos.scan_ident;
    98 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
    99 val scan_tid = $$$ "'" @@@ scan_id;
   100 
   101 val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
   102 val scan_int = $$$ "-" @@@ scan_nat || scan_nat;
   103 val scan_natdot = scan_nat @@@ $$$ "." @@@ scan_nat;
   104 val scan_float = $$$ "-" @@@ scan_natdot || scan_natdot;
   105 val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);
   106 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
   107 
   108 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
   109 val scan_var = $$$ "?" @@@ scan_id_nat;
   110 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
   111 
   112 fun is_tid s =
   113   (case try (unprefix "'") s of
   114     SOME s' => Symbol_Pos.is_identifier s'
   115   | NONE => false);
   116 
   117 
   118 
   119 (** datatype token **)
   120 
   121 datatype token_kind =
   122   Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
   123   NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF;
   124 
   125 datatype token = Token of token_kind * string * Position.range;
   126 
   127 fun str_of_token (Token (_, s, _)) = s;
   128 fun pos_of_token (Token (_, _, (pos, _))) = pos;
   129 
   130 fun is_proper (Token (Space, _, _)) = false
   131   | is_proper (Token (Comment, _, _)) = false
   132   | is_proper _ = true;
   133 
   134 
   135 (* stopper *)
   136 
   137 fun mk_eof pos = Token (EOF, "", (pos, Position.none));
   138 val eof = mk_eof Position.none;
   139 
   140 fun is_eof (Token (EOF, _, _)) = true
   141   | is_eof _ = false;
   142 
   143 val stopper = Scan.stopper (K eof) is_eof;
   144 
   145 
   146 (* terminal arguments *)
   147 
   148 val idT = Type ("id", []);
   149 val longidT = Type ("longid", []);
   150 val varT = Type ("var", []);
   151 val tidT = Type ("tid", []);
   152 val tvarT = Type ("tvar", []);
   153 
   154 val terminal_kinds =
   155  [("id", IdentSy),
   156   ("longid", LongIdentSy),
   157   ("var", VarSy),
   158   ("tid", TFreeSy),
   159   ("tvar", TVarSy),
   160   ("num_token", NumSy),
   161   ("float_token", FloatSy),
   162   ("xnum_token", XNumSy),
   163   ("str_token", StrSy)];
   164 
   165 val terminals = map #1 terminal_kinds;
   166 val is_terminal = member (op =) terminals;
   167 
   168 
   169 (* markup *)
   170 
   171 fun literal_markup s =
   172   if Symbol.is_ascii_identifier s then Markup.literal else Markup.delimiter;
   173 
   174 val token_kind_markup =
   175  fn VarSy   => Markup.var
   176   | TFreeSy => Markup.tfree
   177   | TVarSy  => Markup.tvar
   178   | NumSy   => Markup.numeral
   179   | FloatSy => Markup.numeral
   180   | XNumSy  => Markup.numeral
   181   | StrSy   => Markup.inner_string
   182   | Comment => Markup.inner_comment
   183   | _       => Markup.empty;
   184 
   185 fun report_of_token (Token (kind, s, (pos, _))) =
   186   let val markup = if kind = Literal then literal_markup s else token_kind_markup kind
   187   in (pos, markup) end;
   188 
   189 fun reported_token_range ctxt tok =
   190   if is_proper tok
   191   then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range ""
   192   else "";
   193 
   194 
   195 (* matching_tokens *)
   196 
   197 fun matching_tokens (Token (Literal, x, _), Token (Literal, y, _)) = x = y
   198   | matching_tokens (Token (k, _, _), Token (k', _, _)) = k = k';
   199 
   200 
   201 (* valued_token *)
   202 
   203 fun valued_token (Token (Literal, _, _)) = false
   204   | valued_token (Token (EOF, _, _)) = false
   205   | valued_token _ = true;
   206 
   207 
   208 (* predef_term *)
   209 
   210 fun predef_term s =
   211   (case AList.lookup (op =) terminal_kinds s of
   212     SOME sy => SOME (Token (sy, s, Position.no_range))
   213   | NONE => NONE);
   214 
   215 
   216 (* str tokens *)
   217 
   218 val scan_chr =
   219   $$$ "\\" |-- $$$ "'" ||
   220   Scan.one
   221     ((fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) o
   222       Symbol_Pos.symbol) >> single ||
   223   $$$ "'" --| Scan.ahead (~$$$ "'");
   224 
   225 val scan_str =
   226   $$$ "'" @@@ $$$ "'" @@@ !!! "missing end of string"
   227     ((Scan.repeat scan_chr >> flat) @@@ $$$ "'" @@@ $$$ "'");
   228 
   229 val scan_str_body =
   230   $$$ "'" |-- $$$ "'" |-- !!! "missing end of string"
   231     ((Scan.repeat scan_chr >> flat) --| $$$ "'" --| $$$ "'");
   232 
   233 
   234 fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
   235 
   236 fun explode_str str =
   237   (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
   238     SOME cs => map Symbol_Pos.symbol cs
   239   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
   240 
   241 
   242 
   243 (** tokenize **)
   244 
   245 fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;
   246 fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);
   247 
   248 fun tokenize lex xids syms =
   249   let
   250     val scan_xid =
   251       if xids then $$$ "_" @@@ scan_id || scan_id
   252       else scan_id;
   253 
   254     val scan_num = scan_hex || scan_bin || scan_int;
   255 
   256     val scan_val =
   257       scan_tvar >> token TVarSy ||
   258       scan_var >> token VarSy ||
   259       scan_tid >> token TFreeSy ||
   260       scan_float >> token FloatSy ||
   261       scan_num >> token NumSy ||
   262       $$$ "#" @@@ scan_num >> token XNumSy ||
   263       scan_longid >> token LongIdentSy ||
   264       scan_xid >> token IdentSy;
   265 
   266     val scan_lit = Scan.literal lex >> token Literal;
   267 
   268     val scan_token =
   269       Symbol_Pos.scan_comment !!! >> token Comment ||
   270       Scan.max token_leq scan_lit scan_val ||
   271       scan_str >> token StrSy ||
   272       Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;
   273   in
   274     (case Scan.error
   275         (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
   276       (toks, []) => toks
   277     | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
   278         Position.here (#1 (Symbol_Pos.range ss))))
   279   end;
   280 
   281 
   282 
   283 (** scan variables **)
   284 
   285 (* scan_indexname *)
   286 
   287 local
   288 
   289 val scan_vname =
   290   let
   291     fun nat n [] = n
   292       | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs;
   293 
   294     fun idxname cs ds = (implode (rev cs), nat 0 ds);
   295     fun chop_idx [] ds = idxname [] ds
   296       | chop_idx (cs as (_ :: "\\<^sub>" :: _)) ds = idxname cs ds
   297       | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds
   298       | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds
   299       | chop_idx (c :: cs) ds =
   300           if Symbol.is_digit c then chop_idx cs (c :: ds)
   301           else idxname (c :: cs) ds;
   302 
   303     val scan =
   304       (scan_id >> map Symbol_Pos.symbol) --
   305       Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;
   306   in
   307     scan >>
   308       (fn (cs, ~1) => chop_idx (rev cs) []
   309         | (cs, i) => (implode cs, i))
   310   end;
   311 
   312 in
   313 
   314 val scan_indexname = $$$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
   315 
   316 end;
   317 
   318 
   319 (* indexname *)
   320 
   321 fun read_indexname s =
   322   (case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode (s, Position.none)) of
   323     SOME xi => xi
   324   | _ => error ("Lexical error in variable name: " ^ quote s));
   325 
   326 
   327 (* read_var *)
   328 
   329 fun read_var str =
   330   let
   331     val scan =
   332       $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)
   333         >> Syntax.var ||
   334       Scan.many (Symbol.is_regular o Symbol_Pos.symbol)
   335         >> (Syntax.free o implode o map Symbol_Pos.symbol);
   336   in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none))) end;
   337 
   338 
   339 (* read_variable *)
   340 
   341 fun read_variable str =
   342   let val scan = $$$ "?" |-- scan_indexname || scan_indexname
   343   in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none)) end;
   344 
   345 
   346 (* read numbers *)
   347 
   348 local
   349 
   350 fun nat cs =
   351   Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)
   352     (Scan.read Symbol_Pos.stopper scan_nat cs);
   353 
   354 in
   355 
   356 fun read_nat s = nat (Symbol_Pos.explode (s, Position.none));
   357 
   358 fun read_int s =
   359   (case Symbol_Pos.explode (s, Position.none) of
   360     ("-", _) :: cs => Option.map ~ (nat cs)
   361   | cs => nat cs);
   362 
   363 end;
   364 
   365 
   366 (* read_xnum: hex/bin/decimal *)
   367 
   368 local
   369 
   370 val ten = ord "0" + 10;
   371 val a = ord "a";
   372 val A = ord "A";
   373 val _ = a > A orelse raise Fail "Bad ASCII";
   374 
   375 fun remap_hex c =
   376   let val x = ord c in
   377     if x >= a then chr (x - a + ten)
   378     else if x >= A then chr (x - A + ten)
   379     else c
   380   end;
   381 
   382 fun leading_zeros ["0"] = 0
   383   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
   384   | leading_zeros _ = 0;
   385 
   386 in
   387 
   388 fun read_xnum str =
   389   let
   390     val (sign, radix, digs) =
   391       (case Symbol.explode (perhaps (try (unprefix "#")) str) of
   392         "0" :: "x" :: cs => (1, 16, map remap_hex cs)
   393       | "0" :: "b" :: cs => (1, 2, cs)
   394       | "-" :: cs => (~1, 10, cs)
   395       | cs => (1, 10, cs));
   396   in
   397    {radix = radix,
   398     leading_zeros = leading_zeros digs,
   399     value = sign * #1 (Library.read_radix_int radix digs)}
   400   end;
   401 
   402 end;
   403 
   404 fun read_float str =
   405   let
   406     val (sign, cs) =
   407       (case Symbol.explode str of
   408         "-" :: cs => (~1, cs)
   409       | cs => (1, cs));
   410     val (intpart, fracpart) =
   411       (case take_prefix Symbol.is_digit cs of
   412         (intpart, "." :: fracpart) => (intpart, fracpart)
   413       | _ => raise Fail "read_float");
   414   in
   415    {mant = sign * #1 (Library.read_int (intpart @ fracpart)),
   416     exp = length fracpart}
   417   end;
   418 
   419 
   420 (* marked logical entities *)
   421 
   422 fun marker s = (prefix s, unprefix s);
   423 
   424 val (mark_class, unmark_class) = marker "\\<^class>";
   425 val (mark_type, unmark_type) = marker "\\<^type>";
   426 val (mark_const, unmark_const) = marker "\\<^const>";
   427 val (mark_fixed, unmark_fixed) = marker "\\<^fixed>";
   428 
   429 fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
   430   (case try unmark_class s of
   431     SOME c => case_class c
   432   | NONE =>
   433       (case try unmark_type s of
   434         SOME c => case_type c
   435       | NONE =>
   436           (case try unmark_const s of
   437             SOME c => case_const c
   438           | NONE =>
   439               (case try unmark_fixed s of
   440                 SOME c => case_fixed c
   441               | NONE => case_default s))));
   442 
   443 val is_marked =
   444   unmark {case_class = K true, case_type = K true, case_const = K true,
   445     case_fixed = K true, case_default = K false};
   446 
   447 val dummy_type = Syntax.const (mark_type "dummy");
   448 val fun_type = Syntax.const (mark_type "fun");
   449 
   450 end;