src/Pure/Syntax/lexicon.ML
author wenzelm
Wed Oct 31 21:59:07 2001 +0100 (2001-10-31)
changeset 12004 1703de633aaf
parent 11697 8dd899efbd35
child 12785 27debaf2112d
permissions -rw-r--r--
IsarThy.theorem_i: no locale;
wenzelm@18
     1
(*  Title:      Pure/Syntax/lexicon.ML
clasohm@0
     2
    ID:         $Id$
wenzelm@18
     3
    Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
clasohm@0
     4
wenzelm@4703
     5
Lexer for the inner Isabelle syntax (terms and types).
wenzelm@18
     6
*)
clasohm@0
     7
clasohm@0
     8
signature LEXICON0 =
sig
  (*identifier test on plain strings*)
  val is_identifier: string -> bool

  (*conversion between symbol lists and ''...'' literal strings*)
  val implode_xstr: string list -> string
  val explode_xstr: string -> string list

  (*basic scanners: consume a prefix of a symbol list,
    return the scanned text together with the remaining symbols*)
  val scan_id: string list -> string * string list
  val scan_longid: string list -> string * string list
  val scan_var: string list -> string * string list
  val scan_tid: string list -> string * string list
  val scan_tvar: string list -> string * string list
  val scan_nat: string list -> string * string list
  val scan_int: string list -> string * string list

  (*indexnames: printing and parsing*)
  val string_of_vname: indexname -> string
  val string_of_vname': indexname -> string
  val indexname: string list -> indexname
  val read_var: string -> term

  (*term constructors with dummy type*)
  val const: string -> term
  val free: string -> term
  val var: indexname -> term

  (*marking of variable names by suffix*)
  val internal: string -> string
  val dest_internal: string -> string
  val skolem: string -> string
  val dest_skolem: string -> string

  (*reading numbers and identifier lists from strings*)
  val read_nat: string -> int option
  val read_xnum: string -> int
  val read_idents: string -> string list
end;
clasohm@0
    35
clasohm@0
    36
signature LEXICON =
sig
  include LEXICON0

  (*further identifier tests*)
  val is_xid: string -> bool
  val is_tid: string -> bool

  (*tokens produced by the inner lexer*)
  datatype token =
      Token of string
    | IdentSy of string
    | LongIdentSy of string
    | VarSy of string
    | TFreeSy of string
    | TVarSy of string
    | NumSy of string
    | XNumSy of string
    | StrSy of string
    | EndToken

  (*types of terminal arguments*)
  val idT: typ
  val longidT: typ
  val varT: typ
  val tidT: typ
  val tvarT: typ

  (*names of terminal symbols*)
  val terminals: string list
  val is_terminal: string -> bool

  (*operations on tokens*)
  val str_of_token: token -> string
  val display_token: token -> string
  val matching_tokens: token * token -> bool
  val token_assoc: (token option * 'a list) list * token -> 'a list
  val valued_token: token -> bool
  val predef_term: string -> token option

  (*the lexer proper*)
  val tokenize: Scan.lexicon -> bool -> string list -> token list
end;
clasohm@0
    67
paulson@1507
    68
structure Lexicon : LEXICON =
clasohm@0
    69
struct
clasohm@0
    70
wenzelm@4247
    71
wenzelm@18
    72
(** is_identifier etc. **)
wenzelm@18
    73
wenzelm@18
    74
(*a symbol list is an identifier if it is non-empty, its head satisfies
  Symbol.is_letter and all remaining symbols satisfy Symbol.is_letdig*)
fun is_ident (c :: cs) = Symbol.is_letter c andalso forall Symbol.is_letdig cs
  | is_ident [] = false;

(*same test on a plain string, exploded into symbols first*)
fun is_identifier s = is_ident (Symbol.explode s);
wenzelm@18
    78
wenzelm@18
    79
(*identifier with at most one leading "_" stripped before the test*)
fun is_xid s =
  let val syms = Symbol.explode s in
    is_ident (case syms of "_" :: rest => rest | _ => syms)
  end;
wenzelm@18
    83
clasohm@330
    84
(*type identifier: a single quote followed by an identifier*)
fun is_tid s =
  (case Symbol.explode s of
    [] => false
  | c :: cs => c = "'" andalso is_ident cs);
wenzelm@18
    88
clasohm@0
    89
clasohm@0
    90
wenzelm@4703
    91
(** basic scanners **)
wenzelm@4703
    92
wenzelm@4703
    93
(*one Symbol.is_letter symbol followed by any number of Symbol.is_letdig
  symbols, returned as a symbol list*)
val scan_letter_letdigs =
  (Scan.one Symbol.is_letter -- Scan.any Symbol.is_letdig) >> (op ::);

(*at least one Symbol.is_digit symbol, returned as a symbol list*)
val scan_digits1 = Scan.any1 Symbol.is_digit;


(*plain identifier, as a string*)
val scan_id = scan_letter_letdigs >> implode;

(*identifier followed by one or more ".identifier" components*)
val scan_longid = scan_id ^^ (Scan.repeat1 (($$ ".") ^^ scan_id) >> implode);

(*quote followed by an identifier*)
val scan_tid = ($$ "'") ^^ scan_id;


(*digit sequence, as a string*)
val scan_nat = scan_digits1 >> implode;

(*digit sequence with optional leading "-"*)
val scan_int = (($$ "-") ^^ scan_nat) || scan_nat;


(*identifier with optional ".digits" part (empty string if absent)*)
val scan_id_nat = scan_id ^^ Scan.optional (($$ ".") ^^ scan_nat) "";

(*"?" followed by identifier with optional index*)
val scan_var = ($$ "?") ^^ scan_id_nat;

(*"?'" followed by identifier with optional index*)
val scan_tvar = (($$ "?") ^^ ($$ "'")) ^^ scan_id_nat;
wenzelm@4703
   106
wenzelm@4703
   107
wenzelm@4703
   108
wenzelm@18
   109
(** string_of_vname **)
clasohm@0
   110
wenzelm@18
   111
(*print indexname (x, i) with leading "?":
    - "?x.i" if the last symbol of x is a digit, or if that check yields
      no result (the default of if_none is true);
    - "?x"   otherwise if i = 0;
    - "?xi"  otherwise*)
fun string_of_vname (x, i) =
  let
    fun ends_with_digit s = Symbol.is_digit (last_elem (Symbol.explode s));
    val needs_dot = if_none (try ends_with_digit x) true;
    val index_part =
      if needs_dot then "." ^ string_of_int i
      else if i = 0 then ""
      else string_of_int i;
  in "?" ^ x ^ index_part end;
clasohm@0
   120
wenzelm@4703
   121
(*like string_of_vname, but index ~1 yields the bare name*)
fun string_of_vname' (xi as (x, i)) =
  if i = ~1 then x else string_of_vname xi;
wenzelm@2583
   123
wenzelm@18
   124
clasohm@0
   125
wenzelm@18
   126
(** datatype token **)
clasohm@0
   127
wenzelm@18
   128
(*tokens of the inner syntax; every constructor except EndToken
  carries a string*)
datatype token =
    Token of string
  | IdentSy of string
  | LongIdentSy of string
  | VarSy of string
  | TFreeSy of string
  | TVarSy of string
  | NumSy of string
  | XNumSy of string
  | StrSy of string
  | EndToken;
clasohm@0
   139
clasohm@0
   140
wenzelm@237
   141
(* terminal arguments *)
clasohm@0
   142
wenzelm@550
   143
(*types of terminal arguments: type constructors without arguments*)
local
  fun nullary name = Type (name, []);
in
  val idT = nullary "id";
  val longidT = nullary "longid";
  val varT = nullary "var";
  val tidT = nullary "tid";
  val tvarT = nullary "tvar";
end;

(*names of all terminal symbols*)
val terminals =
  ["id", "longid", "var", "tid", "tvar", "num", "xnum", "xstr"];

(*membership test for the list above*)
fun is_terminal s = s mem terminals;
wenzelm@237
   152
clasohm@0
   153
wenzelm@18
   154
(* str_of_token *)
clasohm@0
   155
wenzelm@18
   156
(*string carried by a token; "EOF" for EndToken*)
fun str_of_token tok =
  (case tok of
    EndToken => "EOF"
  | Token s => s
  | IdentSy s => s
  | LongIdentSy s => s
  | VarSy s => s
  | TFreeSy s => s
  | TVarSy s => s
  | NumSy s => s
  | XNumSy s => s
  | StrSy s => s);
clasohm@0
   166
wenzelm@18
   167
wenzelm@18
   168
(* display_token *)
clasohm@0
   169
wenzelm@18
   170
(*printable form of a token: literal tokens are quoted, valued tokens are
  shown as kind(text), EndToken as the empty string*)
fun display_token tok =
  let
    fun wrap opening s = opening ^ s ^ ")";
  in
    (case tok of
      Token s => quote s
    | IdentSy s => wrap "id(" s
    | LongIdentSy s => wrap "longid(" s
    | VarSy s => wrap "var(" s
    | TFreeSy s => wrap "tid(" s
    | TVarSy s => wrap "tvar(" s
    | NumSy s => wrap "num(" s
    | XNumSy s => wrap "xnum(" s
    | StrSy s => wrap "xstr(" s
    | EndToken => "")
  end;
clasohm@0
   180
wenzelm@18
   181
wenzelm@18
   182
(* matching_tokens *)
clasohm@0
   183
wenzelm@18
   184
(*two tokens match if they have the same constructor; for literal tokens
  (Token) the carried strings must be equal as well*)
fun matching_tokens toks =
  (case toks of
    (Token x, Token y) => x = y
  | (IdentSy _, IdentSy _) => true
  | (LongIdentSy _, LongIdentSy _) => true
  | (VarSy _, VarSy _) => true
  | (TFreeSy _, TFreeSy _) => true
  | (TVarSy _, TVarSy _) => true
  | (NumSy _, NumSy _) => true
  | (XNumSy _, XNumSy _) => true
  | (StrSy _, StrSy _) => true
  | (EndToken, EndToken) => true
  | _ => false);
clasohm@0
   195
clasohm@0
   196
wenzelm@376
   197
(* token_assoc *)
wenzelm@376
   198
clasohm@330
   199
(*collect the values of all entries whose key is None or matches the given
  token; values of later entries come first in the result*)
fun token_assoc (list, key) =
  let
    fun selected None = true
      | selected (Some tk) = matching_tokens (tk, key);

    (*prepending to the accumulator puts later entries in front*)
    fun collect acc [] = acc
      | collect acc ((keyi, xi) :: rest) =
          collect (if selected keyi then xi @ acc else acc) rest;
  in collect [] list end;
clasohm@330
   207
clasohm@330
   208
wenzelm@18
   209
(* valued_token *)
clasohm@0
   210
wenzelm@18
   211
(*every token except Token and EndToken is valued*)
fun valued_token (Token _) = false
  | valued_token EndToken = false
  | valued_token _ = true;
clasohm@0
   221
clasohm@0
   222
wenzelm@18
   223
(* predef_term *)
clasohm@0
   224
wenzelm@550
   225
(*token for the name of a terminal symbol, carrying that name;
  None for any other string*)
fun predef_term name =
  (case name of
    "id" => Some (IdentSy "id")
  | "longid" => Some (LongIdentSy "longid")
  | "var" => Some (VarSy "var")
  | "tid" => Some (TFreeSy "tid")
  | "tvar" => Some (TVarSy "tvar")
  | "num" => Some (NumSy "num")
  | "xnum" => Some (XNumSy "xnum")
  | "xstr" => Some (StrSy "xstr")
  | _ => None);
clasohm@0
   234
clasohm@0
   235
wenzelm@4703
   236
(* xstr tokens *)
wenzelm@18
   237
wenzelm@4703
   238
(*one character inside a literal string, tried in this order:
    - backslash followed by any non-eof symbol (the backslash is dropped);
    - any non-eof symbol other than a quote;
    - a quote that is not followed by another quote*)
val scan_chr =
  let
    val escaped = $$ "\\" |-- Scan.one Symbol.not_eof;
    val plain = Scan.one (not_equal "'" andf Symbol.not_eof);
    val lone_quote = $$ "'" --| Scan.ahead (Scan.one (not_equal "'"));
  in escaped || plain || lone_quote end;
wenzelm@18
   242
wenzelm@4703
   243
(*literal string: two quotes, characters, two quotes; yields the list of
  characters.  After the opening quotes the rest is wrapped in !! with a
  "malformed literal string" message built from the remaining input*)
val scan_str =
  let
    val open_quotes = $$ "'" |-- $$ "'";

    fun malformed (cs, _) =
      "Inner lexical error: malformed literal string at " ^
        quote ("''" ^ Symbol.beginning cs);

    val body = Scan.repeat scan_chr --| $$ "'" --| $$ "'";
  in open_quotes |-- !! malformed body end;
wenzelm@18
   248
clasohm@0
   249
wenzelm@4703
   250
(*turn a character list into a ''...'' literal, escaping each quote
  with a backslash*)
fun implode_xstr cs =
  let
    fun escape "'" = "\\'"
      | escape c = c;
  in enclose "''" "''" (implode (map escape cs)) end;
wenzelm@18
   251
wenzelm@4703
   252
(*read a complete ''...'' literal from a string, yielding its characters;
  error if the scan yields no result*)
fun explode_xstr str =
  let
    val result = Scan.read Symbol.stopper scan_str (Symbol.explode str);
  in
    (case result of
      None => error ("Inner lexical error: literal string expected at " ^ quote str)
    | Some cs => cs)
  end;
wenzelm@18
   256
wenzelm@18
   257
wenzelm@18
   258
wenzelm@18
   259
(** tokenize **)
wenzelm@18
   260
wenzelm@2363
   261
(*split the symbol list chs into tokens, terminated by EndToken.
    lex:  lexicon of literal tokens
    xids: if true, identifiers may start with an additional "_"
  Blanks separate tokens and are dropped; input that cannot be scanned
  completely results in an error*)
fun tokenize lex xids chs =
  let
    val scan_xid =
      if xids then (($$ "_" ^^ scan_id) || scan_id)
      else scan_id;

    (*pair the scanned text with the token constructor to be applied*)
    fun tagged con scanner = scanner >> pair con;

    (*order of alternatives is significant*)
    val scan_val =
      tagged TVarSy scan_tvar ||
      tagged VarSy scan_var ||
      tagged TFreeSy scan_tid ||
      tagged NumSy scan_int ||
      tagged XNumSy ($$ "#" ^^ scan_int) ||
      tagged LongIdentSy scan_longid ||
      tagged IdentSy scan_xid;

    val scan_lit = Scan.literal lex >> (pair Token o implode);

    (*literal vs. valued token: Scan.max selects between the two results,
      comparing the scanned strings by <=*)
    val scan_lit_or_val =
      Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => Some (tk s));

    val scan_token =
      scan_lit_or_val ||
      scan_str >> (Some o StrSy o implode_xstr) ||
      Scan.one Symbol.is_blank >> K None;

    val scan_all = Scan.error (Scan.finite Symbol.stopper (Scan.repeat scan_token));
    val (toks, rest) = scan_all chs;
  in
    if null rest then mapfilter I toks @ [EndToken]
    else error ("Inner lexical error at: " ^ quote (implode rest))
  end;
wenzelm@18
   287
wenzelm@18
   288
wenzelm@18
   289
wenzelm@18
   290
(** scan variables **)
wenzelm@18
   291
wenzelm@18
   292
(* scan_vname *)
wenzelm@18
   293
wenzelm@18
   294
(*scan a variable name with optional index, yielding an indexname:
    - "x.i":  the digits after the dot form the index;
    - no dot: trailing digits of the name are split off and form the
              index (0 if there are none)*)
fun scan_vname chrs =
  let
    fun digit_val d = ord d - ord "0";

    (*decimal value of a list of digit symbols; 0 for the empty list*)
    fun nat ds =
      let
        fun go acc [] = acc
          | go acc (d :: rest) = go (acc * 10 + digit_val d) rest;
      in go 0 ds end;

    (*~1 marks the absence of a dotted index*)
    val dotted_index = $$ "." |-- scan_digits1 >> nat;
    val ((name_syms, idx), rest) =
      (scan_letter_letdigs -- Scan.optional dotted_index ~1) chrs;
  in
    if idx = ~1 then
      let val (stem, digits) = take_suffix Symbol.is_digit name_syms
      in ((implode stem, nat digits), rest) end
    else ((implode name_syms, idx), rest)
  end;
wenzelm@18
   312
wenzelm@18
   313
wenzelm@4703
   314
(* indexname *)
wenzelm@18
   315
wenzelm@4703
   316
(*read an indexname from a complete symbol list; error if the scan
  yields no result*)
fun indexname cs =
  (case Scan.read Symbol.stopper scan_vname cs of
    None => error ("Lexical error in variable name: " ^ quote (implode cs))
  | Some xi => xi);
wenzelm@18
   320
wenzelm@18
   321
wenzelm@4703
   322
(* read_var *)
wenzelm@18
   323
wenzelm@550
   324
(*constants, free variables and schematic variables of type dummyT*)
fun const name = Const (name, dummyT);
fun free name = Free (name, dummyT);
fun var ixn = Var (ixn, dummyT);
wenzelm@550
   327
wenzelm@4703
   328
(*read a variable from a string, trying in order:
    - "?'" followed by a vname: Var whose name gets a leading quote;
    - "?" followed by a vname:  Var;
    - anything else:            Free of the whole string*)
fun read_var str =
  let
    val schematic_tvar =
      $$ "?" |-- $$ "'" |-- scan_vname >> (fn (x, i) => var ("'" ^ x, i));
    val schematic_var = $$ "?" |-- scan_vname >> var;
    val anything = Scan.any Symbol.not_eof >> (free o implode);

    val symbols = Symbol.explode str;
  in
    the (Scan.read Symbol.stopper (schematic_tvar || schematic_var || anything) symbols)
  end;
wenzelm@4587
   337
wenzelm@4587
   338
wenzelm@5260
   339
(* variable kinds *)
wenzelm@5260
   340
wenzelm@9289
   341
(*internal names carry the suffix "_"*)
fun internal name = suffix "_" name;
fun dest_internal name = unsuffix "_" name;

(*skolem names carry the suffix "__"*)
fun skolem name = suffix "__" name;
fun dest_skolem name = unsuffix "__" name;
wenzelm@5260
   346
wenzelm@5260
   347
wenzelm@5860
   348
(* read_nat *)
wenzelm@5860
   349
wenzelm@5860
   350
(*read a string consisting of digits only; None if the scan yields
  no result*)
fun read_nat str =
  (case Scan.read Symbol.stopper scan_digits1 (Symbol.explode str) of
    Some digits => Some (#1 (Term.read_int digits))
  | None => None);
wenzelm@5860
   352
wenzelm@5860
   353
wenzelm@9326
   354
(* read_xnum *)
wenzelm@9326
   355
wenzelm@9326
   356
(*read a number with optional "#" prefix and optional "-" sign (in that
  order); the digits are converted by Term.read_int*)
fun read_xnum str =
  let
    fun signed ("-" :: ds) = (~1, ds)
      | signed ds = (1, ds);

    val (sign, digs) =
      (case Symbol.explode str of
        "#" :: cs => signed cs
      | cs => signed cs);
  in sign * #1 (Term.read_int digs) end;
wenzelm@9326
   365
wenzelm@9326
   366
wenzelm@7784
   367
(* read_ident(s) *)
wenzelm@7784
   368
wenzelm@7784
   369
(*read a non-empty sequence of identifiers separated by blanks; error if
  no identifier is found or if unscanned input remains*)
fun read_idents str =
  let
    val skip_blanks = Scan.any Symbol.is_blank;
    val padded_id = skip_blanks |-- scan_id --| skip_blanks;
    (*the second component collects whatever follows the identifiers*)
    val scanner = Scan.repeat1 padded_id -- Scan.any Symbol.not_eof;
  in
    (case Scan.read Symbol.stopper scanner (Symbol.explode str) of
      None => error ("No identifier found in: " ^ quote str)
    | Some (ids, []) => ids
    | Some (_, bad) => error ("Bad identifier: " ^ quote (implode bad)))
  end;
wenzelm@7784
   380
wenzelm@7784
   381
clasohm@0
   382
end;