src/Pure/Syntax/lexicon.ML
author wenzelm
Mon, 09 Nov 1998 15:42:08 +0100
changeset 5840 e2d2b896c717
parent 5513 3896c7894a57
child 5860 ed11c9890852
permissions -rw-r--r--
Object logic specific operations.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     1
(*  Title:      Pure/Syntax/lexicon.ML
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     2
    ID:         $Id$
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     3
    Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     4
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
     5
Lexer for the inner Isabelle syntax (terms and types).
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     6
*)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     7
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     8
(*Basic part of the lexicon interface; LEXICON below includes it.*)
signature LEXICON0 =
sig
  (*identifier classification*)
  val is_identifier: string -> bool

  (*xstr literals: list of symbols <-> quoted ''...'' source form*)
  val implode_xstr: string list -> string
  val explode_xstr: string -> string list

  (*basic scanners: consume a prefix of a symbol list, return the scanned
    text together with the remaining symbols*)
  val scan_id: string list -> string * string list
  val scan_longid: string list -> string * string list
  val scan_var: string list -> string * string list
  val scan_tid: string list -> string * string list
  val scan_tvar: string list -> string * string list
  val scan_nat: string list -> string * string list
  val scan_int: string list -> string * string list

  (*printing and reading of indexnames / variables*)
  val string_of_vname: indexname -> string
  val string_of_vname': indexname -> string
  val indexname: string list -> indexname
  val read_var: string -> term

  (*term constructors*)
  val const: string -> term
  val free: string -> term
  val var: indexname -> term

  (*binding / skolem names and their destructors*)
  val binding: string -> string
  val dest_binding: string -> string
  val skolem: string -> string
  val dest_skolem: string -> string
end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    32
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    33
(*Full lexicon interface: LEXICON0 plus tokens, terminals and the tokenizer.*)
signature LEXICON =
sig
  include LEXICON0

  (*further identifier classification*)
  val is_xid: string -> bool
  val is_tid: string -> bool

  (*tokens of the inner syntax*)
  datatype token =
      Token of string
    | IdentSy of string
    | LongIdentSy of string
    | VarSy of string
    | TFreeSy of string
    | TVarSy of string
    | NumSy of string
    | StrSy of string
    | EndToken

  (*types of terminal arguments*)
  val idT: typ
  val longidT: typ
  val varT: typ
  val tidT: typ
  val tvarT: typ

  (*names of terminal symbols*)
  val terminals: string list
  val is_terminal: string -> bool

  (*operations on tokens*)
  val str_of_token: token -> string
  val display_token: token -> string
  val matching_tokens: token * token -> bool
  val token_assoc: (token option * 'a list) list * token -> 'a list
  val valued_token: token -> bool
  val predef_term: string -> token option

  (*the lexer proper*)
  val tokenize: Scan.lexicon -> bool -> string list -> token list
end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    63
1507
f600215b6ea7 Elimination of fully-functorial style.
paulson
parents: 1143
diff changeset
    64
structure Lexicon : LEXICON =
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    65
struct
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    66
4247
9bba9251bb4d added implode_xstr: string list -> string, explode_xstr: string -> string list;
wenzelm
parents: 3828
diff changeset
    67
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    68
(** is_identifier etc. **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    69
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    70
(*A symbol list is an identifier if it starts with a letter
  (Symbol.is_letter) and continues with letters or digits
  (Symbol.is_letdig) only; the empty list is rejected.*)
fun is_ident (c :: cs) = Symbol.is_letter c andalso forall Symbol.is_letdig cs
  | is_ident [] = false;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    72
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    73
(*Explode a string into symbols and test it with is_ident.*)
fun is_identifier str = is_ident (Symbol.explode str);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    74
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    75
(*Like is_identifier, except that one leading "_" symbol is skipped
  before the identifier test is applied.*)
fun is_xid s =
  let val syms = Symbol.explode s in
    (case syms of
      "_" :: rest => is_ident rest
    | _ => is_ident syms)
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    79
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 237
diff changeset
    80
(*Type identifier: the symbol "'" followed by an identifier.*)
fun is_tid s =
  (case Symbol.explode s of
    [] => false
  | c :: cs => c = "'" andalso is_ident cs);
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    84
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    85
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    86
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    87
(** basic scanners **)
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    88
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    89
(*one letter followed by letters/digits, delivered as a list of symbols*)
val scan_letter_letdigs =
  (Scan.one Symbol.is_letter -- Scan.any Symbol.is_letdig) >> (fn (c, cs) => c :: cs);

(*run of digit symbols, scanned with Scan.any1*)
val scan_digits1 = Scan.any1 Symbol.is_digit;
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    91
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    92
(*identifier: letter followed by letters/digits, imploded to one string*)
val scan_id = scan_letter_letdigs >> (fn cs => implode cs);

(*long identifier: an identifier followed by at least one ".identifier"
  component (Scan.repeat1); the components are concatenated*)
val scan_longid =
  let val dot_id = $$ "." ^^ scan_id
  in scan_id ^^ (Scan.repeat1 dot_id >> implode) end;

(*type identifier: "'" followed by an identifier*)
val scan_tid = ($$ "'") ^^ scan_id;
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    95
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    96
(*natural number: the digits from scan_digits1 imploded to one string*)
val scan_nat = scan_digits1 >> (fn ds => implode ds);

(*integer: a natural number with "-" in front, or else a plain natural number*)
val scan_int = ($$ "-" ^^ scan_nat) || scan_nat;
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    98
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
    99
(*identifier with an optional ".nat" suffix; the suffix contributes the
  empty string when absent*)
val scan_id_nat =
  let val index_suffix = Scan.optional ($$ "." ^^ scan_nat) ""
  in scan_id ^^ index_suffix end;

(*schematic variable: "?" followed by identifier and optional index*)
val scan_var = ($$ "?") ^^ scan_id_nat;

(*schematic type variable: "?" and "'" followed by identifier and optional index*)
val scan_tvar = ($$ "?" ^^ $$ "'") ^^ scan_id_nat;
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   102
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   103
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   104
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   105
(** string_of_vname **)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   106
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   107
(*Print an indexname (x, i) as a schematic variable:
    "?x.i"  if the last symbol of x is a digit, or x is empty;
    "?x"    otherwise, if i = 0;
    "?xi"   otherwise.
  The empty-name case is tested explicitly instead of relying on a
  catch-all exception handler around last_elem, which would also swallow
  unrelated exceptions (e.g. interrupts).*)
fun string_of_vname (x, i) =
  let
    val si = string_of_int i;
    (*separator needed: name ends in a digit (or is empty)*)
    val dot =
      (case Symbol.explode x of
        [] => true
      | cs => Symbol.is_digit (last_elem cs));
  in
    if dot then "?" ^ x ^ "." ^ si
    else if i = 0 then "?" ^ x
    else "?" ^ x ^ si
  end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   116
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   117
(*Variant of string_of_vname: index ~1 yields the bare name x, any other
  index is printed via string_of_vname.*)
fun string_of_vname' (xi as (x, i)) =
  if i = ~1 then x else string_of_vname xi;
2583
690835a06cf2 added string_of_vname' (treats neg. index as free);
wenzelm
parents: 2363
diff changeset
   119
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   120
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   121
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   122
(** datatype token **)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   123
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   124
(*Tokens of the inner syntax; see tokenize for how each kind is produced.*)
datatype token =
    Token of string         (*literal recognized by the lexicon*)
  | IdentSy of string       (*identifier (scan_id, optionally with leading "_")*)
  | LongIdentSy of string   (*long identifier (scan_longid)*)
  | VarSy of string         (*schematic variable (scan_var)*)
  | TFreeSy of string       (*type identifier (scan_tid)*)
  | TVarSy of string        (*schematic type variable (scan_tvar)*)
  | NumSy of string         (*numeral: "#" followed by scan_int*)
  | StrSy of string         (*xstr literal (scan_str, re-imploded)*)
  | EndToken;               (*appended at the end of the token list*)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   134
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   135
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
   136
(* terminal arguments *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   137
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   138
(*Types of terminal arguments: type constructors without arguments.*)
local
  fun nullaryT name = Type (name, []);
in

val idT = nullaryT "id";
val longidT = nullaryT "longid";
val varT = nullaryT "var";
val tidT = nullaryT "tid";
val tvarT = nullaryT "tvar";

end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   143
3828
f6a7ca242dc2 added longid;
wenzelm
parents: 2583
diff changeset
   144
(*names of the predefined terminal symbols*)
val terminals =
  ["id", "longid", "var", "tid", "tvar",
   "xnum", "xstr"];

(*membership test for terminals*)
fun is_terminal name = name mem terminals;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
   147
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   148
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   149
(* str_of_token *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   150
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   151
(*The string carried by a token; EndToken is rendered as "EOF".*)
fun str_of_token tok =
  (case tok of
    Token s => s
  | IdentSy s => s
  | LongIdentSy s => s
  | VarSy s => s
  | TFreeSy s => s
  | TVarSy s => s
  | NumSy s => s
  | StrSy s => s
  | EndToken => "EOF");
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   160
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   161
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   162
(* display_token *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   163
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   164
(*Display form of a token: literals are quoted, valued tokens are shown as
  kind(text), and EndToken as the empty string.*)
fun display_token tok =
  let
    fun tagged kind text = kind ^ "(" ^ text ^ ")";
  in
    (case tok of
      Token s => quote s
    | IdentSy s => tagged "id" s
    | LongIdentSy s => tagged "longid" s
    | VarSy s => tagged "var" s
    | TFreeSy s => tagged "tid" s
    | TVarSy s => tagged "tvar" s
    | NumSy s => tagged "xnum" s
    | StrSy s => tagged "xstr" s
    | EndToken => "")
  end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   173
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   174
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   175
(* matching_tokens *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   176
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   177
(*Two tokens match if they are literals with equal text, or if they are of
  the same non-literal kind (the carried text is ignored in that case).*)
fun matching_tokens toks =
  (case toks of
    (Token x, Token y) => x = y
  | (IdentSy _, IdentSy _) => true
  | (LongIdentSy _, LongIdentSy _) => true
  | (VarSy _, VarSy _) => true
  | (TFreeSy _, TFreeSy _) => true
  | (TVarSy _, TVarSy _) => true
  | (NumSy _, NumSy _) => true
  | (StrSy _, StrSy _) => true
  | (EndToken, EndToken) => true
  | _ => false);
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   187
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   188
376
d3d01131470f extended signature SCANNER by some basic scanners and type lexicon;
wenzelm
parents: 330
diff changeset
   189
(* token_assoc *)
d3d01131470f extended signature SCANNER by some basic scanners and type lexicon;
wenzelm
parents: 330
diff changeset
   190
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 237
diff changeset
   191
(*Collect the entries of an association list that apply to key: an entry
  applies if its own key is None or matches key (matching_tokens).  The
  selected value lists are appended such that entries occurring later in
  the list come first in the result.*)
fun token_assoc (list, key) =
  let
    fun applies None = true
      | applies (Some tk) = matching_tokens (tk, key);

    fun collect [] = []
      | collect ((keyi, xi) :: rest) =
          let val tail = collect rest
          in if applies keyi then tail @ xi else tail end;
  in collect list end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 237
diff changeset
   199
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 237
diff changeset
   200
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   201
(* valued_token *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   202
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   203
(*All tokens except literals (Token) and EndToken count as valued.*)
fun valued_token tok =
  (case tok of
    Token _ => false
  | EndToken => false
  | IdentSy _ => true
  | LongIdentSy _ => true
  | VarSy _ => true
  | TFreeSy _ => true
  | TVarSy _ => true
  | NumSy _ => true
  | StrSy _ => true);
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   212
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   213
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   214
(* predef_term *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   215
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   216
(*Map the name of a predefined terminal to a token of the corresponding
  kind, carrying the name itself as text; None for any other name.*)
fun predef_term name =
  (case name of
    "id" => Some (IdentSy "id")
  | "longid" => Some (LongIdentSy "longid")
  | "var" => Some (VarSy "var")
  | "tid" => Some (TFreeSy "tid")
  | "tvar" => Some (TVarSy "tvar")
  | "xnum" => Some (NumSy "xnum")
  | "xstr" => Some (StrSy "xstr")
  | _ => None);
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   224
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   225
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   226
(* xstr tokens *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   227
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   228
(*One character inside an xstr literal.  Alternatives are tried in order:
    - a backslash followed by any non-eof symbol (the backslash is dropped);
    - any non-eof symbol other than "'";
    - a "'" whose next symbol is not "'" (checked by lookahead only).*)
val scan_chr =
  let
    val escaped = $$ "\\" |-- Scan.one Symbol.not_eof;
    val ordinary = Scan.one (not_equal "'" andf Symbol.not_eof);
    val lone_quote = $$ "'" --| Scan.ahead (Scan.one (not_equal "'"));
  in escaped || ordinary || lone_quote end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   232
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   233
(*An xstr literal: two "'" symbols, any number of scan_chr characters, and
  two closing "'" symbols; the result is the list of characters between
  the delimiters.  Once the opening delimiter has been read, the rest is
  wrapped in !! with the error message built by malformed.*)
val scan_str =
  let
    fun malformed (cs, _) =
      "Inner lexical error: malformed literal string at " ^
        quote ("''" ^ Symbol.beginning cs);
    val body_and_close = Scan.repeat scan_chr --| $$ "'" --| $$ "'";
  in
    $$ "'" |-- $$ "'" |-- !! malformed body_and_close
  end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   238
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   239
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   240
(*Turn a list of symbols into an xstr literal: every "'" symbol is written
  as backslash-quote, and the result is enclosed in '' ... ''.
  NOTE(review): other symbols, including backslash itself, are copied
  unchanged, while scan_chr treats backslash as an escape prefix -- confirm
  that callers never pass a backslash symbol.*)
fun implode_xstr cs =
  let
    fun escape "'" = "\\'"
      | escape c = c;
  in enclose "''" "''" (implode (map escape cs)) end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   241
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   242
(*Scan a string as an xstr literal (scan_str) and return the list of its
  characters; whatever follows the literal is discarded.*)
fun explode_xstr str =
  let
    val scan = Scan.error (Scan.finite Symbol.stopper scan_str);
    val (chars, _) = scan (Symbol.explode str);
  in chars end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   244
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   245
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   246
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   247
(** tokenize **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   248
2363
963285471dc5 tokenize: no gets exploded char list;
wenzelm
parents: 1507
diff changeset
   249
(*Split a list of symbols into tokens.
    lex   lexicon of literal tokens, used via Scan.literal;
    xids  if true, identifiers may carry a leading "_";
    chs   the input symbols.
  Blanks are dropped and EndToken is appended.  If the input cannot be
  scanned completely, an error quoting the remaining input is raised.*)
fun tokenize lex xids chs =
  let
    (*identifiers, optionally with leading "_"*)
    val ident =
      if not xids then scan_id
      else $$ "_" ^^ scan_id || scan_id;

    (*valued tokens, paired with their constructor; tried in this order*)
    val valued =
      scan_tvar >> pair TVarSy ||
      scan_var >> pair VarSy ||
      scan_tid >> pair TFreeSy ||
      $$ "#" ^^ scan_int >> pair NumSy ||		(* FIXME remove "#" *)
      scan_longid >> pair LongIdentSy ||
      ident >> pair IdentSy;

    (*literal tokens from the lexicon*)
    val literal = Scan.literal lex >> (pair Token o implode);

    (*one step: literal or valued token (Scan.max decides between the two by
      comparing the scanned strings with op <=), an xstr literal, or a single
      blank, which yields None*)
    val step =
      Scan.max (op <= o pairself snd) literal valued >> (fn (mk, text) => Some (mk text)) ||
      scan_str >> (Some o StrSy o implode_xstr) ||
      Scan.one Symbol.is_blank >> K None;

    val scan_all = Scan.error (Scan.finite Symbol.stopper (Scan.repeat step));
  in
    (case scan_all chs of
      (toks, []) => mapfilter I toks @ [EndToken]
    | (_, rest) => error ("Inner lexical error at: " ^ quote (implode rest)))
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   274
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   275
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   276
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   277
(** scan variables **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   278
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   279
(* scan_vname *)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   280
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   281
(*Scan an indexed variable name from chrs, yielding ((name, index), rest).
  With an explicit ".digits" part, the digits give the index and the name is
  kept whole.  Without it, trailing digits of the name are split off and
  used as index (0 if there are none).*)
fun scan_vname chrs =
  let
    (*decimal value of a list of digit symbols; [] counts as 0*)
    fun nat ds =
      let
        fun decode acc [] = acc
          | decode acc (d :: rest) = decode (acc * 10 + (ord d - ord "0")) rest;
      in decode 0 ds end;

    (*"x12" style: strip the digit suffix and read it as the index*)
    fun split_vname syms =
      let val (base, digits) = take_suffix Symbol.is_digit syms
      in (implode base, nat digits) end;

    (*~1 marks the absence of an explicit ".index" part*)
    val dotted_index = Scan.optional ($$ "." |-- scan_digits1 >> nat) ~1;

    val ((name_syms, idx), rest) = (scan_letter_letdigs -- dotted_index) chrs;
  in
    if idx = ~1 then (split_vname name_syms, rest)
    else ((implode name_syms, idx), rest)
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   299
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   300
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   301
(* indexname *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   302
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   303
(*Read the complete symbol list cs as a single indexed variable name.
  Raises an error unless scan_vname succeeds and consumes all of cs.*)
fun indexname cs =
  let
    val scan = Scan.error (Scan.finite Symbol.stopper (Scan.option scan_vname));
    fun bad_name () = error ("Lexical error in variable name: " ^ quote (implode cs));
  in
    (case scan cs of
      (Some xi, []) => xi
    | _ => bad_name ())
  end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   307
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   308
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   309
(* read_var *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   310
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   311
(*term constructors that fill in dummyT as the type*)
local
  fun untyped mk a = mk (a, dummyT);
in
  val const = untyped Const;
  val free = untyped Free;
  val var = untyped Var;
end;
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   314
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   315
(*Read str as a variable term (typed with dummyT):
    "?'name" or "?'name.i"  -- schematic variable whose name carries the "'" prefix
    "?name"  or "?name.i"   -- schematic variable
    anything else           -- free variable named by the whole string
  A "?" form counts only if it covers the entire input.  Previously the
  unconsumed remainder was silently dropped (e.g. "?x y" was read as ?x);
  such input is now treated like any other non-schematic string and yields
  a free variable over all of str.*)
fun read_var str =
  let
    val chs = Symbol.explode str;

    fun tvar (x, i) = var ("'" ^ x, i);

    val scan =
      $$ "?" |-- $$ "'" |-- scan_vname >> tvar ||
      $$ "?" |-- scan_vname >> var ||
      Scan.any Symbol.not_eof >> (free o implode);
  in
    (case Scan.error (Scan.finite Symbol.stopper scan) chs of
      (t, []) => t
      (*partial match of a "?" form: fall back to the free-variable reading*)
    | _ => free (implode chs))
  end;
6bce9ef27d7e added read_var;
wenzelm
parents: 4247
diff changeset
   326
6bce9ef27d7e added read_var;
wenzelm
parents: 4247
diff changeset
   327
5260
1835a591d3a7 binding / skolem vars;
wenzelm
parents: 5112
diff changeset
   328
(* variable kinds *)
1835a591d3a7 binding / skolem vars;
wenzelm
parents: 5112
diff changeset
   329
5286
cfb74a99182c dest_binding, dest_skolem;
wenzelm
parents: 5260
diff changeset
   330
(*binding and skolem variables are marked by a fixed name suffix;
  the dest_ versions take that suffix off again via unsuffix*)
local
  val binding_sfx = "_BIND_";
  val skolem_sfx = "__";
in
  fun binding x = suffix binding_sfx x;
  fun dest_binding x = unsuffix binding_sfx x;

  fun skolem x = suffix skolem_sfx x;
  fun dest_skolem x = unsuffix skolem_sfx x;
end;
5260
1835a591d3a7 binding / skolem vars;
wenzelm
parents: 5112
diff changeset
   335
1835a591d3a7 binding / skolem vars;
wenzelm
parents: 5112
diff changeset
   336
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   337
end;