src/Pure/Syntax/lexicon.ML
author wenzelm
Wed, 15 Feb 2012 13:24:22 +0100
changeset 46483 10a9c31a22b4
parent 45703 c7a13ce60161
child 48992 0518bf89c777
permissions -rw-r--r--
renamed "xstr" to "str_token";
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     1
(*  Title:      Pure/Syntax/lexicon.ML
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     2
    Author:     Tobias Nipkow and Markus Wenzel, TU Muenchen
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     3
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
     4
Lexer for the inner Isabelle syntax (terms and types).
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
     5
*)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     6
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
     7
signature LEXICON =
4247
9bba9251bb4d added implode_xstr: string list -> string, explode_xstr: string -> string list;
wenzelm
parents: 3828
diff changeset
     8
sig
42476
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
     9
  structure Syntax:
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    10
  sig
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    11
    val const: string -> term
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    12
    val free: string -> term
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    13
    val var: indexname -> term
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    14
  end
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    15
  val is_identifier: string -> bool
14679
6ed90bd68eda added is_ascii_identifier;
wenzelm
parents: 13808
diff changeset
    16
  val is_ascii_identifier: string -> bool
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    17
  val is_xid: string -> bool
20165
4de20306a88a export is_tid;
wenzelm
parents: 20096
diff changeset
    18
  val is_tid: string -> bool
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    19
  val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    20
  val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    21
  val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    22
  val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    23
  val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
40290
47f572aff50a support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents: 39510
diff changeset
    24
  val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    25
  val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    26
  val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    27
  val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    28
  val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    29
  datatype token_kind =
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
    30
    Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
    31
    NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    32
  datatype token = Token of token_kind * string * Position.range
27806
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
    33
  val str_of_token: token -> string
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
    34
  val pos_of_token: token -> Position.T
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
    35
  val is_proper: token -> bool
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    36
  val mk_eof: Position.T -> token
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    37
  val eof: token
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    38
  val is_eof: token -> bool
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
    39
  val stopper: token Scan.stopper
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
    40
  val idT: typ
3828
f6a7ca242dc2 added longid;
wenzelm
parents: 2583
diff changeset
    41
  val longidT: typ
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
    42
  val varT: typ
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
    43
  val tidT: typ
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
    44
  val tvarT: typ
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
    45
  val terminals: string list
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
    46
  val is_terminal: string -> bool
44736
c2a3f1c84179 bulk reports for improved message throughput;
wenzelm
parents: 44706
diff changeset
    47
  val report_of_token: token -> Position.report
39510
d9f5f01faa1b Syntax.read_asts error: report token ranges within message -- no side-effect here;
wenzelm
parents: 39507
diff changeset
    48
  val reported_token_range: Proof.context -> token -> string
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    49
  val matching_tokens: token * token -> bool
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    50
  val valued_token: token -> bool
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
    51
  val predef_term: string -> token option
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
    52
  val implode_str: string list -> string
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
    53
  val explode_str: string -> string list
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
    54
  val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    55
  val read_indexname: string -> indexname
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    56
  val read_var: string -> term
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    57
  val read_variable: string -> indexname option
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    58
  val read_nat: string -> int option
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    59
  val read_int: string -> int option
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    60
  val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    61
  val read_float: string -> {mant: int, exp: int}
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    62
  val mark_class: string -> string val unmark_class: string -> string
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    63
  val mark_type: string -> string val unmark_type: string -> string
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    64
  val mark_const: string -> string val unmark_const: string -> string
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    65
  val mark_fixed: string -> string val unmark_fixed: string -> string
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    66
  val unmark:
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    67
   {case_class: string -> 'a,
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    68
    case_type: string -> 'a,
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    69
    case_const: string -> 'a,
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    70
    case_fixed: string -> 'a,
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    71
    case_default: string -> 'a} -> string -> 'a
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    72
  val is_marked: string -> bool
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    73
  val dummy_type: term
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
    74
  val fun_type: term
4247
9bba9251bb4d added implode_xstr: string list -> string, explode_xstr: string -> string list;
wenzelm
parents: 3828
diff changeset
    75
end;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    76
14679
6ed90bd68eda added is_ascii_identifier;
wenzelm
parents: 13808
diff changeset
    77
structure Lexicon: LEXICON =
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    78
struct
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    79
42476
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    80
(** syntaxtic terms **)
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    81
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    82
structure Syntax =
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    83
struct
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    84
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    85
fun const c = Const (c, dummyT);
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    86
fun free x = Free (x, dummyT);
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    87
fun var xi = Var (xi, dummyT);
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    88
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    89
end;
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    90
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    91
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
    92
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    93
(** is_identifier etc. **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    94
16150
c33fe18456fa moved is_ident to General/symbol.ML;
wenzelm
parents: 15991
diff changeset
    95
val is_identifier = Symbol.is_ident o Symbol.explode;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
    96
14679
6ed90bd68eda added is_ascii_identifier;
wenzelm
parents: 13808
diff changeset
    97
fun is_ascii_identifier s =
6ed90bd68eda added is_ascii_identifier;
wenzelm
parents: 13808
diff changeset
    98
  let val cs = Symbol.explode s
16150
c33fe18456fa moved is_ident to General/symbol.ML;
wenzelm
parents: 15991
diff changeset
    99
  in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end;
14679
6ed90bd68eda added is_ascii_identifier;
wenzelm
parents: 13808
diff changeset
   100
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   101
fun is_xid s =
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   102
  (case Symbol.explode s of
16150
c33fe18456fa moved is_ident to General/symbol.ML;
wenzelm
parents: 15991
diff changeset
   103
    "_" :: cs => Symbol.is_ident cs
c33fe18456fa moved is_ident to General/symbol.ML;
wenzelm
parents: 15991
diff changeset
   104
  | cs => Symbol.is_ident cs);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   105
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 237
diff changeset
   106
fun is_tid s =
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   107
  (case Symbol.explode s of
16150
c33fe18456fa moved is_ident to General/symbol.ML;
wenzelm
parents: 15991
diff changeset
   108
    "'" :: cs => Symbol.is_ident cs
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   109
  | _ => false);
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   110
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   111
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   112
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   113
(** basic scanners **)
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   114
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   115
open Basic_Symbol_Pos;
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   116
43947
9b00f09f7721 defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
wenzelm
parents: 43432
diff changeset
   117
fun !!! msg = Symbol_Pos.!!! (fn () => "Inner lexical error: " ^ msg);
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   118
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   119
val scan_id =
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   120
  Scan.one (Symbol.is_letter o Symbol_Pos.symbol) :::
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   121
  Scan.many (Symbol.is_letdig o Symbol_Pos.symbol);
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   122
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   123
val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   124
val scan_tid = $$$ "'" @@@ scan_id;
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   125
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   126
val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   127
val scan_int = $$$ "-" @@@ scan_nat || scan_nat;
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   128
val scan_natdot = scan_nat @@@ $$$ "." @@@ scan_nat;
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   129
val scan_float = $$$ "-" @@@ scan_natdot || scan_natdot;
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   130
val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   131
val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   132
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   133
val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   134
val scan_var = $$$ "?" @@@ scan_id_nat;
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   135
val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   136
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   137
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   138
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   139
(** datatype token **)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   140
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   141
datatype token_kind =
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   142
  Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   143
  NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF;
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   144
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   145
datatype token = Token of token_kind * string * Position.range;
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   146
27806
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
   147
fun str_of_token (Token (_, s, _)) = s;
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
   148
fun pos_of_token (Token (_, _, (pos, _))) = pos;
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
   149
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   150
fun is_proper (Token (Space, _, _)) = false
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   151
  | is_proper (Token (Comment, _, _)) = false
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   152
  | is_proper _ = true;
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   153
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   154
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   155
(* stopper *)
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   156
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   157
fun mk_eof pos = Token (EOF, "", (pos, Position.none));
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   158
val eof = mk_eof Position.none;
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   159
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   160
fun is_eof (Token (EOF, _, _)) = true
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   161
  | is_eof _ = false;
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   162
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   163
val stopper = Scan.stopper (K eof) is_eof;
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   164
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   165
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
   166
(* terminal arguments *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   167
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   168
val idT = Type ("id", []);
3828
f6a7ca242dc2 added longid;
wenzelm
parents: 2583
diff changeset
   169
val longidT = Type ("longid", []);
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   170
val varT = Type ("var", []);
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   171
val tidT = Type ("tid", []);
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   172
val tvarT = Type ("tvar", []);
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   173
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   174
val terminal_kinds =
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   175
 [("id", IdentSy),
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   176
  ("longid", LongIdentSy),
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   177
  ("var", VarSy),
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   178
  ("tid", TFreeSy),
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   179
  ("tvar", TVarSy),
45703
c7a13ce60161 renamed inner syntax categories "num" to "num_token" and "xnum" to "xnum_token";
wenzelm
parents: 45666
diff changeset
   180
  ("num_token", NumSy),
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   181
  ("float_token", FloatSy),
45703
c7a13ce60161 renamed inner syntax categories "num" to "num_token" and "xnum" to "xnum_token";
wenzelm
parents: 45666
diff changeset
   182
  ("xnum_token", XNumSy),
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
   183
  ("str_token", StrSy)];
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   184
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   185
val terminals = map #1 terminal_kinds;
20664
ffbc5a57191a member (op =);
wenzelm
parents: 20313
diff changeset
   186
val is_terminal = member (op =) terminals;
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 164
diff changeset
   187
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   188
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   189
(* markup *)
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   190
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   191
val token_kind_markup =
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   192
 fn Literal     => Isabelle_Markup.literal
44706
fe319b45315c eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents: 43947
diff changeset
   193
  | IdentSy     => Markup.empty
fe319b45315c eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents: 43947
diff changeset
   194
  | LongIdentSy => Markup.empty
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   195
  | VarSy       => Isabelle_Markup.var
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   196
  | TFreeSy     => Isabelle_Markup.tfree
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   197
  | TVarSy      => Isabelle_Markup.tvar
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   198
  | NumSy       => Isabelle_Markup.numeral
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   199
  | FloatSy     => Isabelle_Markup.numeral
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   200
  | XNumSy      => Isabelle_Markup.numeral
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   201
  | StrSy       => Isabelle_Markup.inner_string
38474
e498dc2eb576 uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents: 38237
diff changeset
   202
  | Space       => Markup.empty
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   203
  | Comment     => Isabelle_Markup.inner_comment
38474
e498dc2eb576 uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents: 38237
diff changeset
   204
  | EOF         => Markup.empty;
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   205
44736
c2a3f1c84179 bulk reports for improved message throughput;
wenzelm
parents: 44706
diff changeset
   206
fun report_of_token (Token (kind, s, (pos, _))) =
43432
224006e5ac46 inner literal/delimiter corresponds to outer keyword/operator;
wenzelm
parents: 42476
diff changeset
   207
  let val markup =
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   208
    if kind = Literal andalso not (is_ascii_identifier s) then Isabelle_Markup.delimiter
43432
224006e5ac46 inner literal/delimiter corresponds to outer keyword/operator;
wenzelm
parents: 42476
diff changeset
   209
    else token_kind_markup kind
44736
c2a3f1c84179 bulk reports for improved message throughput;
wenzelm
parents: 44706
diff changeset
   210
  in (pos, markup) end;
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   211
39510
d9f5f01faa1b Syntax.read_asts error: report token ranges within message -- no side-effect here;
wenzelm
parents: 39507
diff changeset
   212
fun reported_token_range ctxt tok =
39168
e3ac771235f7 report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
wenzelm
parents: 38474
diff changeset
   213
  if is_proper tok
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44736
diff changeset
   214
  then Context_Position.reported_text ctxt (pos_of_token tok) Isabelle_Markup.token_range ""
39510
d9f5f01faa1b Syntax.read_asts error: report token ranges within message -- no side-effect here;
wenzelm
parents: 39507
diff changeset
   215
  else "";
39168
e3ac771235f7 report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
wenzelm
parents: 38474
diff changeset
   216
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   217
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   218
(* matching_tokens *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   219
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   220
fun matching_tokens (Token (Literal, x, _), Token (Literal, y, _)) = x = y
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   221
  | matching_tokens (Token (k, _, _), Token (k', _, _)) = k = k';
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   222
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   223
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   224
(* valued_token *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   225
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   226
fun valued_token (Token (Literal, _, _)) = false
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   227
  | valued_token (Token (EOF, _, _)) = false
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   228
  | valued_token _ = true;
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   229
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   230
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   231
(* predef_term *)
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   232
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   233
fun predef_term s =
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   234
  (case AList.lookup (op =) terminal_kinds s of
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   235
    SOME sy => SOME (Token (sy, s, Position.no_range))
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   236
  | NONE => NONE);
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   237
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   238
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
   239
(* str tokens *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   240
14730
59ab60c6fcc6 support nested comments;
wenzelm
parents: 14679
diff changeset
   241
val scan_chr =
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   242
  $$$ "\\" |-- $$$ "'" ||
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   243
  Scan.one
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   244
    ((fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) o
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   245
      Symbol_Pos.symbol) >> single ||
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   246
  $$$ "'" --| Scan.ahead (~$$$ "'");
14730
59ab60c6fcc6 support nested comments;
wenzelm
parents: 14679
diff changeset
   247
59ab60c6fcc6 support nested comments;
wenzelm
parents: 14679
diff changeset
   248
val scan_str =
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   249
  $$$ "'" @@@ $$$ "'" @@@ !!! "missing end of string"
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   250
    ((Scan.repeat scan_chr >> flat) @@@ $$$ "'" @@@ $$$ "'");
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   251
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   252
val scan_str_body =
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   253
  $$$ "'" |-- $$$ "'" |-- !!! "missing end of string"
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   254
    ((Scan.repeat scan_chr >> flat) --| $$$ "'" --| $$$ "'");
14730
59ab60c6fcc6 support nested comments;
wenzelm
parents: 14679
diff changeset
   255
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   256
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
   257
fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   258
46483
10a9c31a22b4 renamed "xstr" to "str_token";
wenzelm
parents: 45703
diff changeset
   259
fun explode_str str =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   260
  (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   261
    SOME cs => map Symbol_Pos.symbol cs
5868
0022d0a913b5 tuned usage of read;
wenzelm
parents: 5860
diff changeset
   262
  | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   263
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   264
27806
ece79c0597fe pos_of_token: Position.T;
wenzelm
parents: 27800
diff changeset
   265
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   266
(** tokenize **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   267
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   268
fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   269
fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   270
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   271
fun tokenize lex xids syms =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   272
  let
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   273
    val scan_xid =
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   274
      if xids then $$$ "_" @@@ scan_id || scan_id
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   275
      else scan_id;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   276
20096
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   277
    val scan_num = scan_hex || scan_bin || scan_int;
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   278
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   279
    val scan_val =
26007
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   280
      scan_tvar >> token TVarSy ||
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   281
      scan_var >> token VarSy ||
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   282
      scan_tid >> token TFreeSy ||
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   283
      scan_float >> token FloatSy ||
26007
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   284
      scan_num >> token NumSy ||
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   285
      $$$ "#" @@@ scan_num >> token XNumSy ||
26007
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   286
      scan_longid >> token LongIdentSy ||
3760d3ff4cce basic scanners: produce symbol list instead of imploded string;
wenzelm
parents: 24630
diff changeset
   287
      scan_xid >> token IdentSy;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   288
27800
df444ddeff56 datatype token: maintain range, tuned representation;
wenzelm
parents: 27773
diff changeset
   289
    val scan_lit = Scan.literal lex >> token Literal;
550
353eea6ec232 replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
wenzelm
parents: 376
diff changeset
   290
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   291
    val scan_token =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   292
      Symbol_Pos.scan_comment !!! >> token Comment ||
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   293
      Scan.max token_leq scan_lit scan_val ||
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   294
      scan_str >> token StrSy ||
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   295
      Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   296
  in
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   297
    (case Scan.error
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   298
        (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
27887
9f3fd48cf673 token_kind: add Space, Comment;
wenzelm
parents: 27806
diff changeset
   299
      (toks, []) => toks
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   300
    | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   301
        Position.str_of (#1 (Symbol_Pos.range ss))))
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   302
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   303
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   304
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   305
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   306
(** scan variables **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   307
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   308
(* scan_indexname *)
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   309
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   310
local
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   311
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   312
val scan_vname =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   313
  let
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   314
    fun nat n [] = n
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   315
      | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   316
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   317
    fun idxname cs ds = (implode (rev cs), nat 0 ds);
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   318
    fun chop_idx [] ds = idxname [] ds
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   319
      | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   320
      | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   321
      | chop_idx (c :: cs) ds =
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   322
          if Symbol.is_digit c then chop_idx cs (c :: ds)
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   323
          else idxname (c :: cs) ds;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   324
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   325
    val scan = (scan_id >> map Symbol_Pos.symbol) --
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   326
      Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   327
  in
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   328
    scan >>
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   329
      (fn (cs, ~1) => chop_idx (rev cs) []
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   330
        | (cs, i) => (implode cs, i))
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   331
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   332
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   333
in
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   334
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   335
val scan_indexname = $$$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
15443
07f78cc82a73 indexname function now parses type variables as well; changed input
berghofe
parents: 14981
diff changeset
   336
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   337
end;
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   338
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   339
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   340
(* indexname *)
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   341
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   342
fun read_indexname s =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   343
  (case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode (s, Position.none)) of
15531
08c8dad8e399 Deleted Library.option type.
skalberg
parents: 15443
diff changeset
   344
    SOME xi => xi
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   345
  | _ => error ("Lexical error in variable name: " ^ quote s));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   346
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   347
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   348
(* read_var *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   349
4703
a50ab39756db adapted to symbols, scan;
wenzelm
parents: 4587
diff changeset
   350
fun read_var str =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   351
  let
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents: 0
diff changeset
   352
    val scan =
42476
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   353
      $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   354
        >> Syntax.var ||
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   355
      Scan.many (Symbol.is_regular o Symbol_Pos.symbol)
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   356
        >> (Syntax.free o implode o map Symbol_Pos.symbol);
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   357
  in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none))) end;
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   358
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   359
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   360
(* read_variable *)
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   361
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   362
fun read_variable str =
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   363
  let val scan = $$$ "?" |-- scan_indexname || scan_indexname
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   364
  in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none)) end;
4587
6bce9ef27d7e added read_var;
wenzelm
parents: 4247
diff changeset
   365
6bce9ef27d7e added read_var;
wenzelm
parents: 4247
diff changeset
   366
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   367
(* read numbers *)
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   368
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   369
local
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   370
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   371
fun nat cs =
40525
14a2e686bdac eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents: 40290
diff changeset
   372
  Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   373
    (Scan.read Symbol_Pos.stopper scan_nat cs);
5860
ed11c9890852 added read_nat;
wenzelm
parents: 5513
diff changeset
   374
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   375
in
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   376
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   377
fun read_nat s = nat (Symbol_Pos.explode (s, Position.none));
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   378
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   379
fun read_int s =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29565
diff changeset
   380
  (case Symbol_Pos.explode (s, Position.none) of
27773
a52166b228b9 improved position handling due to SymbolPos.T;
wenzelm
parents: 27121
diff changeset
   381
    ("-", _) :: cs => Option.map ~ (nat cs)
20313
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   382
  | cs => nat cs);
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   383
bf9101cc4385 renamed Syntax.indexname to Syntax.read_indexname;
wenzelm
parents: 20165
diff changeset
   384
end;
5860
ed11c9890852 added read_nat;
wenzelm
parents: 5513
diff changeset
   385
ed11c9890852 added read_nat;
wenzelm
parents: 5513
diff changeset
   386
20096
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   387
(* read_xnum: hex/bin/decimal *)
9326
1625c1f172b3 added read_xnum;
wenzelm
parents: 9289
diff changeset
   388
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   389
local
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   390
20096
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   391
val ten = ord "0" + 10;
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   392
val a = ord "a";
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   393
val A = ord "A";
35428
bd7d6f65976e more systematic mark/unmark operations;
wenzelm
parents: 35262
diff changeset
   394
val _ = a > A orelse raise Fail "Bad ASCII";
20096
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   395
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   396
fun remap_hex c =
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   397
  let val x = ord c in
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   398
    if x >= a then chr (x - a + ten)
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   399
    else if x >= A then chr (x - A + ten)
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   400
    else c
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   401
  end;
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   402
21781
8314ebb5364d read_xnum: return leading_zeros, radix;
wenzelm
parents: 21774
diff changeset
   403
fun leading_zeros ["0"] = 0
8314ebb5364d read_xnum: return leading_zeros, radix;
wenzelm
parents: 21774
diff changeset
   404
  | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
8314ebb5364d read_xnum: return leading_zeros, radix;
wenzelm
parents: 21774
diff changeset
   405
  | leading_zeros _ = 0;
8314ebb5364d read_xnum: return leading_zeros, radix;
wenzelm
parents: 21774
diff changeset
   406
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   407
in
15965
f422f8283491 Use of IntInf.int instead of int in most numeric simprocs; avoids
paulson
parents: 15570
diff changeset
   408
9326
1625c1f172b3 added read_xnum;
wenzelm
parents: 9289
diff changeset
   409
fun read_xnum str =
1625c1f172b3 added read_xnum;
wenzelm
parents: 9289
diff changeset
   410
  let
20096
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   411
    val (sign, radix, digs) =
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   412
      (case Symbol.explode (perhaps (try (unprefix "#")) str) of
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   413
        "0" :: "x" :: cs => (1, 16, map remap_hex cs)
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   414
      | "0" :: "b" :: cs => (1, 2, cs)
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   415
      | "-" :: cs => (~1, 10, cs)
7058714024b3 uniform treatment of num/xnum;
wenzelm
parents: 20091
diff changeset
   416
      | cs => (1, 10, cs));
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   417
  in
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   418
   {radix = radix,
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   419
    leading_zeros = leading_zeros digs,
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   420
    value = sign * #1 (Library.read_radix_int radix digs)}
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   421
  end;
9326
1625c1f172b3 added read_xnum;
wenzelm
parents: 9289
diff changeset
   422
15991
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   423
end;
670f8e4b5a98 added read_variable: optional question mark on input;
wenzelm
parents: 15965
diff changeset
   424
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   425
fun read_float str =
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   426
  let
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   427
    val (sign, cs) =
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   428
      (case Symbol.explode str of
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   429
        "-" :: cs => (~1, cs)
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   430
      | cs => (1, cs));
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   431
    val (intpart, fracpart) =
28904
3ef9489eeef5 New lexical item "float".
nipkow
parents: 28413
diff changeset
   432
      (case take_prefix Symbol.is_digit cs of
29156
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   433
        (intpart, "." :: fracpart) => (intpart, fracpart)
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   434
      | _ => raise Fail "read_float");
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   435
  in
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   436
   {mant = sign * #1 (Library.read_int (intpart @ fracpart)),
89f76a58a378 renamed terminal category "float" to "float_token", to avoid name
wenzelm
parents: 28904
diff changeset
   437
    exp = length fracpart}
42046
6341c23baf10 added Lexicon.encode_position, Lexicon.decode_position;
wenzelm
parents: 40525
diff changeset
   438
  end;
6341c23baf10 added Lexicon.encode_position, Lexicon.decode_position;
wenzelm
parents: 40525
diff changeset
   439
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   440
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   441
(* marked logical entities *)
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   442
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   443
fun marker s = (prefix s, unprefix s);
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   444
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   445
val (mark_class, unmark_class) = marker "\\<^class>";
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   446
val (mark_type, unmark_type) = marker "\\<^type>";
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   447
val (mark_const, unmark_const) = marker "\\<^const>";
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   448
val (mark_fixed, unmark_fixed) = marker "\\<^fixed>";
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   449
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   450
fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   451
  (case try unmark_class s of
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   452
    SOME c => case_class c
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   453
  | NONE =>
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   454
      (case try unmark_type s of
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   455
        SOME c => case_type c
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   456
      | NONE =>
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   457
          (case try unmark_const s of
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   458
            SOME c => case_const c
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   459
          | NONE =>
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   460
              (case try unmark_fixed s of
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   461
                SOME c => case_fixed c
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   462
              | NONE => case_default s))));
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   463
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   464
val is_marked =
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   465
  unmark {case_class = K true, case_type = K true, case_const = K true,
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   466
    case_fixed = K true, case_default = K false};
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   467
42476
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   468
val dummy_type = Syntax.const (mark_type "dummy");
d0bc1268ef09 clarified auxiliary structure Lexicon.Syntax;
wenzelm
parents: 42290
diff changeset
   469
val fun_type = Syntax.const (mark_type "fun");
42290
b1f544c84040 discontinued special treatment of structure Lexicon;
wenzelm
parents: 42264
diff changeset
   470
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   471
end;