src/Pure/ML/ml_lex.ML
author wenzelm
Sat, 13 Dec 2008 15:00:39 +0100
changeset 29091 b81fe045e799
parent 27817 78cae5cca09e
child 29606 fedb8be05f24
permissions -rw-r--r--
Context.display_names;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/ML/ml_lex.ML
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     3
    Author:     Makarius
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     4
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     5
Lexical syntax for SML.
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     6
*)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     7
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     8
signature ML_LEX =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     9
sig
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    10
  datatype token_kind =
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    11
    Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    12
    Space | Comment | Error of string | EOF
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    13
  eqtype token
27732
8dbf5761a24a abstract type Scan.stopper;
wenzelm
parents: 24596
diff changeset
    14
  val stopper: token Scan.stopper
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    15
  val is_regular: token -> bool
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    16
  val is_improper: token -> bool
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    17
  val pos_of: token -> string
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    18
  val kind_of: token -> token_kind
27817
78cae5cca09e renamed ML_Lex.val_of to content_of;
wenzelm
parents: 27799
diff changeset
    19
  val content_of: token -> string
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    20
  val keywords: string list
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    21
  val source: (Symbol.symbol, 'a) Source.source ->
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    22
    (token, (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source)
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    23
      Source.source) Source.source
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    24
end;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    25
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    26
structure ML_Lex: ML_LEX =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    27
struct
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    28
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    29
(** tokens **)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    30
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    31
(* datatype token *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    32
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    33
datatype token_kind =
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    34
  Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    35
  Space | Comment | Error of string | EOF;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    36
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    37
datatype token = Token of Position.range * (token_kind * string);
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    38
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    39
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    40
(* position *)
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    41
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    42
fun position_of (Token ((pos, _), _)) = pos;
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    43
fun end_position_of (Token ((_, pos), _)) = pos;
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    44
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    45
val pos_of = Position.str_of o position_of;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    46
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    47
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    48
(* control tokens *)
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    49
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    50
fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    51
val eof = mk_eof Position.none;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    52
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    53
fun is_eof (Token (_, (EOF, _))) = true
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    54
  | is_eof _ = false;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    55
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    56
val stopper =
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    57
  Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    58
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    59
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    60
(* token content *)
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    61
27817
78cae5cca09e renamed ML_Lex.val_of to content_of;
wenzelm
parents: 27799
diff changeset
    62
fun content_of (Token (_, (_, x))) = x;
78cae5cca09e renamed ML_Lex.val_of to content_of;
wenzelm
parents: 27799
diff changeset
    63
fun token_leq (tok, tok') = content_of tok <= content_of tok';
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    64
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    65
fun kind_of (Token (_, (k, _))) = k;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    66
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    67
fun is_regular (Token (_, (Error _, _))) = false
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    68
  | is_regular (Token (_, (EOF, _))) = false
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    69
  | is_regular _ = true;
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    70
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    71
fun is_improper (Token (_, (Space, _))) = true
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    72
  | is_improper (Token (_, (Comment, _))) = true
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    73
  | is_improper _ = false;
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    74
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
    75
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    76
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    77
(** scanners **)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    78
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    79
open BasicSymbolPos;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    80
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    81
fun !!! msg = SymbolPos.!!! ("SML lexical error: " ^ msg);
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    82
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    83
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    84
(* blanks *)
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    85
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    86
val scan_blank = Scan.one (Symbol.is_ascii_blank o symbol);
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
    87
val scan_blanks1 = Scan.repeat1 scan_blank;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    88
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    89
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    90
(* keywords *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    91
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    92
val keywords = ["#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    93
  "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    94
  "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    95
  "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    96
  "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    97
  "sharing", "sig", "signature", "struct", "structure", "then", "type",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    98
  "val", "where", "while", "with", "withtype"];
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    99
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   100
val lex = Scan.make_lexicon (map explode keywords);
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   101
fun scan_keyword x = Scan.literal lex x;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   102
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   103
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   104
(* identifiers *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   105
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   106
local
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   107
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   108
val scan_letdigs =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   109
  Scan.many ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o symbol);
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   110
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   111
val scan_alphanumeric = Scan.one (Symbol.is_ascii_letter o symbol) -- scan_letdigs >> op ::;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   112
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   113
val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~") o symbol);
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   114
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   115
in
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   116
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   117
val scan_ident = scan_alphanumeric || scan_symbolic;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   118
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   119
val scan_longident =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   120
  (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   121
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   122
val scan_typevar = $$$ "'" @@@ scan_letdigs;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   123
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   124
end;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   125
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   126
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   127
(* numerals *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   128
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   129
local
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   130
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   131
val scan_dec = Scan.many1 (Symbol.is_ascii_digit o symbol);
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   132
val scan_hex = Scan.many1 (Symbol.is_ascii_hex o symbol);
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   133
val scan_sign = Scan.optional ($$$ "~") [];
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   134
val scan_decint = scan_sign @@@ scan_dec;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   135
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   136
in
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   137
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   138
val scan_word =
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   139
  $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   140
  $$$ "0" @@@ $$$ "w" @@@ scan_dec;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   141
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   142
val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   143
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   144
val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   145
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   146
val scan_real =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   147
  scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   148
  scan_decint @@@ scan_exp;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   149
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   150
end;
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   151
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   152
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   153
(* chars and strings *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   154
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   155
local
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   156
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   157
val scan_escape =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   158
  Scan.one (member (op =) (explode "\"\\abtnvfr") o symbol) >> single ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   159
  $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   160
  Scan.one (Symbol.is_ascii_digit o symbol) --
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   161
    Scan.one (Symbol.is_ascii_digit o symbol) --
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   162
    Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   163
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   164
val scan_str =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   165
  Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   166
  $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   167
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   168
val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   169
val scan_gaps = Scan.repeat scan_gap >> flat;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   170
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   171
in
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   172
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   173
val scan_char =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   174
  $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   175
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   176
val scan_string =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   177
  $$$ "\"" @@@ !!! "missing quote at end of string"
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   178
    ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   179
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   180
end;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   181
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   182
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   183
(* token source *)
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   184
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   185
local
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   186
27799
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27772
diff changeset
   187
fun token k ss = Token (SymbolPos.range ss, (k, SymbolPos.implode ss));
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   188
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   189
val scan = !!! "bad input"
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   190
 (scan_char >> token Char ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   191
  scan_string >> token String ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   192
  scan_blanks1 >> token Space ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   193
  SymbolPos.scan_comment !!! >> token Comment ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   194
  Scan.max token_leq
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   195
   (scan_keyword >> token Keyword)
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   196
   (scan_word >> token Word ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   197
    scan_real >> token Real ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   198
    scan_int >> token Int ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   199
    scan_longident >> token LongIdent ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   200
    scan_ident >> token Ident ||
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   201
    scan_typevar >> token TypeVar));
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   202
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   203
fun recover msg =
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   204
  Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o symbol)
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   205
  >> (fn cs => [token (Error msg) cs]);
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   206
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   207
in
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   208
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   209
fun source src =
27772
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   210
  SymbolPos.source (Position.line 1) src
b933c997eab3 improved position handling due to SymbolPos.T;
wenzelm
parents: 27732
diff changeset
   211
  |> Source.source SymbolPos.stopper (Scan.bulk scan) (SOME (false, recover));
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   212
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   213
end;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   214
24596
f1333a841b26 removed obsolete Selector token;
wenzelm
parents: 24579
diff changeset
   215
end;
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   216