--- a/src/Pure/ML/ml_lex.ML Thu Aug 07 13:45:09 2008 +0200
+++ b/src/Pure/ML/ml_lex.ML Thu Aug 07 13:45:11 2008 +0200
@@ -19,7 +19,8 @@
val val_of: token -> string
val keywords: string list
val source: (Symbol.symbol, 'a) Source.source ->
- (token, int * (Symbol.symbol, 'a) Source.source) Source.source
+ (token, (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source)
+ Source.source) Source.source
end;
structure ML_Lex: ML_LEX =
@@ -33,18 +34,35 @@
Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
Space | Comment | Error of string | EOF;
-datatype token = Token of int * (token_kind * string);
+datatype token = Token of Position.range * (token_kind * string);
+
+
+(* position *)
+
+fun position_of (Token ((pos, _), _)) = pos;
+fun end_position_of (Token ((_, pos), _)) = pos;
+
+val pos_of = Position.str_of o position_of;
(* control tokens *)
-val eof = Token (0, (EOF, ""));
+fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
+val eof = mk_eof Position.none;
fun is_eof (Token (_, (EOF, _))) = true
| is_eof _ = false;
-val stopper = Scan.stopper (K eof) is_eof;
+val stopper =
+ Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
+
+(* token content *)
+
+fun val_of (Token (_, (_, x))) = x;
+fun token_leq (tok, tok') = val_of tok <= val_of tok';
+
+fun kind_of (Token (_, (k, _))) = k;
fun is_regular (Token (_, (Error _, _))) = false
| is_regular (Token (_, (EOF, _))) = false
@@ -55,36 +73,18 @@
| is_improper _ = false;
-(* token content *)
-
-fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")";
-
-fun kind_of (Token (_, (k, _))) = k;
-fun val_of (Token (_, (_, x))) = x;
-
-fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
-
-
(** scanners **)
-(* diagnostics *)
+open BasicSymbolPos;
-fun lex_err msg ((n, cs), _) = "SML lexical error (line " ^ string_of_int n ^ "): " ^ msg cs;
-fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
+fun !!! msg = SymbolPos.!!! ("SML lexical error: " ^ msg);
-(* line numbering *)
-
-fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1));
-val keep_line = Scan.lift;
+(* blanks *)
-val scan_blank =
- incr_line ($$ "\n") ||
- keep_line (Scan.one Symbol.is_ascii_blank);
-
-val scan_blanks = Scan.repeat scan_blank >> implode;
-val scan_blanks1 = Scan.repeat1 scan_blank >> implode;
+val scan_blank = Scan.one (Symbol.is_ascii_blank o symbol);
+val scan_blanks1 = Scan.repeat1 scan_blank;
(* keywords *)
@@ -97,7 +97,8 @@
"sharing", "sig", "signature", "struct", "structure", "then", "type",
"val", "where", "while", "with", "withtype"];
-val scan_keyword = Scan.literal (Scan.make_lexicon (map explode keywords)) >> implode;
+val lex = Scan.make_lexicon (map explode keywords);
+fun scan_keyword x = Scan.literal lex x;
(* identifiers *)
@@ -105,20 +106,20 @@
local
val scan_letdigs =
- Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
+ Scan.many ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o symbol);
-val scan_alphanumeric = Scan.one Symbol.is_ascii_letter ^^ scan_letdigs;
+val scan_alphanumeric = Scan.one (Symbol.is_ascii_letter o symbol) -- scan_letdigs >> op ::;
-val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
+val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~") o symbol);
in
val scan_ident = scan_alphanumeric || scan_symbolic;
val scan_longident =
- (Scan.repeat1 (scan_alphanumeric ^^ $$ ".") >> implode) ^^ (scan_ident || $$ "=");
+ (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
-val scan_typevar = $$ "'" ^^ scan_letdigs;
+val scan_typevar = $$$ "'" @@@ scan_letdigs;
end;
@@ -127,22 +128,24 @@
local
-val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
-val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
-val scan_sign = Scan.optional ($$ "~") "";
-val scan_decint = scan_sign ^^ scan_dec;
+val scan_dec = Scan.many1 (Symbol.is_ascii_digit o symbol);
+val scan_hex = Scan.many1 (Symbol.is_ascii_hex o symbol);
+val scan_sign = Scan.optional ($$$ "~") [];
+val scan_decint = scan_sign @@@ scan_dec;
in
-val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
+val scan_word =
+ $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
+ $$$ "0" @@@ $$$ "w" @@@ scan_dec;
-val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
+val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
-val scan_exp = ($$ "E" || $$ "e") ^^ scan_decint;
+val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
val scan_real =
- scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
- scan_decint ^^ scan_exp;
+ scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
+ scan_decint @@@ scan_exp;
end;
@@ -152,90 +155,62 @@
local
val scan_escape =
- Scan.one (member (op =) (explode "\"\\abtnvfr")) ||
- $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") ||
- Scan.one Symbol.is_ascii_digit ^^
- Scan.one Symbol.is_ascii_digit ^^
- Scan.one Symbol.is_ascii_digit;
+ Scan.one (member (op =) (explode "\"\\abtnvfr") o symbol) >> single ||
+ $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
+ Scan.one (Symbol.is_ascii_digit o symbol) --
+ Scan.one (Symbol.is_ascii_digit o symbol) --
+ Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
val scan_str =
- keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) ||
- keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape);
+ Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
+ $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
-val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
-val scan_gaps = Scan.repeat scan_gap >> implode;
+val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
+val scan_gaps = Scan.repeat scan_gap >> flat;
in
val scan_char =
- keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
+ $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
val scan_string =
- keep_line ($$ "\"") ^^
- !!! "missing quote at end of string"
- ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\""));
+ $$$ "\"" @@@ !!! "missing quote at end of string"
+ ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
end;
-(* scan nested comments *)
-
-local
-
-val scan_cmt =
- Scan.lift scan_blank ||
- Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
- Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
- Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
- Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
-
-in
-
-val scan_comment =
- keep_line ($$ "(" ^^ $$ "*") ^^
- !!! "missing end of comment"
- (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
-
-end;
-
-
-(* scan token *)
-
-val scan =
- let
- val scanner = Scan.state :|-- (fn n =>
- let
- fun token k x = Token (n, (k, x));
- in
- scan_char >> token Char ||
- scan_string >> token String ||
- scan_blanks1 >> token Space ||
- scan_comment >> token Comment ||
- keep_line (Scan.max token_leq
- (scan_keyword >> token Keyword)
- (scan_word >> token Word ||
- scan_real >> token Real ||
- scan_int >> token Int ||
- scan_longident >> token LongIdent ||
- scan_ident >> token Ident ||
- scan_typevar >> token TypeVar))
- end);
- in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
-
-
(* token source *)
local
-val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
+fun token k s =
+ let val (x, range) = SymbolPos.implode s
+ in Token (range, (k, x)) end;
-fun recover msg = Scan.state :|-- (fn n =>
- keep_line (Scan.many is_junk) >> (fn cs => [Token (n, (Error msg, implode cs))]));
+val scan = !!! "bad input"
+ (scan_char >> token Char ||
+ scan_string >> token String ||
+ scan_blanks1 >> token Space ||
+ SymbolPos.scan_comment !!! >> token Comment ||
+ Scan.max token_leq
+ (scan_keyword >> token Keyword)
+ (scan_word >> token Word ||
+ scan_real >> token Real ||
+ scan_int >> token Int ||
+ scan_longident >> token LongIdent ||
+ scan_ident >> token Ident ||
+ scan_typevar >> token TypeVar));
+
+fun recover msg =
+ Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o symbol)
+ >> (fn cs => [token (Error msg) cs]);
in
fun source src =
- Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src;
+ SymbolPos.source (Position.line 1) src
+ |> Source.source SymbolPos.stopper (Scan.bulk scan) (SOME (false, recover));
end;