improved position handling due to SymbolPos.T;
authorwenzelm
Thu, 07 Aug 2008 13:45:11 +0200
changeset 27772 b933c997eab3
parent 27771 98499933a50f
child 27773 a52166b228b9
improved position handling due to SymbolPos.T; SymbolPos.scan_comment; tuned;
src/Pure/ML/ml_lex.ML
--- a/src/Pure/ML/ml_lex.ML	Thu Aug 07 13:45:09 2008 +0200
+++ b/src/Pure/ML/ml_lex.ML	Thu Aug 07 13:45:11 2008 +0200
@@ -19,7 +19,8 @@
   val val_of: token -> string
   val keywords: string list
   val source: (Symbol.symbol, 'a) Source.source ->
-    (token, int * (Symbol.symbol, 'a) Source.source) Source.source
+    (token, (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source)
+      Source.source) Source.source
 end;
 
 structure ML_Lex: ML_LEX =
@@ -33,18 +34,35 @@
   Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
   Space | Comment | Error of string | EOF;
 
-datatype token = Token of int * (token_kind * string);
+datatype token = Token of Position.range * (token_kind * string);
+
+
+(* position *)
+
+fun position_of (Token ((pos, _), _)) = pos;
+fun end_position_of (Token ((_, pos), _)) = pos;
+
+val pos_of = Position.str_of o position_of;
 
 
 (* control tokens *)
 
-val eof = Token (0, (EOF, ""));
+fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
+val eof = mk_eof Position.none;
 
 fun is_eof (Token (_, (EOF, _))) = true
   | is_eof _ = false;
 
-val stopper = Scan.stopper (K eof) is_eof;
+val stopper =
+  Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
+
 
+(* token content *)
+
+fun val_of (Token (_, (_, x))) = x;
+fun token_leq (tok, tok') = val_of tok <= val_of tok';
+
+fun kind_of (Token (_, (k, _))) = k;
 
 fun is_regular (Token (_, (Error _, _))) = false
   | is_regular (Token (_, (EOF, _))) = false
@@ -55,36 +73,18 @@
   | is_improper _ = false;
 
 
-(* token content *)
-
-fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")";
-
-fun kind_of (Token (_, (k, _))) = k;
-fun val_of (Token (_, (_, x))) = x;
-
-fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
-
-
 
 (** scanners **)
 
-(* diagnostics *)
+open BasicSymbolPos;
 
-fun lex_err msg ((n, cs), _) = "SML lexical error (line " ^ string_of_int n ^ "): " ^ msg cs;
-fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
+fun !!! msg = SymbolPos.!!! ("SML lexical error: " ^ msg);
 
 
-(* line numbering *)
-
-fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1));
-val keep_line = Scan.lift;
+(* blanks *)
 
-val scan_blank =
-  incr_line ($$ "\n") ||
-  keep_line (Scan.one Symbol.is_ascii_blank);
-
-val scan_blanks = Scan.repeat scan_blank >> implode;
-val scan_blanks1 = Scan.repeat1 scan_blank >> implode;
+val scan_blank = Scan.one (Symbol.is_ascii_blank o symbol);
+val scan_blanks1 = Scan.repeat1 scan_blank;
 
 
 (* keywords *)
@@ -97,7 +97,8 @@
   "sharing", "sig", "signature", "struct", "structure", "then", "type",
   "val", "where", "while", "with", "withtype"];
 
-val scan_keyword = Scan.literal (Scan.make_lexicon (map explode keywords)) >> implode;
+val lex = Scan.make_lexicon (map explode keywords);
+fun scan_keyword x = Scan.literal lex x;
 
 
 (* identifiers *)
@@ -105,20 +106,20 @@
 local
 
 val scan_letdigs =
-  Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
+  Scan.many ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o symbol);
 
-val scan_alphanumeric = Scan.one Symbol.is_ascii_letter ^^ scan_letdigs;
+val scan_alphanumeric = Scan.one (Symbol.is_ascii_letter o symbol) -- scan_letdigs >> op ::;
 
-val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
+val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~") o symbol);
 
 in
 
 val scan_ident = scan_alphanumeric || scan_symbolic;
 
 val scan_longident =
-  (Scan.repeat1 (scan_alphanumeric ^^ $$ ".") >> implode) ^^ (scan_ident || $$ "=");
+  (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
 
-val scan_typevar = $$ "'" ^^ scan_letdigs;
+val scan_typevar = $$$ "'" @@@ scan_letdigs;
 
 end;
 
@@ -127,22 +128,24 @@
 
 local
 
-val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
-val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
-val scan_sign = Scan.optional ($$ "~") "";
-val scan_decint = scan_sign ^^ scan_dec;
+val scan_dec = Scan.many1 (Symbol.is_ascii_digit o symbol);
+val scan_hex = Scan.many1 (Symbol.is_ascii_hex o symbol);
+val scan_sign = Scan.optional ($$$ "~") [];
+val scan_decint = scan_sign @@@ scan_dec;
 
 in
 
-val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
+val scan_word =
+  $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
+  $$$ "0" @@@ $$$ "w" @@@ scan_dec;
 
-val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
+val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
 
-val scan_exp = ($$ "E" || $$ "e") ^^ scan_decint;
+val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
 
 val scan_real =
-  scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
-  scan_decint ^^ scan_exp;
+  scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
+  scan_decint @@@ scan_exp;
 
 end;
 
@@ -152,90 +155,62 @@
 local
 
 val scan_escape =
-  Scan.one (member (op =) (explode "\"\\abtnvfr")) ||
-  $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") ||
-  Scan.one Symbol.is_ascii_digit ^^
-    Scan.one Symbol.is_ascii_digit ^^
-    Scan.one Symbol.is_ascii_digit;
+  Scan.one (member (op =) (explode "\"\\abtnvfr") o symbol) >> single ||
+  $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
+  Scan.one (Symbol.is_ascii_digit o symbol) --
+    Scan.one (Symbol.is_ascii_digit o symbol) --
+    Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
 
 val scan_str =
-  keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) ||
-  keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape);
+  Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
+  $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
 
-val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
-val scan_gaps = Scan.repeat scan_gap >> implode;
+val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
+val scan_gaps = Scan.repeat scan_gap >> flat;
 
 in
 
 val scan_char =
-  keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
+  $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
 
 val scan_string =
-  keep_line ($$ "\"") ^^
-  !!! "missing quote at end of string"
-    ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\""));
+  $$$ "\"" @@@ !!! "missing quote at end of string"
+    ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
 
 end;
 
 
-(* scan nested comments *)
-
-local
-
-val scan_cmt =
-  Scan.lift scan_blank ||
-  Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
-  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
-  Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
-  Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
-
-in
-
-val scan_comment =
-  keep_line ($$ "(" ^^ $$ "*") ^^
-  !!! "missing end of comment"
-    (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
-
-end;
-
-
-(* scan token *)
-
-val scan =
-  let
-    val scanner = Scan.state :|-- (fn n =>
-      let
-        fun token k x = Token (n, (k, x));
-      in
-        scan_char >> token Char ||
-        scan_string >> token String ||
-        scan_blanks1 >> token Space ||
-        scan_comment >> token Comment ||
-        keep_line (Scan.max token_leq
-          (scan_keyword >> token Keyword)
-          (scan_word >> token Word ||
-            scan_real >> token Real ||
-            scan_int >> token Int ||
-            scan_longident >> token LongIdent ||
-            scan_ident >> token Ident ||
-            scan_typevar >> token TypeVar))
-      end);
-  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
-
-
 (* token source *)
 
 local
 
-val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
+fun token k s =
+  let val (x, range) = SymbolPos.implode s
+  in Token (range, (k, x)) end;
 
-fun recover msg = Scan.state :|-- (fn n =>
-  keep_line (Scan.many is_junk) >> (fn cs => [Token (n, (Error msg, implode cs))]));
+val scan = !!! "bad input"
+ (scan_char >> token Char ||
+  scan_string >> token String ||
+  scan_blanks1 >> token Space ||
+  SymbolPos.scan_comment !!! >> token Comment ||
+  Scan.max token_leq
+   (scan_keyword >> token Keyword)
+   (scan_word >> token Word ||
+    scan_real >> token Real ||
+    scan_int >> token Int ||
+    scan_longident >> token LongIdent ||
+    scan_ident >> token Ident ||
+    scan_typevar >> token TypeVar));
+
+fun recover msg =
+  Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o symbol)
+  >> (fn cs => [token (Error msg) cs]);
 
 in
 
 fun source src =
-  Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src;
+  SymbolPos.source (Position.line 1) src
+  |> Source.source SymbolPos.stopper (Scan.bulk scan) (SOME (false, recover));
 
 end;