removed obsolete Selector token;
authorwenzelm
Sun, 16 Sep 2007 14:52:31 +0200
changeset 24596 f1333a841b26
parent 24595 5c290506fbc0
child 24597 cbf2c5cf335e
removed obsolete Selector token; tuned signature; string syntax: proper escape format;
src/Pure/ML/ml_lex.ML
--- a/src/Pure/ML/ml_lex.ML	Sun Sep 16 14:52:29 2007 +0200
+++ b/src/Pure/ML/ml_lex.ML	Sun Sep 16 14:52:31 2007 +0200
@@ -8,23 +8,18 @@
 signature ML_LEX =
 sig
   datatype token_kind =
-    Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String |
+    Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
     Space | Comment | Error of string | EOF
   eqtype token
-  val str_of_kind: token_kind -> string
   val stopper: token * (token -> bool)
-  val not_eof: token -> bool
+  val is_regular: token -> bool
+  val is_improper: token -> bool
+  val pos_of: token -> string
   val kind_of: token -> token_kind
-  val is_kind: token_kind -> token -> bool
-  val is_proper: token -> bool
   val val_of: token -> string
-  val !!! : string -> (int * 'a -> 'b) -> int * 'a -> 'b
   val keywords: string list
-  val scan: int * string list -> token * (int * string list)
-  val source: bool option -> (string, 'a) Source.source ->
-    (token, int * (string, 'a) Source.source) Source.source
-  val source_proper: (token, 'a) Source.source ->
-    (token, (token, 'a) Source.source) Source.source
+  val source: (Symbol.symbol, 'a) Source.source ->
+    (token, int * (Symbol.symbol, 'a) Source.source) Source.source
 end;
 
 structure ML_Lex: ML_LEX =
@@ -35,29 +30,13 @@
 (* datatype token *)
 
 datatype token_kind =
-  Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String |
+  Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
   Space | Comment | Error of string | EOF;
 
 datatype token = Token of int * (token_kind * string);
 
-val str_of_kind =
- fn Keyword => "keyword"
-  | Ident => "identifier"
-  | LongIdent => "long identifier"
-  | TypeVar => "type variable"
-  | Selector => "record selector"
-  | Word => "word"
-  | Int => "integer"
-  | Real => "real"
-  | Char => "character"
-  | String => "string"
-  | Space => "white space"
-  | Comment => "comment"
-  | Error _ => "bad input"
-  | EOF => "end-of-file";
 
-
-(* end-of-file *)
+(* control tokens *)
 
 val eof = Token (0, (EOF, ""));
 
@@ -65,22 +44,22 @@
   | is_eof _ = false;
 
 val stopper = (eof, is_eof);
-val not_eof = not o is_eof;
 
 
-(* token kind *)
+fun is_regular (Token (_, (Error _, _))) = false
+  | is_regular (Token (_, (EOF, _))) = false
+  | is_regular _ = true;
+
+fun is_improper (Token (_, (Space, _))) = true
+  | is_improper (Token (_, (Comment, _))) = true
+  | is_improper _ = false;
+
+
+(* token content *)
+
+fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")";
 
 fun kind_of (Token (_, (k, _))) = k;
-
-fun is_kind k (Token (_, (k', _))) = k = k';
-
-fun is_proper (Token (_, (Space, _))) = false
-  | is_proper (Token (_, (Comment, _))) = false
-  | is_proper _ = true;
-
-
-(* token value *)
-
 fun val_of (Token (_, (_, x))) = x;
 
 fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
@@ -97,7 +76,7 @@
 
 (* line numbering *)
 
-fun incr_line scan = Scan.depend (fn n => scan >> pair (n + 1));
+fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1));
 val keep_line = Scan.lift;
 
 val scan_blank =
@@ -123,6 +102,8 @@
 
 (* identifiers *)
 
+local
+
 val scan_letdigs =
   Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
 
@@ -130,6 +111,8 @@
 
 val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
 
+in
+
 val scan_ident = scan_alphanumeric || scan_symbolic;
 
 val scan_longident =
@@ -137,24 +120,20 @@
 
 val scan_typevar = $$ "'" ^^ scan_letdigs;
 
-
-(* selectors -- not fully standard conformant *)
-
-val scan_numeric =
-  Scan.one (member (op =) (explode "123456789")) ^^ (Scan.many Symbol.is_ascii_digit >> implode);
-
-val scan_selector =
-  keep_line ($$ "#") ^^ scan_blanks ^^
-  !!! "malformed record selector" (keep_line (scan_numeric || scan_alphanumeric || scan_symbolic));
+end;
 
 
 (* numerals *)
 
+local
+
 val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
 val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
 val scan_sign = Scan.optional ($$ "~") "";
 val scan_decint = scan_sign ^^ scan_dec;
 
+in
+
 val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
 
 val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
@@ -165,40 +144,59 @@
   scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
   scan_decint ^^ scan_exp;
 
+end;
+
 
 (* chars and strings *)
 
+local
+
+val scan_escape =
+  Scan.one (member (op =) (explode "\"\\abtnvfr")) ||
+  $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") ||
+  Scan.one Symbol.is_ascii_digit ^^
+    Scan.one Symbol.is_ascii_digit ^^
+    Scan.one Symbol.is_ascii_digit;
+
+val scan_str =
+  keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) ||
+  keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape);
+
 val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
 val scan_gaps = Scan.repeat scan_gap >> implode;
 
-val scan_str =
-  scan_blank ||
-  scan_gap ||
-  keep_line ($$ "\\") |-- !!! "bad escape character in string"
-      (scan_blank || keep_line ($$ "\"" || $$ "\\")) ||
-  keep_line (Scan.one (fn s => s <> "\"" andalso s <> "\\" andalso Symbol.is_regular s));
+in
 
 val scan_char =
   keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
 
 val scan_string =
   keep_line ($$ "\"") ^^
-    !!! "missing quote at end of string" ((Scan.repeat scan_str >> implode) ^^ keep_line ($$ "\""));
+  !!! "missing quote at end of string"
+    ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\""));
+
+end;
 
 
 (* scan nested comments *)
 
+local
+
 val scan_cmt =
   Scan.lift scan_blank ||
-  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
+  Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
   Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
   Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
   Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
 
+in
+
 val scan_comment =
   keep_line ($$ "(" ^^ $$ "*") ^^
-    !!! "missing end of comment"
-      (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
+  !!! "missing end of comment"
+    (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
+
+end;
 
 
 (* scan token *)
@@ -210,23 +208,22 @@
         fun token k x = Token (n, (k, x));
       in
         scan_char >> token Char ||
-        scan_selector >> token Selector ||
         scan_string >> token String ||
         scan_blanks1 >> token Space ||
         scan_comment >> token Comment ||
         keep_line (Scan.max token_leq
           (scan_keyword >> token Keyword)
-          (scan_longident >> token LongIdent ||
+          (scan_word >> token Word ||
+            scan_real >> token Real ||
+            scan_int >> token Int ||
+            scan_longident >> token LongIdent ||
             scan_ident >> token Ident ||
-            scan_typevar >> token TypeVar ||
-            scan_real >> token Real ||
-            scan_word >> token Word ||
-            scan_int >> token Int))
+            scan_typevar >> token TypeVar))
       end);
   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
 
 
-(* token sources *)
+(* token source *)
 
 local
 
@@ -237,12 +234,10 @@
 
 in
 
-fun source do_recover src =
-  Source.source' 1 Symbol.stopper (Scan.bulk scan) (Option.map (rpair recover) do_recover) src;
+fun source src =
+  Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src;
 
 end;
 
-fun source_proper src = src |> Source.filter is_proper;
+end;
 
-
-end;