# HG changeset patch # User wenzelm # Date 1189947151 -7200 # Node ID f1333a841b2651d330f9ab1a977014fb5dcfbadf # Parent 5c290506fbc04c9b1201842756f7a690756cd8a1 removed obsolete Selector token; tuned signature; string syntax: proper escape format; diff -r 5c290506fbc0 -r f1333a841b26 src/Pure/ML/ml_lex.ML --- a/src/Pure/ML/ml_lex.ML Sun Sep 16 14:52:29 2007 +0200 +++ b/src/Pure/ML/ml_lex.ML Sun Sep 16 14:52:31 2007 +0200 @@ -8,23 +8,18 @@ signature ML_LEX = sig datatype token_kind = - Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String | + Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String | Space | Comment | Error of string | EOF eqtype token - val str_of_kind: token_kind -> string val stopper: token * (token -> bool) - val not_eof: token -> bool + val is_regular: token -> bool + val is_improper: token -> bool + val pos_of: token -> string val kind_of: token -> token_kind - val is_kind: token_kind -> token -> bool - val is_proper: token -> bool val val_of: token -> string - val !!! : string -> (int * 'a -> 'b) -> int * 'a -> 'b val keywords: string list - val scan: int * string list -> token * (int * string list) - val source: bool option -> (string, 'a) Source.source -> - (token, int * (string, 'a) Source.source) Source.source - val source_proper: (token, 'a) Source.source -> - (token, (token, 'a) Source.source) Source.source + val source: (Symbol.symbol, 'a) Source.source -> + (token, int * (Symbol.symbol, 'a) Source.source) Source.source end; structure ML_Lex: ML_LEX = @@ -35,29 +30,13 @@ (* datatype token *) datatype token_kind = - Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String | + Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String | Space | Comment | Error of string | EOF; datatype token = Token of int * (token_kind * string); -val str_of_kind = - fn Keyword => "keyword" - | Ident => "identifier" - | LongIdent => "long identifier" - | TypeVar => "type variable" - | Selector => "record selector" - | Word => "word" - | Int => "integer" - | Real => "real" - | Char => "character" - | String => "string" - | Space => "white space" - | Comment => "comment" - | Error _ => "bad input" - | EOF => "end-of-file"; - -(* end-of-file *) +(* control tokens *) val eof = Token (0, (EOF, "")); @@ -65,22 +44,22 @@ | is_eof _ = false; val stopper = (eof, is_eof); -val not_eof = not o is_eof; -(* token kind *) +fun is_regular (Token (_, (Error _, _))) = false + | is_regular (Token (_, (EOF, _))) = false + | is_regular _ = true; + +fun is_improper (Token (_, (Space, _))) = true + | is_improper (Token (_, (Comment, _))) = true + | is_improper _ = false; + + +(* token content *) + +fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")"; fun kind_of (Token (_, (k, _))) = k; - -fun is_kind k (Token (_, (k', _))) = k = k'; - -fun is_proper (Token (_, (Space, _))) = false - | is_proper (Token (_, (Comment, _))) = false - | is_proper _ = true; - - -(* token value *) - fun val_of (Token (_, (_, x))) = x; fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x'; @@ -97,7 +76,7 @@ (* line numbering *) -fun incr_line scan = Scan.depend (fn n => scan >> pair (n + 1)); +fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1)); val keep_line = Scan.lift; val scan_blank = @@ -123,6 +102,8 @@ (* identifiers *) +local + val scan_letdigs = Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode; @@ -130,6 +111,8 @@ val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode; +in + val scan_ident = scan_alphanumeric || scan_symbolic; val scan_longident = @@ -137,24 +120,20 @@ val scan_typevar = $$ "'" ^^ scan_letdigs; - -(* selectors -- not fully standard conformant *) - -val scan_numeric = - Scan.one (member (op =) (explode "123456789")) ^^ (Scan.many Symbol.is_ascii_digit >> implode); - -val scan_selector = - keep_line ($$ "#") ^^ scan_blanks ^^ - !!! "malformed record selector" (keep_line (scan_numeric || scan_alphanumeric || scan_symbolic)); +end; (* numerals *) +local + val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode; val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode; val scan_sign = Scan.optional ($$ "~") ""; val scan_decint = scan_sign ^^ scan_dec; +in + val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec; val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec); @@ -165,40 +144,59 @@ scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" || scan_decint ^^ scan_exp; +end; + (* chars and strings *) +local + +val scan_escape = + Scan.one (member (op =) (explode "\"\\abtnvfr")) || + $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") || + Scan.one Symbol.is_ascii_digit ^^ + Scan.one Symbol.is_ascii_digit ^^ + Scan.one Symbol.is_ascii_digit; + +val scan_str = + keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) || + keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape); + val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\"); val scan_gaps = Scan.repeat scan_gap >> implode; -val scan_str = - scan_blank || - scan_gap || - keep_line ($$ "\\") |-- !!! "bad escape character in string" - (scan_blank || keep_line ($$ "\"" || $$ "\\")) || - keep_line (Scan.one (fn s => s <> "\"" andalso s <> "\\" andalso Symbol.is_regular s)); +in val scan_char = keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\""); val scan_string = keep_line ($$ "\"") ^^ - !!! "missing quote at end of string" ((Scan.repeat scan_str >> implode) ^^ keep_line ($$ "\"")); + !!! "missing quote at end of string" + ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\"")); + +end; (* scan nested comments *) +local + val scan_cmt = Scan.lift scan_blank || - Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || + Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) || Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) || Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s))); +in + val scan_comment = keep_line ($$ "(" ^^ $$ "*") ^^ - !!! "missing end of comment" - (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")")); + !!! "missing end of comment" + (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")")); + +end; (* scan token *) @@ -210,23 +208,22 @@ fun token k x = Token (n, (k, x)); in scan_char >> token Char || - scan_selector >> token Selector || scan_string >> token String || scan_blanks1 >> token Space || scan_comment >> token Comment || keep_line (Scan.max token_leq (scan_keyword >> token Keyword) - (scan_longident >> token LongIdent || + (scan_word >> token Word || + scan_real >> token Real || + scan_int >> token Int || + scan_longident >> token LongIdent || scan_ident >> token Ident || - scan_typevar >> token TypeVar || - scan_real >> token Real || - scan_word >> token Word || - scan_int >> token Int)) + scan_typevar >> token TypeVar)) end); in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; -(* token sources *) +(* token source *) local @@ -237,12 +234,10 @@ in -fun source do_recover src = - Source.source' 1 Symbol.stopper (Scan.bulk scan) (Option.map (rpair recover) do_recover) src; +fun source src = + Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src; end; -fun source_proper src = src |> Source.filter is_proper; +end; - -end;