src/Pure/ML/ml_lex.ML
author wenzelm
Sat, 15 Sep 2007 19:25:43 +0200
changeset 24579 852fc50927b1
child 24596 f1333a841b26
permissions -rw-r--r--
Lexical syntax for SML.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24579
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/ML/ml_lex.ML
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     3
    Author:     Makarius
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     4
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     5
Lexical syntax for SML.
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     6
*)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     7
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     8
signature ML_LEX =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
     9
sig
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    10
  datatype token_kind =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    11
    Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String |
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    12
    Space | Comment | Error of string | EOF
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    13
  eqtype token
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    14
  val str_of_kind: token_kind -> string
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    15
  val stopper: token * (token -> bool)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    16
  val not_eof: token -> bool
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    17
  val kind_of: token -> token_kind
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    18
  val is_kind: token_kind -> token -> bool
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    19
  val is_proper: token -> bool
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    20
  val val_of: token -> string
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    21
  val !!! : string -> (int * 'a -> 'b) -> int * 'a -> 'b
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    22
  val keywords: string list
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    23
  val scan: int * string list -> token * (int * string list)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    24
  val source: bool option -> (string, 'a) Source.source ->
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    25
    (token, int * (string, 'a) Source.source) Source.source
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    26
  val source_proper: (token, 'a) Source.source ->
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    27
    (token, (token, 'a) Source.source) Source.source
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    28
end;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    29
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    30
structure ML_Lex: ML_LEX =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    31
struct
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    32
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    33
(** tokens **)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    34
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    35
(* datatype token *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    36
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    37
datatype token_kind =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    38
  Keyword | Ident | LongIdent | TypeVar | Selector | Word | Int | Real | Char | String |
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    39
  Space | Comment | Error of string | EOF;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    40
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    41
datatype token = Token of int * (token_kind * string);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    42
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    43
val str_of_kind =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    44
 fn Keyword => "keyword"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    45
  | Ident => "identifier"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    46
  | LongIdent => "long identifier"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    47
  | TypeVar => "type variable"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    48
  | Selector => "record selector"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    49
  | Word => "word"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    50
  | Int => "integer"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    51
  | Real => "real"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    52
  | Char => "character"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    53
  | String => "string"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    54
  | Space => "white space"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    55
  | Comment => "comment"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    56
  | Error _ => "bad input"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    57
  | EOF => "end-of-file";
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    58
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    59
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    60
(* end-of-file *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    61
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    62
val eof = Token (0, (EOF, ""));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    63
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    64
fun is_eof (Token (_, (EOF, _))) = true
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    65
  | is_eof _ = false;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    66
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    67
val stopper = (eof, is_eof);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    68
val not_eof = not o is_eof;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    69
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    70
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    71
(* token kind *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    72
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    73
fun kind_of (Token (_, (k, _))) = k;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    74
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    75
fun is_kind k (Token (_, (k', _))) = k = k';
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    76
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    77
fun is_proper (Token (_, (Space, _))) = false
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    78
  | is_proper (Token (_, (Comment, _))) = false
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    79
  | is_proper _ = true;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    80
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    81
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    82
(* token value *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    83
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    84
fun val_of (Token (_, (_, x))) = x;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    85
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    86
fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    87
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    88
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    89
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    90
(** scanners **)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    91
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    92
(* diagnostics *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    93
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    94
fun lex_err msg ((n, cs), _) = "SML lexical error (line " ^ string_of_int n ^ "): " ^ msg cs;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    95
fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    96
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    97
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    98
(* line numbering *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
    99
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   100
fun incr_line scan = Scan.depend (fn n => scan >> pair (n + 1));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   101
val keep_line = Scan.lift;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   102
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   103
val scan_blank =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   104
  incr_line ($$ "\n") ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   105
  keep_line (Scan.one Symbol.is_ascii_blank);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   106
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   107
val scan_blanks = Scan.repeat scan_blank >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   108
val scan_blanks1 = Scan.repeat1 scan_blank >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   109
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   110
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   111
(* keywords *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   112
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   113
val keywords = ["#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   114
  "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   115
  "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   116
  "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   117
  "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   118
  "sharing", "sig", "signature", "struct", "structure", "then", "type",
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   119
  "val", "where", "while", "with", "withtype"];
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   120
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   121
val scan_keyword = Scan.literal (Scan.make_lexicon (map explode keywords)) >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   122
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   123
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   124
(* identifiers *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   125
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   126
val scan_letdigs =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   127
  Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   128
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   129
val scan_alphanumeric = Scan.one Symbol.is_ascii_letter ^^ scan_letdigs;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   130
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   131
val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   132
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   133
val scan_ident = scan_alphanumeric || scan_symbolic;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   134
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   135
val scan_longident =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   136
  (Scan.repeat1 (scan_alphanumeric ^^ $$ ".") >> implode) ^^ (scan_ident || $$ "=");
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   137
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   138
val scan_typevar = $$ "'" ^^ scan_letdigs;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   139
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   140
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   141
(* selectors -- not fully standard conformant *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   142
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   143
val scan_numeric =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   144
  Scan.one (member (op =) (explode "123456789")) ^^ (Scan.many Symbol.is_ascii_digit >> implode);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   145
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   146
val scan_selector =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   147
  keep_line ($$ "#") ^^ scan_blanks ^^
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   148
  !!! "malformed record selector" (keep_line (scan_numeric || scan_alphanumeric || scan_symbolic));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   149
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   150
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   151
(* numerals *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   152
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   153
val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   154
val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   155
val scan_sign = Scan.optional ($$ "~") "";
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   156
val scan_decint = scan_sign ^^ scan_dec;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   157
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   158
val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   159
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   160
val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   161
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   162
val scan_exp = ($$ "E" || $$ "e") ^^ scan_decint;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   163
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   164
val scan_real =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   165
  scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   166
  scan_decint ^^ scan_exp;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   167
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   168
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   169
(* chars and strings *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   170
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   171
val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   172
val scan_gaps = Scan.repeat scan_gap >> implode;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   173
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   174
val scan_str =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   175
  scan_blank ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   176
  scan_gap ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   177
  keep_line ($$ "\\") |-- !!! "bad escape character in string"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   178
      (scan_blank || keep_line ($$ "\"" || $$ "\\")) ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   179
  keep_line (Scan.one (fn s => s <> "\"" andalso s <> "\\" andalso Symbol.is_regular s));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   180
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   181
val scan_char =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   182
  keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   183
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   184
val scan_string =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   185
  keep_line ($$ "\"") ^^
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   186
    !!! "missing quote at end of string" ((Scan.repeat scan_str >> implode) ^^ keep_line ($$ "\""));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   187
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   188
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   189
(* scan nested comments *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   190
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   191
val scan_cmt =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   192
  Scan.lift scan_blank ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   193
  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   194
  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   195
  Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   196
  Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   197
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   198
val scan_comment =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   199
  keep_line ($$ "(" ^^ $$ "*") ^^
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   200
    !!! "missing end of comment"
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   201
      (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   202
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   203
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   204
(* scan token *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   205
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   206
val scan =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   207
  let
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   208
    val scanner = Scan.state :|-- (fn n =>
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   209
      let
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   210
        fun token k x = Token (n, (k, x));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   211
      in
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   212
        scan_char >> token Char ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   213
        scan_selector >> token Selector ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   214
        scan_string >> token String ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   215
        scan_blanks1 >> token Space ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   216
        scan_comment >> token Comment ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   217
        keep_line (Scan.max token_leq
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   218
          (scan_keyword >> token Keyword)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   219
          (scan_longident >> token LongIdent ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   220
            scan_ident >> token Ident ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   221
            scan_typevar >> token TypeVar ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   222
            scan_real >> token Real ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   223
            scan_word >> token Word ||
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   224
            scan_int >> token Int))
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   225
      end);
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   226
  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   227
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   228
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   229
(* token sources *)
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   230
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   231
local
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   232
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   233
val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   234
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   235
fun recover msg = Scan.state :|-- (fn n =>
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   236
  keep_line (Scan.many is_junk) >> (fn cs => [Token (n, (Error msg, implode cs))]));
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   237
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   238
in
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   239
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   240
fun source do_recover src =
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   241
  Source.source' 1 Symbol.stopper (Scan.bulk scan) (Option.map (rpair recover) do_recover) src;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   242
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   243
end;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   244
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   245
fun source_proper src = src |> Source.filter is_proper;
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   246
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   247
852fc50927b1 Lexical syntax for SML.
wenzelm
parents:
diff changeset
   248
end;