added tokenize;
internal scan_str: additionally check Symbol.is_regular, otherwise the string scanner might swallow the stopper and crash!
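
For context, a minimal sketch of the failure mode behind the scan_str change, assuming an Isabelle/ML toplevel where the Symbol module is available; the commented results restate what the log message implies rather than verified output:

(* the stopper appended by Source.source carries the end-of-input symbol;
   per the log message it passes the old predicate, so the previous scan_str
   could consume it instead of stopping at the end of input *)
val eof_passed_old_test =
  Symbol.is_printable Symbol.eof andalso
  Symbol.eof <> "\"" andalso Symbol.eof <> "\\";

(* the additional guard rejects the stopper symbol explicitly *)
val eof_passes_new_test = Symbol.is_regular Symbol.eof;  (* expected: false *)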
--- a/src/Pure/ML/ml_lex.ML Thu Mar 19 16:56:51 2009 +0100
+++ b/src/Pure/ML/ml_lex.ML Thu Mar 19 18:20:27 2009 +0100
@@ -20,6 +20,7 @@
val source: (Symbol.symbol, 'a) Source.source ->
(token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source)
Source.source) Source.source
+ val tokenize: string -> token list
end;
structure ML_Lex: ML_LEX =
@@ -161,7 +162,8 @@
Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
val scan_str =
- Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
+ Scan.one (fn (s, _) => Symbol.is_regular s andalso Symbol.is_printable s
+ andalso s <> "\"" andalso s <> "\\") >> single ||
$$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
@@ -185,7 +187,7 @@
fun token k ss = Token (Symbol_Pos.range ss, (k, Symbol_Pos.implode ss));
-val scan = !!! "bad input"
+val scan_ml = !!! "bad input"
(scan_char >> token Char ||
scan_string >> token String ||
scan_blanks1 >> token Space ||
@@ -207,7 +209,9 @@
fun source src =
Symbol_Pos.source (Position.line 1) src
- |> Source.source Symbol_Pos.stopper (Scan.bulk scan) (SOME (false, recover));
+ |> Source.source Symbol_Pos.stopper (Scan.bulk scan_ml) (SOME (false, recover));
+
+val tokenize = Source.of_string #> source #> Source.exhaust;
end;
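
For reference, a minimal usage sketch of the new tokenize entry point, assuming an Isabelle/ML context where ML_Lex is loaded; the input string is purely illustrative:

(* tokenize scans a complete ML source string eagerly into a flat token list
   (Source.of_string #> source #> Source.exhaust), so whitespace appears as
   Space tokens alongside the other token kinds produced by scan_ml *)
val toks = ML_Lex.tokenize "val x = 1 + 2;";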