--- a/src/Pure/Syntax/lexicon.ML Mon Jan 20 20:38:51 2014 +0100
+++ b/src/Pure/Syntax/lexicon.ML Wed Jan 22 15:10:33 2014 +0100
@@ -24,8 +24,8 @@
val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
val is_tid: string -> bool
datatype token_kind =
- Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
- NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF
+ Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
+ FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF
datatype token = Token of token_kind * string * Position.range
val str_of_token: token -> string
val pos_of_token: token -> Position.T
@@ -42,8 +42,10 @@
val matching_tokens: token * token -> bool
val valued_token: token -> bool
val predef_term: string -> token option
- val implode_str: string list -> string
- val explode_str: string -> string list
+ val implode_string: Symbol.symbol list -> string
+ val explode_string: string * Position.T -> Symbol_Pos.T list
+ val implode_str: Symbol.symbol list -> string
+ val explode_str: string * Position.T -> Symbol_Pos.T list
val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
val read_indexname: string -> indexname
val read_var: string -> term
@@ -116,8 +118,8 @@
(** datatype token **)
datatype token_kind =
- Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
- NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF;
+ Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
+ FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;
datatype token = Token of token_kind * string * Position.range;
@@ -152,6 +154,7 @@
("float_token", FloatSy),
("xnum_token", XNumSy),
("str_token", StrSy),
+ ("string_token", StringSy),
("cartouche", Cartouche)];
val terminals = map #1 terminal_kinds;
@@ -172,6 +175,7 @@
| FloatSy => Markup.numeral
| XNumSy => Markup.numeral
| StrSy => Markup.inner_string
+ | StringSy => Markup.inner_string
| Cartouche => Markup.inner_cartouche
| Comment => Markup.inner_comment
| _ => Markup.empty;
@@ -207,7 +211,25 @@
| NONE => NONE);
-(* str tokens *)
+
+(** string literals **)
+
+fun explode_literal scan_body (str, pos) =
+ (case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of
+ SOME ss => ss
+ | _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));
+
+
+(* string *)
+
+val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2;
+val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);
+
+fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss));
+val explode_string = explode_literal scan_string_body;
+
+
+(* str *)
val scan_chr =
$$ "\\" |-- $$$ "'" ||
@@ -226,13 +248,8 @@
!!! "unclosed string literal"
($$ "'" |-- $$ "'" |-- (Scan.repeat scan_chr >> flat) --| $$ "'" --| $$ "'");
-
-fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
-
-fun explode_str str =
- (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
- SOME cs => map Symbol_Pos.symbol cs
- | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
+fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss));
+val explode_str = explode_literal scan_str_body;
@@ -265,14 +282,15 @@
Symbol_Pos.scan_cartouche err_prefix >> token Cartouche ||
Symbol_Pos.scan_comment err_prefix >> token Comment ||
Scan.max token_leq scan_lit scan_val ||
+ scan_string >> token StringSy ||
scan_str >> token StrSy ||
Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;
in
(case Scan.error
(Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
(toks, []) => toks
- | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
- Position.here (#1 (Symbol_Pos.range ss))))
+ | (_, ss) =>
+ error (err_prefix ^ Symbol_Pos.content ss ^ Position.here (#1 (Symbol_Pos.range ss))))
end;