--- a/NEWS Fri Dec 07 22:19:49 2007 +0100
+++ b/NEWS Fri Dec 07 22:19:51 2007 +0100
@@ -6,8 +6,12 @@
*** General ***
-* Symbol \<chi> is now considered a letter. Potential INCOMPATIBILITY
-in identifier syntax etc.
+* Syntax: symbol \<chi> is now considered a letter. Potential
+INCOMPATIBILITY in identifier syntax etc.
+
+* Outer syntax: string tokens may contain arbitrary character codes
+specified via 3 decimal digits (as in SML). E.g. "foo\095bar" for
+"foo_bar".
*** Pure ***
--- a/doc-src/IsarRef/syntax.tex Fri Dec 07 22:19:49 2007 +0100
+++ b/doc-src/IsarRef/syntax.tex Fri Dec 07 22:19:51 2007 +0100
@@ -102,13 +102,16 @@
& & \verb,\<Upsilon>, ~|~ \verb,\<Phi>, ~|~ \verb,\<Psi>, ~|~ \verb,\<Omega>, \\
\end{matharray}
-The syntax of $string$ admits any characters, including newlines; ``\verb|"|''
-(double-quote) and ``\verb|\|'' (backslash) need to be escaped by a backslash.
-Alternative strings according to $altstring$ are analogous, using single
-back-quotes instead. The body of $verbatim$ may consist of any text not
-containing ``\verb|*}|''; this allows convenient inclusion of quotes without
-further escapes. The greek letters do \emph{not} include \verb,\<lambda>,,
-which is already used differently in the meta-logic.
+The syntax of $string$ admits any characters, including newlines;
+``\verb|"|'' (double-quote) and ``\verb|\|'' (backslash) need to be
+escaped by a backslash; arbitrary character codes may be specified as
+``\verb|\|$ddd$'', with 3 decimal digits as in SML. Alternative
+strings according to $altstring$ are analogous, using single
+back-quotes instead. The body of $verbatim$ may consist of any text
+not containing ``\verb|*}|''; this allows convenient inclusion of
+quotes without further escapes. The greek letters do \emph{not}
+include \verb,\<lambda>,, which is already used differently in the
+meta-logic.
Common mathematical symbols such as $\forall$ are represented in Isabelle as
\verb,\<forall>,. There are infinitely many legal symbols like this, although
--- a/src/Pure/Isar/outer_lex.ML Fri Dec 07 22:19:49 2007 +0100
+++ b/src/Pure/Isar/outer_lex.ML Fri Dec 07 22:19:51 2007 +0100
@@ -220,10 +220,17 @@
local
+val char_code =
+ Scan.one Symbol.is_ascii_digit --
+ Scan.one Symbol.is_ascii_digit --
+ Scan.one Symbol.is_ascii_digit :|-- (fn ((a, b), c) =>
+ let val (n, _) = Library.read_int [a, b, c]
+ in if n <= 255 then Scan.succeed (chr n) else Scan.fail end);
+
fun scan_str q =
scan_blank ||
keep_line ($$ "\\") |-- !!! "bad escape character in string"
- (scan_blank || keep_line ($$ q || $$ "\\")) ||
+ (scan_blank || keep_line ($$ q || $$ "\\" || char_code)) ||
keep_line (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s));
fun scan_strs q =