inner syntax token language allows regular quoted strings;
authorwenzelm
Wed, 22 Jan 2014 15:10:33 +0100
changeset 55108 0b7a0c1fdf7e
parent 55107 1a29ea173bf9
child 55109 ecff9e26360c
inner syntax token language allows regular quoted strings; tuned signature;
NEWS
src/Doc/IsarRef/Inner_Syntax.thy
src/HOL/Tools/string_syntax.ML
src/Pure/Syntax/lexicon.ML
src/Pure/pure_thy.ML
--- a/NEWS	Mon Jan 20 20:38:51 2014 +0100
+++ b/NEWS	Wed Jan 22 15:10:33 2014 +0100
@@ -43,6 +43,10 @@
 context discipline.  See also Assumption.add_assumes and the more
 primitive Thm.assume_hyps.
 
+* Inner syntax token language allows regular quoted strings "..."
+(only makes sense in practice, if outer syntax is delimited
+differently).
+
 
 *** HOL ***
 
--- a/src/Doc/IsarRef/Inner_Syntax.thy	Mon Jan 20 20:38:51 2014 +0100
+++ b/src/Doc/IsarRef/Inner_Syntax.thy	Wed Jan 22 15:10:33 2014 +0100
@@ -631,18 +631,19 @@
     @{syntax_def (inner) num_token} & = & @{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat} \\
     @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
     @{syntax_def (inner) xnum_token} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
-
     @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
+    @{syntax_def (inner) string_token} & = & @{verbatim "\""} @{text "\<dots>"} @{verbatim "\""} \\
     @{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
   \end{supertabular}
   \end{center}
 
   The token categories @{syntax (inner) num_token}, @{syntax (inner)
   float_token}, @{syntax (inner) xnum_token}, @{syntax (inner)
-  str_token}, and @{syntax (inner) cartouche} are not used in Pure.
-  Object-logics may implement numerals and string literals by adding
-  appropriate syntax declarations, together with some translation
-  functions (e.g.\ see Isabelle/HOL).
+  str_token}, @{syntax (inner) string_token}, and @{syntax (inner)
+  cartouche} are not used in Pure. Object-logics may implement
+  numerals and string literals by adding appropriate syntax
+  declarations, together with some translation functions (e.g.\ see
+  @{file "~~/src/HOL/Tools/string_syntax.ML"}).
 
   The derived categories @{syntax_def (inner) num_const}, @{syntax_def
   (inner) float_const}, and @{syntax_def (inner) num_const} provide
--- a/src/HOL/Tools/string_syntax.ML	Mon Jan 20 20:38:51 2014 +0100
+++ b/src/HOL/Tools/string_syntax.ML	Wed Jan 22 15:10:33 2014 +0100
@@ -38,24 +38,24 @@
 val specials = raw_explode "\\\"`'";
 
 fun dest_chr c1 c2 =
-  let val c = chr (dest_nib c1 * 16 + dest_nib c2) in
-    if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c
-    then c
-    else if c = "\n" then "\<newline>"
+  let val s = chr (dest_nib c1 * 16 + dest_nib c2) in
+    if not (member (op =) specials s) andalso Symbol.is_ascii s andalso Symbol.is_printable s
+    then s
+    else if s = "\n" then "\<newline>"
     else raise Match
   end;
 
 fun dest_char (Ast.Appl [Ast.Constant @{const_syntax Char}, c1, c2]) = dest_chr c1 c2
   | dest_char _ = raise Match;
 
-fun syntax_string cs =
+fun syntax_string ss =
   Ast.Appl [Ast.Constant @{syntax_const "_inner_string"},
-    Ast.Variable (Lexicon.implode_str cs)];
+    Ast.Variable (Lexicon.implode_str ss)];
 
 
 fun char_ast_tr [Ast.Variable str] =
-      (case Lexicon.explode_str str of
-        [c] => mk_char c
+      (case Lexicon.explode_str (str, Position.none) of
+        [(s, _)] => mk_char s
       | _ => error ("Single character expected: " ^ str))
   | char_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
       Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, char_ast_tr [ast1], ast2]
@@ -69,16 +69,16 @@
 (* string *)
 
 fun mk_string [] = Ast.Constant @{const_syntax Nil}
-  | mk_string (c :: cs) =
-      Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char c, mk_string cs];
+  | mk_string (s :: ss) =
+      Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char s, mk_string ss];
 
 fun string_ast_tr [Ast.Variable str] =
-      (case Lexicon.explode_str str of
+      (case Lexicon.explode_str (str, Position.none) of
         [] =>
           Ast.Appl
             [Ast.Constant @{syntax_const "_constrain"},
               Ast.Constant @{const_syntax Nil}, Ast.Constant @{type_syntax string}]
-      | cs => mk_string cs)
+      | ss => mk_string (map Symbol_Pos.symbol ss))
   | string_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
       Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, string_ast_tr [ast1], ast2]
   | string_ast_tr asts = raise Ast.AST ("string_tr", asts);
--- a/src/Pure/Syntax/lexicon.ML	Mon Jan 20 20:38:51 2014 +0100
+++ b/src/Pure/Syntax/lexicon.ML	Wed Jan 22 15:10:33 2014 +0100
@@ -24,8 +24,8 @@
   val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
   val is_tid: string -> bool
   datatype token_kind =
-    Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
-    NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF
+    Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
+    FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF
   datatype token = Token of token_kind * string * Position.range
   val str_of_token: token -> string
   val pos_of_token: token -> Position.T
@@ -42,8 +42,10 @@
   val matching_tokens: token * token -> bool
   val valued_token: token -> bool
   val predef_term: string -> token option
-  val implode_str: string list -> string
-  val explode_str: string -> string list
+  val implode_string: Symbol.symbol list -> string
+  val explode_string: string * Position.T -> Symbol_Pos.T list
+  val implode_str: Symbol.symbol list -> string
+  val explode_str: string * Position.T -> Symbol_Pos.T list
   val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
   val read_indexname: string -> indexname
   val read_var: string -> term
@@ -116,8 +118,8 @@
 (** datatype token **)
 
 datatype token_kind =
-  Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
-  NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF;
+  Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
+  FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;
 
 datatype token = Token of token_kind * string * Position.range;
 
@@ -152,6 +154,7 @@
   ("float_token", FloatSy),
   ("xnum_token", XNumSy),
   ("str_token", StrSy),
+  ("string_token", StringSy),
   ("cartouche", Cartouche)];
 
 val terminals = map #1 terminal_kinds;
@@ -172,6 +175,7 @@
   | FloatSy => Markup.numeral
   | XNumSy => Markup.numeral
   | StrSy => Markup.inner_string
+  | StringSy => Markup.inner_string
   | Cartouche => Markup.inner_cartouche
   | Comment => Markup.inner_comment
   | _ => Markup.empty;
@@ -207,7 +211,25 @@
   | NONE => NONE);
 
 
-(* str tokens *)
+
+(** string literals **)
+
+fun explode_literal scan_body (str, pos) =
+  (case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of
+    SOME ss => ss
+  | _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));
+
+
+(* string *)
+
+val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2;
+val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);
+
+fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss));
+val explode_string = explode_literal scan_string_body;
+
+
+(* str *)
 
 val scan_chr =
   $$ "\\" |-- $$$ "'" ||
@@ -226,13 +248,8 @@
     !!! "unclosed string literal"
       ($$ "'" |-- $$ "'" |-- (Scan.repeat scan_chr >> flat) --| $$ "'" --| $$ "'");
 
-
-fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
-
-fun explode_str str =
-  (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
-    SOME cs => map Symbol_Pos.symbol cs
-  | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
+fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss));
+val explode_str = explode_literal scan_str_body;
 
 
 
@@ -265,14 +282,15 @@
       Symbol_Pos.scan_cartouche err_prefix >> token Cartouche ||
       Symbol_Pos.scan_comment err_prefix >> token Comment ||
       Scan.max token_leq scan_lit scan_val ||
+      scan_string >> token StringSy ||
       scan_str >> token StrSy ||
       Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;
   in
     (case Scan.error
         (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
       (toks, []) => toks
-    | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
-        Position.here (#1 (Symbol_Pos.range ss))))
+    | (_, ss) =>
+        error (err_prefix ^ Symbol_Pos.content ss ^ Position.here (#1 (Symbol_Pos.range ss))))
   end;
 
 
--- a/src/Pure/pure_thy.ML	Mon Jan 20 20:38:51 2014 +0100
+++ b/src/Pure/pure_thy.ML	Wed Jan 22 15:10:33 2014 +0100
@@ -70,8 +70,9 @@
         "args", "cargs", "pttrn", "pttrns", "idt", "idts", "aprop", "asms",
         "any", "prop'", "num_const", "float_const", "xnum_const", "num_position",
         "float_position", "xnum_position", "index", "struct", "tid_position",
-        "tvar_position", "id_position", "longid_position", "var_position", "str_position",
-        "cartouche_position", "type_name", "class_name"]))
+        "tvar_position", "id_position", "longid_position", "var_position",
+        "str_position", "string_position", "cartouche_position", "type_name",
+        "class_name"]))
   #> Sign.add_syntax_i (map (fn x => (x, typ "'a", NoSyn)) token_markers)
   #> Sign.add_syntax_i
    [("",            typ "prop' => prop",               Delimfix "_"),
@@ -155,6 +156,7 @@
     ("_position",   typ "longid => longid_position",   Delimfix "_"),
     ("_position",   typ "var => var_position",         Delimfix "_"),
     ("_position",   typ "str_token => str_position",   Delimfix "_"),
+    ("_position",   typ "string_token => string_position", Delimfix "_"),
     ("_position",   typ "cartouche => cartouche_position", Delimfix "_"),
     ("_type_constraint_", typ "'a",                    NoSyn),
     ("_context_const", typ "id_position => logic",     Delimfix "CONST _"),