# HG changeset patch # User wenzelm # Date 1390399833 -3600 # Node ID 0b7a0c1fdf7ea87c46c476ee348be642004f3a67 # Parent 1a29ea173bf9397ef5f2ca97c3c8ea8b8f058e13 inner syntax token language allows regular quoted strings; tuned signature; diff -r 1a29ea173bf9 -r 0b7a0c1fdf7e NEWS --- a/NEWS Mon Jan 20 20:38:51 2014 +0100 +++ b/NEWS Wed Jan 22 15:10:33 2014 +0100 @@ -43,6 +43,10 @@ context discipline. See also Assumption.add_assumes and the more primitive Thm.assume_hyps. +* Inner syntax token language allows regular quoted strings "..." +(only makes sense in practice, if outer syntax is delimited +differently). + *** HOL *** diff -r 1a29ea173bf9 -r 0b7a0c1fdf7e src/Doc/IsarRef/Inner_Syntax.thy --- a/src/Doc/IsarRef/Inner_Syntax.thy Mon Jan 20 20:38:51 2014 +0100 +++ b/src/Doc/IsarRef/Inner_Syntax.thy Wed Jan 22 15:10:33 2014 +0100 @@ -631,18 +631,19 @@ @{syntax_def (inner) num_token} & = & @{syntax_ref nat}@{text " | "}@{verbatim "-"}@{syntax_ref nat} \\ @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text " | "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\ @{syntax_def (inner) xnum_token} & = & @{verbatim "#"}@{syntax_ref nat}@{text " | "}@{verbatim "#-"}@{syntax_ref nat} \\ - @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\"} @{verbatim "''"} \\ + @{syntax_def (inner) string_token} & = & @{verbatim "\""} @{text "\"} @{verbatim "\""} \\ @{syntax_def (inner) cartouche} & = & @{verbatim "\"} @{text "\"} @{verbatim "\"} \\ \end{supertabular} \end{center} The token categories @{syntax (inner) num_token}, @{syntax (inner) float_token}, @{syntax (inner) xnum_token}, @{syntax (inner) - str_token}, and @{syntax (inner) cartouche} are not used in Pure. - Object-logics may implement numerals and string literals by adding - appropriate syntax declarations, together with some translation - functions (e.g.\ see Isabelle/HOL). + str_token}, @{syntax (inner) string_token}, and @{syntax (inner) + cartouche} are not used in Pure. Object-logics may implement + numerals and string literals by adding appropriate syntax + declarations, together with some translation functions (e.g.\ see + @{file "~~/src/HOL/Tools/string_syntax.ML"}). The derived categories @{syntax_def (inner) num_const}, @{syntax_def (inner) float_const}, and @{syntax_def (inner) num_const} provide diff -r 1a29ea173bf9 -r 0b7a0c1fdf7e src/HOL/Tools/string_syntax.ML --- a/src/HOL/Tools/string_syntax.ML Mon Jan 20 20:38:51 2014 +0100 +++ b/src/HOL/Tools/string_syntax.ML Wed Jan 22 15:10:33 2014 +0100 @@ -38,24 +38,24 @@ val specials = raw_explode "\\\"`'"; fun dest_chr c1 c2 = - let val c = chr (dest_nib c1 * 16 + dest_nib c2) in - if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c - then c - else if c = "\n" then "\" + let val s = chr (dest_nib c1 * 16 + dest_nib c2) in + if not (member (op =) specials s) andalso Symbol.is_ascii s andalso Symbol.is_printable s + then s + else if s = "\n" then "\" else raise Match end; fun dest_char (Ast.Appl [Ast.Constant @{const_syntax Char}, c1, c2]) = dest_chr c1 c2 | dest_char _ = raise Match; -fun syntax_string cs = +fun syntax_string ss = Ast.Appl [Ast.Constant @{syntax_const "_inner_string"}, - Ast.Variable (Lexicon.implode_str cs)]; + Ast.Variable (Lexicon.implode_str ss)]; fun char_ast_tr [Ast.Variable str] = - (case Lexicon.explode_str str of - [c] => mk_char c + (case Lexicon.explode_str (str, Position.none) of + [(s, _)] => mk_char s | _ => error ("Single character expected: " ^ str)) | char_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] = Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, char_ast_tr [ast1], ast2] @@ -69,16 +69,16 @@ (* string *) fun mk_string [] = Ast.Constant @{const_syntax Nil} - | mk_string (c :: cs) = - Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char c, mk_string cs]; + | mk_string (s :: ss) = + Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char s, mk_string ss]; fun string_ast_tr [Ast.Variable str] = - (case Lexicon.explode_str str of + (case Lexicon.explode_str (str, Position.none) of [] => Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, Ast.Constant @{const_syntax Nil}, Ast.Constant @{type_syntax string}] - | cs => mk_string cs) + | ss => mk_string (map Symbol_Pos.symbol ss)) | string_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] = Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, string_ast_tr [ast1], ast2] | string_ast_tr asts = raise Ast.AST ("string_tr", asts); diff -r 1a29ea173bf9 -r 0b7a0c1fdf7e src/Pure/Syntax/lexicon.ML --- a/src/Pure/Syntax/lexicon.ML Mon Jan 20 20:38:51 2014 +0100 +++ b/src/Pure/Syntax/lexicon.ML Wed Jan 22 15:10:33 2014 +0100 @@ -24,8 +24,8 @@ val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list val is_tid: string -> bool datatype token_kind = - Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | - NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF + Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | + FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF datatype token = Token of token_kind * string * Position.range val str_of_token: token -> string val pos_of_token: token -> Position.T @@ -42,8 +42,10 @@ val matching_tokens: token * token -> bool val valued_token: token -> bool val predef_term: string -> token option - val implode_str: string list -> string - val explode_str: string -> string list + val implode_string: Symbol.symbol list -> string + val explode_string: string * Position.T -> Symbol_Pos.T list + val implode_str: Symbol.symbol list -> string + val explode_str: string * Position.T -> Symbol_Pos.T list val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list val read_indexname: string -> indexname val read_var: string -> term @@ -116,8 +118,8 @@ (** datatype token **) datatype token_kind = - Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | - NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF; + Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | + FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF; datatype token = Token of token_kind * string * Position.range; @@ -152,6 +154,7 @@ ("float_token", FloatSy), ("xnum_token", XNumSy), ("str_token", StrSy), + ("string_token", StringSy), ("cartouche", Cartouche)]; val terminals = map #1 terminal_kinds; @@ -172,6 +175,7 @@ | FloatSy => Markup.numeral | XNumSy => Markup.numeral | StrSy => Markup.inner_string + | StringSy => Markup.inner_string | Cartouche => Markup.inner_cartouche | Comment => Markup.inner_comment | _ => Markup.empty; @@ -207,7 +211,25 @@ | NONE => NONE); -(* str tokens *) + +(** string literals **) + +fun explode_literal scan_body (str, pos) = + (case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of + SOME ss => ss + | _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos)); + + +(* string *) + +val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2; +val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2); + +fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss)); +val explode_string = explode_literal scan_string_body; + + +(* str *) val scan_chr = $$ "\\" |-- $$$ "'" || @@ -226,13 +248,8 @@ !!! "unclosed string literal" ($$ "'" |-- $$ "'" |-- (Scan.repeat scan_chr >> flat) --| $$ "'" --| $$ "'"); - -fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); - -fun explode_str str = - (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of - SOME cs => map Symbol_Pos.symbol cs - | _ => error ("Inner lexical error: literal string expected at " ^ quote str)); +fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss)); +val explode_str = explode_literal scan_str_body; @@ -265,14 +282,15 @@ Symbol_Pos.scan_cartouche err_prefix >> token Cartouche || Symbol_Pos.scan_comment err_prefix >> token Comment || Scan.max token_leq scan_lit scan_val || + scan_string >> token StringSy || scan_str >> token StrSy || Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space; in (case Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of (toks, []) => toks - | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^ - Position.here (#1 (Symbol_Pos.range ss)))) + | (_, ss) => + error (err_prefix ^ Symbol_Pos.content ss ^ Position.here (#1 (Symbol_Pos.range ss)))) end; diff -r 1a29ea173bf9 -r 0b7a0c1fdf7e src/Pure/pure_thy.ML --- a/src/Pure/pure_thy.ML Mon Jan 20 20:38:51 2014 +0100 +++ b/src/Pure/pure_thy.ML Wed Jan 22 15:10:33 2014 +0100 @@ -70,8 +70,9 @@ "args", "cargs", "pttrn", "pttrns", "idt", "idts", "aprop", "asms", "any", "prop'", "num_const", "float_const", "xnum_const", "num_position", "float_position", "xnum_position", "index", "struct", "tid_position", - "tvar_position", "id_position", "longid_position", "var_position", "str_position", - "cartouche_position", "type_name", "class_name"])) + "tvar_position", "id_position", "longid_position", "var_position", + "str_position", "string_position", "cartouche_position", "type_name", + "class_name"])) #> Sign.add_syntax_i (map (fn x => (x, typ "'a", NoSyn)) token_markers) #> Sign.add_syntax_i [("", typ "prop' => prop", Delimfix "_"), @@ -155,6 +156,7 @@ ("_position", typ "longid => longid_position", Delimfix "_"), ("_position", typ "var => var_position", Delimfix "_"), ("_position", typ "str_token => str_position", Delimfix "_"), + ("_position", typ "string_token => string_position", Delimfix "_"), ("_position", typ "cartouche => cartouche_position", Delimfix "_"), ("_type_constraint_", typ "'a", NoSyn), ("_context_const", typ "id_position => logic", Delimfix "CONST _"),