inner syntax token language allows regular quoted strings;
authorwenzelm
Wed Jan 22 15:10:33 2014 +0100 (2014-01-22)
changeset 551080b7a0c1fdf7e
parent 55107 1a29ea173bf9
child 55109 ecff9e26360c
inner syntax token language allows regular quoted strings;
tuned signature;
NEWS
src/Doc/IsarRef/Inner_Syntax.thy
src/HOL/Tools/string_syntax.ML
src/Pure/Syntax/lexicon.ML
src/Pure/pure_thy.ML
     1.1 --- a/NEWS	Mon Jan 20 20:38:51 2014 +0100
     1.2 +++ b/NEWS	Wed Jan 22 15:10:33 2014 +0100
     1.3 @@ -43,6 +43,10 @@
     1.4  context discipline.  See also Assumption.add_assumes and the more
     1.5  primitive Thm.assume_hyps.
     1.6  
     1.7 +* Inner syntax token language allows regular quoted strings "..."
     1.8 +(only makes sense in practice, if outer syntax is delimited
     1.9 +differently).
    1.10 +
    1.11  
    1.12  *** HOL ***
    1.13  
     2.1 --- a/src/Doc/IsarRef/Inner_Syntax.thy	Mon Jan 20 20:38:51 2014 +0100
     2.2 +++ b/src/Doc/IsarRef/Inner_Syntax.thy	Wed Jan 22 15:10:33 2014 +0100
     2.3 @@ -631,18 +631,19 @@
     2.4      @{syntax_def (inner) num_token} & = & @{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat} \\
     2.5      @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
     2.6      @{syntax_def (inner) xnum_token} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
     2.7 -
     2.8      @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
     2.9 +    @{syntax_def (inner) string_token} & = & @{verbatim "\""} @{text "\<dots>"} @{verbatim "\""} \\
    2.10      @{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
    2.11    \end{supertabular}
    2.12    \end{center}
    2.13  
    2.14    The token categories @{syntax (inner) num_token}, @{syntax (inner)
    2.15    float_token}, @{syntax (inner) xnum_token}, @{syntax (inner)
    2.16 -  str_token}, and @{syntax (inner) cartouche} are not used in Pure.
    2.17 -  Object-logics may implement numerals and string literals by adding
    2.18 -  appropriate syntax declarations, together with some translation
    2.19 -  functions (e.g.\ see Isabelle/HOL).
    2.20 +  str_token}, @{syntax (inner) string_token}, and @{syntax (inner)
    2.21 +  cartouche} are not used in Pure. Object-logics may implement
    2.22 +  numerals and string literals by adding appropriate syntax
    2.23 +  declarations, together with some translation functions (e.g.\ see
    2.24 +  @{file "~~/src/HOL/Tools/string_syntax.ML"}).
    2.25  
    2.26    The derived categories @{syntax_def (inner) num_const}, @{syntax_def
    2.27    (inner) float_const}, and @{syntax_def (inner) num_const} provide
     3.1 --- a/src/HOL/Tools/string_syntax.ML	Mon Jan 20 20:38:51 2014 +0100
     3.2 +++ b/src/HOL/Tools/string_syntax.ML	Wed Jan 22 15:10:33 2014 +0100
     3.3 @@ -38,24 +38,24 @@
     3.4  val specials = raw_explode "\\\"`'";
     3.5  
     3.6  fun dest_chr c1 c2 =
     3.7 -  let val c = chr (dest_nib c1 * 16 + dest_nib c2) in
     3.8 -    if not (member (op =) specials c) andalso Symbol.is_ascii c andalso Symbol.is_printable c
     3.9 -    then c
    3.10 -    else if c = "\n" then "\<newline>"
    3.11 +  let val s = chr (dest_nib c1 * 16 + dest_nib c2) in
    3.12 +    if not (member (op =) specials s) andalso Symbol.is_ascii s andalso Symbol.is_printable s
    3.13 +    then s
    3.14 +    else if s = "\n" then "\<newline>"
    3.15      else raise Match
    3.16    end;
    3.17  
    3.18  fun dest_char (Ast.Appl [Ast.Constant @{const_syntax Char}, c1, c2]) = dest_chr c1 c2
    3.19    | dest_char _ = raise Match;
    3.20  
    3.21 -fun syntax_string cs =
    3.22 +fun syntax_string ss =
    3.23    Ast.Appl [Ast.Constant @{syntax_const "_inner_string"},
    3.24 -    Ast.Variable (Lexicon.implode_str cs)];
    3.25 +    Ast.Variable (Lexicon.implode_str ss)];
    3.26  
    3.27  
    3.28  fun char_ast_tr [Ast.Variable str] =
    3.29 -      (case Lexicon.explode_str str of
    3.30 -        [c] => mk_char c
    3.31 +      (case Lexicon.explode_str (str, Position.none) of
    3.32 +        [(s, _)] => mk_char s
    3.33        | _ => error ("Single character expected: " ^ str))
    3.34    | char_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
    3.35        Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, char_ast_tr [ast1], ast2]
    3.36 @@ -69,16 +69,16 @@
    3.37  (* string *)
    3.38  
    3.39  fun mk_string [] = Ast.Constant @{const_syntax Nil}
    3.40 -  | mk_string (c :: cs) =
    3.41 -      Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char c, mk_string cs];
    3.42 +  | mk_string (s :: ss) =
    3.43 +      Ast.Appl [Ast.Constant @{const_syntax Cons}, mk_char s, mk_string ss];
    3.44  
    3.45  fun string_ast_tr [Ast.Variable str] =
    3.46 -      (case Lexicon.explode_str str of
    3.47 +      (case Lexicon.explode_str (str, Position.none) of
    3.48          [] =>
    3.49            Ast.Appl
    3.50              [Ast.Constant @{syntax_const "_constrain"},
    3.51                Ast.Constant @{const_syntax Nil}, Ast.Constant @{type_syntax string}]
    3.52 -      | cs => mk_string cs)
    3.53 +      | ss => mk_string (map Symbol_Pos.symbol ss))
    3.54    | string_ast_tr [Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, ast1, ast2]] =
    3.55        Ast.Appl [Ast.Constant @{syntax_const "_constrain"}, string_ast_tr [ast1], ast2]
    3.56    | string_ast_tr asts = raise Ast.AST ("string_tr", asts);
     4.1 --- a/src/Pure/Syntax/lexicon.ML	Mon Jan 20 20:38:51 2014 +0100
     4.2 +++ b/src/Pure/Syntax/lexicon.ML	Wed Jan 22 15:10:33 2014 +0100
     4.3 @@ -24,8 +24,8 @@
     4.4    val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list
     4.5    val is_tid: string -> bool
     4.6    datatype token_kind =
     4.7 -    Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
     4.8 -    NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF
     4.9 +    Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
    4.10 +    FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF
    4.11    datatype token = Token of token_kind * string * Position.range
    4.12    val str_of_token: token -> string
    4.13    val pos_of_token: token -> Position.T
    4.14 @@ -42,8 +42,10 @@
    4.15    val matching_tokens: token * token -> bool
    4.16    val valued_token: token -> bool
    4.17    val predef_term: string -> token option
    4.18 -  val implode_str: string list -> string
    4.19 -  val explode_str: string -> string list
    4.20 +  val implode_string: Symbol.symbol list -> string
    4.21 +  val explode_string: string * Position.T -> Symbol_Pos.T list
    4.22 +  val implode_str: Symbol.symbol list -> string
    4.23 +  val explode_str: string * Position.T -> Symbol_Pos.T list
    4.24    val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list
    4.25    val read_indexname: string -> indexname
    4.26    val read_var: string -> term
    4.27 @@ -116,8 +118,8 @@
    4.28  (** datatype token **)
    4.29  
    4.30  datatype token_kind =
    4.31 -  Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
    4.32 -  NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF;
    4.33 +  Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |
    4.34 +  FloatSy | XNumSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;
    4.35  
    4.36  datatype token = Token of token_kind * string * Position.range;
    4.37  
    4.38 @@ -152,6 +154,7 @@
    4.39    ("float_token", FloatSy),
    4.40    ("xnum_token", XNumSy),
    4.41    ("str_token", StrSy),
    4.42 +  ("string_token", StringSy),
    4.43    ("cartouche", Cartouche)];
    4.44  
    4.45  val terminals = map #1 terminal_kinds;
    4.46 @@ -172,6 +175,7 @@
    4.47    | FloatSy => Markup.numeral
    4.48    | XNumSy => Markup.numeral
    4.49    | StrSy => Markup.inner_string
    4.50 +  | StringSy => Markup.inner_string
    4.51    | Cartouche => Markup.inner_cartouche
    4.52    | Comment => Markup.inner_comment
    4.53    | _ => Markup.empty;
    4.54 @@ -207,7 +211,25 @@
    4.55    | NONE => NONE);
    4.56  
    4.57  
    4.58 -(* str tokens *)
    4.59 +
    4.60 +(** string literals **)
    4.61 +
    4.62 +fun explode_literal scan_body (str, pos) =
    4.63 +  (case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of
    4.64 +    SOME ss => ss
    4.65 +  | _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));
    4.66 +
    4.67 +
    4.68 +(* string *)
    4.69 +
    4.70 +val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2;
    4.71 +val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);
    4.72 +
    4.73 +fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss));
    4.74 +val explode_string = explode_literal scan_string_body;
    4.75 +
    4.76 +
    4.77 +(* str *)
    4.78  
    4.79  val scan_chr =
    4.80    $$ "\\" |-- $$$ "'" ||
    4.81 @@ -226,13 +248,8 @@
    4.82      !!! "unclosed string literal"
    4.83        ($$ "'" |-- $$ "'" |-- (Scan.repeat scan_chr >> flat) --| $$ "'" --| $$ "'");
    4.84  
    4.85 -
    4.86 -fun implode_str cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));
    4.87 -
    4.88 -fun explode_str str =
    4.89 -  (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of
    4.90 -    SOME cs => map Symbol_Pos.symbol cs
    4.91 -  | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
    4.92 +fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss));
    4.93 +val explode_str = explode_literal scan_str_body;
    4.94  
    4.95  
    4.96  
    4.97 @@ -265,14 +282,15 @@
    4.98        Symbol_Pos.scan_cartouche err_prefix >> token Cartouche ||
    4.99        Symbol_Pos.scan_comment err_prefix >> token Comment ||
   4.100        Scan.max token_leq scan_lit scan_val ||
   4.101 +      scan_string >> token StringSy ||
   4.102        scan_str >> token StrSy ||
   4.103        Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;
   4.104    in
   4.105      (case Scan.error
   4.106          (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of
   4.107        (toks, []) => toks
   4.108 -    | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
   4.109 -        Position.here (#1 (Symbol_Pos.range ss))))
   4.110 +    | (_, ss) =>
   4.111 +        error (err_prefix ^ Symbol_Pos.content ss ^ Position.here (#1 (Symbol_Pos.range ss))))
   4.112    end;
   4.113  
   4.114  
     5.1 --- a/src/Pure/pure_thy.ML	Mon Jan 20 20:38:51 2014 +0100
     5.2 +++ b/src/Pure/pure_thy.ML	Wed Jan 22 15:10:33 2014 +0100
     5.3 @@ -70,8 +70,9 @@
     5.4          "args", "cargs", "pttrn", "pttrns", "idt", "idts", "aprop", "asms",
     5.5          "any", "prop'", "num_const", "float_const", "xnum_const", "num_position",
     5.6          "float_position", "xnum_position", "index", "struct", "tid_position",
     5.7 -        "tvar_position", "id_position", "longid_position", "var_position", "str_position",
     5.8 -        "cartouche_position", "type_name", "class_name"]))
     5.9 +        "tvar_position", "id_position", "longid_position", "var_position",
    5.10 +        "str_position", "string_position", "cartouche_position", "type_name",
    5.11 +        "class_name"]))
    5.12    #> Sign.add_syntax_i (map (fn x => (x, typ "'a", NoSyn)) token_markers)
    5.13    #> Sign.add_syntax_i
    5.14     [("",            typ "prop' => prop",               Delimfix "_"),
    5.15 @@ -155,6 +156,7 @@
    5.16      ("_position",   typ "longid => longid_position",   Delimfix "_"),
    5.17      ("_position",   typ "var => var_position",         Delimfix "_"),
    5.18      ("_position",   typ "str_token => str_position",   Delimfix "_"),
    5.19 +    ("_position",   typ "string_token => string_position", Delimfix "_"),
    5.20      ("_position",   typ "cartouche => cartouche_position", Delimfix "_"),
    5.21      ("_type_constraint_", typ "'a",                    NoSyn),
    5.22      ("_context_const", typ "id_position => logic",     Delimfix "CONST _"),