merged
authorwenzelm
Tue Oct 30 22:59:06 2018 +0100 (6 months ago)
changeset 69213ab98f058f9dc
parent 69206 9660bbf5ce7e
parent 69212 be8c70794375
child 69214 74455459973d
merged
NEWS
     1.1 --- a/NEWS	Tue Oct 30 18:11:22 2018 +0100
     1.2 +++ b/NEWS	Tue Oct 30 22:59:06 2018 +0100
     1.3 @@ -89,6 +89,22 @@
     1.4  potentially with variations on ML syntax (existing ML_Env.SML_operations
     1.5  observes the official standard).
     1.6  
     1.7 +* GHC.read_source reads Haskell source text with antiquotations (only
     1.8 +the control-cartouche form). The default "cartouche" antiquotation
     1.9 +evaluates an ML expression of type string and inlines the result as
    1.10 +Haskell string literal. This allows to refer to Isabelle items robustly,
    1.11 +e.g. via Isabelle/ML antiquotations or library operations. For example:
    1.12 +
    1.13 +ML_command \<open>
    1.14 +  GHC.read_source \<^context> \<open>
    1.15 +    allConst, impConst, eqConst :: String
    1.16 +    allConst = \<open>\<^const_name>\<open>Pure.all\<close>\<close>
    1.17 +    impConst = \<open>\<^const_name>\<open>Pure.imp\<close>\<close>
    1.18 +    eqConst = \<open>\<^const_name>\<open>Pure.eq\<close>\<close>
    1.19 +  \<close>
    1.20 +  |> writeln
    1.21 +\<close>
    1.22 +
    1.23  
    1.24  *** System ***
    1.25  
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/Pure/General/utf8.ML	Tue Oct 30 22:59:06 2018 +0100
     2.3 @@ -0,0 +1,57 @@
     2.4 +(*  Title:      Pure/General/utf8.ML
     2.5 +    Author:     Makarius
     2.6 +
     2.7 +Variations on UTF-8.
     2.8 +*)
     2.9 +
    2.10 +signature UTF8 =
    2.11 +sig
    2.12 +  type codepoint = int
    2.13 +  val decode_permissive: string -> codepoint list
    2.14 +end;
    2.15 +
    2.16 +structure UTF8: UTF8 =
    2.17 +struct
    2.18 +
    2.19 +type codepoint = int;
    2.20 +
    2.21 +
    2.22 +(* permissive UTF-8 decoding *)
    2.23 +
    2.24 +(*see also https://en.wikipedia.org/wiki/UTF-8#Description
    2.25 +  overlong encodings enable byte-stuffing of low-ASCII*)
    2.26 +
    2.27 +local
    2.28 +
    2.29 +type state = codepoint list * codepoint * int;
    2.30 +
    2.31 +fun flush ((buf, code, rest): state) : state =
    2.32 +  if code <> ~1 then
    2.33 +    ((if rest = 0 andalso code <= 0x10FFFF then code else 0xFFFD) :: buf, ~1, 0)
    2.34 +  else (buf, code, rest);
    2.35 +
    2.36 +fun init x n (state: state) : state = (#1 (flush state), Word8.toInt x, n);
    2.37 +
    2.38 +fun push x ((buf, code, rest): state) =
    2.39 +  if rest <= 0 then init x ~1 (buf, code, rest)
    2.40 +  else (buf, code * 64 + Word8.toInt x, rest - 1);
    2.41 +
    2.42 +fun append x ((buf, code, rest): state) : state = (Word8.toInt x :: buf, code, rest);
    2.43 +
    2.44 +fun decode (c, state) =
    2.45 +  if c < 0w128 then state |> flush |> append c
    2.46 +  else if Word8.andb (c, 0wxC0) = 0wx80 then state |> push (Word8.andb (c, 0wx3F))
    2.47 +  else if Word8.andb (c, 0wxE0) = 0wxC0 then state |> init (Word8.andb (c, 0wx1F)) 1
    2.48 +  else if Word8.andb (c, 0wxF0) = 0wxE0 then state |> init (Word8.andb (c, 0wx0F)) 2
    2.49 +  else if Word8.andb (c, 0wxF8) = 0wxF0 then state |> init (Word8.andb (c, 0wx07)) 3
    2.50 +  else state;
    2.51 +
    2.52 +in
    2.53 +
    2.54 +fun decode_permissive text =
    2.55 +  Word8Vector.foldl decode ([], ~1, 0) (Byte.stringToBytes text)
    2.56 +  |> flush |> #1 |> rev;
    2.57 +
    2.58 +end;
    2.59 +
    2.60 +end;
     3.1 --- a/src/Pure/ML/ml_syntax.ML	Tue Oct 30 18:11:22 2018 +0100
     3.2 +++ b/src/Pure/ML/ml_syntax.ML	Tue Oct 30 22:59:06 2018 +0100
     3.3 @@ -15,7 +15,8 @@
     3.4    val print_pair: ('a -> string) -> ('b -> string) -> 'a * 'b -> string
     3.5    val print_list: ('a -> string) -> 'a list -> string
     3.6    val print_option: ('a -> string) -> 'a option -> string
     3.7 -  val print_char: string -> string
     3.8 +  val print_symbol_char: Symbol.symbol -> string
     3.9 +  val print_symbol: Symbol.symbol -> string
    3.10    val print_string: string -> string
    3.11    val print_strings: string list -> string
    3.12    val print_properties: Properties.T -> string
    3.13 @@ -59,7 +60,7 @@
    3.14  fun print_option f NONE = "NONE"
    3.15    | print_option f (SOME x) = "SOME (" ^ f x ^ ")";
    3.16  
    3.17 -fun print_chr s =
    3.18 +fun print_symbol_char s =
    3.19    if Symbol.is_char s then
    3.20      (case ord s of
    3.21        34 => "\\\""
    3.22 @@ -77,12 +78,12 @@
    3.23          else "\\" ^ string_of_int c)
    3.24    else error ("Bad character: " ^ quote s);
    3.25  
    3.26 -fun print_char s =
    3.27 -  if Symbol.is_char s then print_chr s
    3.28 -  else if Symbol.is_utf8 s then translate_string print_chr s
    3.29 +fun print_symbol s =
    3.30 +  if Symbol.is_char s then print_symbol_char s
    3.31 +  else if Symbol.is_utf8 s then translate_string print_symbol_char s
    3.32    else s;
    3.33  
    3.34 -val print_string = quote o implode o map print_char o Symbol.explode;
    3.35 +val print_string = quote o implode o map print_symbol o Symbol.explode;
    3.36  val print_strings = print_list print_string;
    3.37  
    3.38  val print_properties = print_list (print_pair print_string print_string);
    3.39 @@ -124,7 +125,7 @@
    3.40      val body' =
    3.41        if length body <= max_len then body
    3.42        else take (Int.max (max_len, 0)) body @ ["..."];
    3.43 -  in Pretty.str (quote (implode (map print_char body'))) end;
    3.44 +  in Pretty.str (quote (implode (map print_symbol body'))) end;
    3.45  
    3.46  val _ =
    3.47    ML_system_pp (fn depth => fn _ => fn str =>
     4.1 --- a/src/Pure/PIDE/markup.ML	Tue Oct 30 18:11:22 2018 +0100
     4.2 +++ b/src/Pure/PIDE/markup.ML	Tue Oct 30 22:59:06 2018 +0100
     4.3 @@ -31,6 +31,7 @@
     4.4    val language_prop: bool -> T
     4.5    val language_ML: bool -> T
     4.6    val language_SML: bool -> T
     4.7 +  val language_haskell: bool -> T
     4.8    val language_document: bool -> T
     4.9    val language_antiquotation: T
    4.10    val language_text: bool -> T
    4.11 @@ -118,6 +119,7 @@
    4.12    val antiquotedN: string val antiquoted: T
    4.13    val antiquoteN: string val antiquote: T
    4.14    val ML_antiquotationN: string
    4.15 +  val haskell_antiquotationN: string
    4.16    val document_antiquotationN: string
    4.17    val document_antiquotation_optionN: string
    4.18    val paragraphN: string val paragraph: T
    4.19 @@ -302,6 +304,7 @@
    4.20  val language_prop = language' {name = "prop", symbols = true, antiquotes = false};
    4.21  val language_ML = language' {name = "ML", symbols = false, antiquotes = true};
    4.22  val language_SML = language' {name = "SML", symbols = false, antiquotes = false};
    4.23 +val language_haskell = language' {name = "Haskell", symbols = false, antiquotes = true};
    4.24  val language_document = language' {name = "document", symbols = false, antiquotes = true};
    4.25  val language_antiquotation =
    4.26    language {name = "antiquotation", symbols = true, antiquotes = false, delimited = true};
    4.27 @@ -477,6 +480,7 @@
    4.28  val (antiquoteN, antiquote) = markup_elem "antiquote";
    4.29  
    4.30  val ML_antiquotationN = "ML_antiquotation";
    4.31 +val haskell_antiquotationN = "Haskell_antiquotation";
    4.32  val document_antiquotationN = "document_antiquotation";
    4.33  val document_antiquotation_optionN = "document_antiquotation_option";
    4.34  
     5.1 --- a/src/Pure/ROOT.ML	Tue Oct 30 18:11:22 2018 +0100
     5.2 +++ b/src/Pure/ROOT.ML	Tue Oct 30 22:59:06 2018 +0100
     5.3 @@ -48,6 +48,7 @@
     5.4  ML_file "General/properties.ML";
     5.5  ML_file "General/output.ML";
     5.6  ML_file "PIDE/markup.ML";
     5.7 +ML_file "General/utf8.ML";
     5.8  ML_file "General/scan.ML";
     5.9  ML_file "General/source.ML";
    5.10  ML_file "General/symbol.ML";
    5.11 @@ -345,3 +346,4 @@
    5.12  ML_file_no_debug "Tools/debugger.ML";
    5.13  ML_file "Tools/named_theorems.ML";
    5.14  ML_file "Tools/jedit.ML";
    5.15 +ML_file "Tools/ghc.ML";
     6.1 --- a/src/Pure/Thy/markdown.ML	Tue Oct 30 18:11:22 2018 +0100
     6.2 +++ b/src/Pure/Thy/markdown.ML	Tue Oct 30 22:59:06 2018 +0100
     6.3 @@ -84,7 +84,7 @@
     6.4    (case bad_blanks source of
     6.5      [] => ()
     6.6    | (c, pos) :: _ =>
     6.7 -      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
     6.8 +      error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos));
     6.9  
    6.10  val is_space = Symbol.is_space o Symbol_Pos.symbol;
    6.11  val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/Pure/Tools/ghc.ML	Tue Oct 30 22:59:06 2018 +0100
     7.3 @@ -0,0 +1,127 @@
     7.4 +(*  Title:      Pure/Tools/ghc.ML
     7.5 +    Author:     Makarius
     7.6 +
     7.7 +Support for GHC: Glasgow Haskell Compiler.
     7.8 +*)
     7.9 +
    7.10 +signature GHC =
    7.11 +sig
    7.12 +  val print_codepoint: UTF8.codepoint -> string
    7.13 +  val print_symbol: Symbol.symbol -> string
    7.14 +  val print_string: string -> string
    7.15 +  val antiquotation: binding -> 'a Token.context_parser ->
    7.16 +    ({context: Proof.context, source: Token.src, argument: 'a} -> string) -> theory -> theory
    7.17 +  val read_source: Proof.context -> Input.source -> string
    7.18 +  val set_result: string -> Proof.context -> Proof.context
    7.19 +end;
    7.20 +
    7.21 +structure GHC: GHC =
    7.22 +struct
    7.23 +
    7.24 +(** string literals **)
    7.25 +
    7.26 +fun print_codepoint c =
    7.27 +  (case c of
    7.28 +    34 => "\\\""
    7.29 +  | 39 => "\\'"
    7.30 +  | 92 => "\\\\"
    7.31 +  | 7 => "\\a"
    7.32 +  | 8 => "\\b"
    7.33 +  | 9 => "\\t"
    7.34 +  | 10 => "\\n"
    7.35 +  | 11 => "\\v"
    7.36 +  | 12 => "\\f"
    7.37 +  | 13 => "\\r"
    7.38 +  | c =>
    7.39 +      if c >= 32 andalso c < 127 then chr c
    7.40 +      else "\\" ^ string_of_int c ^ "\\&");
    7.41 +
    7.42 +fun print_symbol sym =
    7.43 +  (case Symbol.decode sym of
    7.44 +    Symbol.Char s => print_codepoint (ord s)
    7.45 +  | Symbol.UTF8 s => UTF8.decode_permissive s |> map print_codepoint |> implode
    7.46 +  | Symbol.Sym s => "\\092<" ^ s ^ ">"
    7.47 +  | Symbol.Control s => "\\092<^" ^ s ^ ">"
    7.48 +  | _ => translate_string (print_codepoint o ord) sym);
    7.49 +
    7.50 +val print_string = quote o implode o map print_symbol o Symbol.explode;
    7.51 +
    7.52 +
    7.53 +(** antiquotations **)
    7.54 +
    7.55 +(* theory data *)
    7.56 +
    7.57 +structure Antiqs = Theory_Data
    7.58 +(
    7.59 +  type T = (Token.src -> Proof.context -> string) Name_Space.table;
    7.60 +  val empty : T = Name_Space.empty_table Markup.haskell_antiquotationN;
    7.61 +  val extend = I;
    7.62 +  fun merge tabs : T = Name_Space.merge_tables tabs;
    7.63 +);
    7.64 +
    7.65 +val get_antiquotations = Antiqs.get o Proof_Context.theory_of;
    7.66 +
    7.67 +fun antiquotation name scan body thy =
    7.68 +  let
    7.69 +    fun antiq src ctxt =
    7.70 +      let val (x, ctxt') = Token.syntax scan src ctxt
    7.71 +      in body {context = ctxt', source = src, argument = x} end;
    7.72 +  in thy |> Antiqs.map (Name_Space.define (Context.Theory thy) true (name, antiq) #> #2) end;
    7.73 +
    7.74 +
    7.75 +(* read source and expand antiquotations *)
    7.76 +
    7.77 +val scan_antiq =
    7.78 +  Antiquote.scan_control >> Antiquote.Control ||
    7.79 +  Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> (Antiquote.Text o Symbol_Pos.symbol);
    7.80 +
    7.81 +fun read_source ctxt source =
    7.82 +  let
    7.83 +    val _ =
    7.84 +      Context_Position.report ctxt (Input.pos_of source)
    7.85 +        (Markup.language_haskell (Input.is_delimited source));
    7.86 +
    7.87 +    val (input, _) =
    7.88 +      Input.source_explode source
    7.89 +      |> Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_antiq));
    7.90 +
    7.91 +    val _ = Context_Position.reports ctxt (Antiquote.antiq_reports input);
    7.92 +
    7.93 +    fun expand antiq =
    7.94 +      (case antiq of
    7.95 +        Antiquote.Text s => s
    7.96 +      | Antiquote.Control {name, body, ...} =>
    7.97 +          let
    7.98 +            val src = Token.make_src name (if null body then [] else [Token.read_cartouche body]);
    7.99 +            val (src', f) = Token.check_src ctxt get_antiquotations src;
   7.100 +          in f src' ctxt end
   7.101 +      | Antiquote.Antiq antiq => error ("Bad antiquotation " ^ Position.here (#1 (#range antiq))));
   7.102 +  in implode (map expand input) end;
   7.103 +
   7.104 +
   7.105 +(* concrete antiquotation: ML expression as Haskell string literal *)
   7.106 +
   7.107 +structure Local_Data = Proof_Data
   7.108 +(
   7.109 +  type T = string option;
   7.110 +  fun init _ = NONE;
   7.111 +);
   7.112 +
   7.113 +val set_result = Local_Data.put o SOME;
   7.114 +
   7.115 +fun the_result ctxt =
   7.116 +  (case Local_Data.get ctxt of
   7.117 +    SOME s => s
   7.118 +  | NONE => raise Fail "No result");
   7.119 +
   7.120 +val _ =
   7.121 +  Theory.setup
   7.122 +    (antiquotation \<^binding>\<open>cartouche\<close> (Scan.lift Args.cartouche_input)
   7.123 +      (fn {context = ctxt, argument, ...} =>
   7.124 +        ctxt |> Context.proof_map
   7.125 +          (ML_Context.expression ("result", Position.thread_data ())
   7.126 +            "string" "Context.map_proof (GHC.set_result result)"
   7.127 +            (ML_Lex.read_source argument))
   7.128 +        |> the_result |> print_string));
   7.129 +
   7.130 +end;
     8.1 --- a/src/Tools/Code/code_haskell.ML	Tue Oct 30 18:11:22 2018 +0100
     8.2 +++ b/src/Tools/Code/code_haskell.ML	Tue Oct 30 22:59:06 2018 +0100
     8.3 @@ -36,14 +36,9 @@
     8.4  (** Haskell serializer **)
     8.5  
     8.6  val print_haskell_string =
     8.7 -  let
     8.8 -    fun char c =
     8.9 -      let
    8.10 -        val _ = if Symbol.is_ascii c then ()
    8.11 -          else error "non-ASCII byte in Haskell string literal";
    8.12 -        val s = ML_Syntax.print_char c;
    8.13 -      in if s = "'" then "\\'" else s end;
    8.14 -  in quote o translate_string char end;
    8.15 +  quote o translate_string (fn c =>
    8.16 +    if Symbol.is_ascii c then GHC.print_codepoint (ord c)
    8.17 +    else error "non-ASCII byte in Haskell string literal");
    8.18  
    8.19  fun print_haskell_stmt class_syntax tyco_syntax const_syntax
    8.20      reserved deresolve deriving_show =
     9.1 --- a/src/Tools/Code/code_ml.ML	Tue Oct 30 18:11:22 2018 +0100
     9.2 +++ b/src/Tools/Code/code_ml.ML	Tue Oct 30 22:59:06 2018 +0100
     9.3 @@ -55,7 +55,7 @@
     9.4    if c = "\\"
     9.5    then "\\092" (*produce strings suitable for both SML as well as Isabelle/ML*)
     9.6    else if Symbol.is_ascii c
     9.7 -  then ML_Syntax.print_char c
     9.8 +  then ML_Syntax.print_symbol_char c
     9.9    else error "non-ASCII byte in SML string literal";
    9.10  
    9.11  val print_sml_string = quote o translate_string print_sml_char;