merged
authorwenzelm
Tue, 30 Oct 2018 22:59:06 +0100
changeset 69213 ab98f058f9dc
parent 69206 9660bbf5ce7e (current diff)
parent 69212 be8c70794375 (diff)
child 69214 74455459973d
merged
NEWS
--- a/NEWS	Tue Oct 30 18:11:22 2018 +0100
+++ b/NEWS	Tue Oct 30 22:59:06 2018 +0100
@@ -89,6 +89,22 @@
 potentially with variations on ML syntax (existing ML_Env.SML_operations
 observes the official standard).
 
+* GHC.read_source reads Haskell source text with antiquotations (only
+the control-cartouche form). The default "cartouche" antiquotation
+evaluates an ML expression of type string and inlines the result as
+Haskell string literal. This allows to refer to Isabelle items robustly,
+e.g. via Isabelle/ML antiquotations or library operations. For example:
+
+ML_command \<open>
+  GHC.read_source \<^context> \<open>
+    allConst, impConst, eqConst :: String
+    allConst = \<open>\<^const_name>\<open>Pure.all\<close>\<close>
+    impConst = \<open>\<^const_name>\<open>Pure.imp\<close>\<close>
+    eqConst = \<open>\<^const_name>\<open>Pure.eq\<close>\<close>
+  \<close>
+  |> writeln
+\<close>
+
 
 *** System ***
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/utf8.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -0,0 +1,57 @@
+(*  Title:      Pure/General/utf8.ML
+    Author:     Makarius
+
+Variations on UTF-8.
+*)
+
+signature UTF8 =
+sig
+  type codepoint = int
+  val decode_permissive: string -> codepoint list
+end;
+
+structure UTF8: UTF8 =
+struct
+
+type codepoint = int;
+
+
+(* permissive UTF-8 decoding *)
+
+(*see also https://en.wikipedia.org/wiki/UTF-8#Description
+  overlong encodings enable byte-stuffing of low-ASCII*)
+
+local
+
+type state = codepoint list * codepoint * int;
+
+fun flush ((buf, code, rest): state) : state =
+  if code <> ~1 then
+    ((if rest = 0 andalso code <= 0x10FFFF then code else 0xFFFD) :: buf, ~1, 0)
+  else (buf, code, rest);
+
+fun init x n (state: state) : state = (#1 (flush state), Word8.toInt x, n);
+
+fun push x ((buf, code, rest): state) =
+  if rest <= 0 then init x ~1 (buf, code, rest)
+  else (buf, code * 64 + Word8.toInt x, rest - 1);
+
+fun append x ((buf, code, rest): state) : state = (Word8.toInt x :: buf, code, rest);
+
+fun decode (c, state) =
+  if c < 0w128 then state |> flush |> append c
+  else if Word8.andb (c, 0wxC0) = 0wx80 then state |> push (Word8.andb (c, 0wx3F))
+  else if Word8.andb (c, 0wxE0) = 0wxC0 then state |> init (Word8.andb (c, 0wx1F)) 1
+  else if Word8.andb (c, 0wxF0) = 0wxE0 then state |> init (Word8.andb (c, 0wx0F)) 2
+  else if Word8.andb (c, 0wxF8) = 0wxF0 then state |> init (Word8.andb (c, 0wx07)) 3
+  else state;
+
+in
+
+fun decode_permissive text =
+  Word8Vector.foldl decode ([], ~1, 0) (Byte.stringToBytes text)
+  |> flush |> #1 |> rev;
+
+end;
+
+end;
--- a/src/Pure/ML/ml_syntax.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Pure/ML/ml_syntax.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -15,7 +15,8 @@
   val print_pair: ('a -> string) -> ('b -> string) -> 'a * 'b -> string
   val print_list: ('a -> string) -> 'a list -> string
   val print_option: ('a -> string) -> 'a option -> string
-  val print_char: string -> string
+  val print_symbol_char: Symbol.symbol -> string
+  val print_symbol: Symbol.symbol -> string
   val print_string: string -> string
   val print_strings: string list -> string
   val print_properties: Properties.T -> string
@@ -59,7 +60,7 @@
 fun print_option f NONE = "NONE"
   | print_option f (SOME x) = "SOME (" ^ f x ^ ")";
 
-fun print_chr s =
+fun print_symbol_char s =
   if Symbol.is_char s then
     (case ord s of
       34 => "\\\""
@@ -77,12 +78,12 @@
         else "\\" ^ string_of_int c)
   else error ("Bad character: " ^ quote s);
 
-fun print_char s =
-  if Symbol.is_char s then print_chr s
-  else if Symbol.is_utf8 s then translate_string print_chr s
+fun print_symbol s =
+  if Symbol.is_char s then print_symbol_char s
+  else if Symbol.is_utf8 s then translate_string print_symbol_char s
   else s;
 
-val print_string = quote o implode o map print_char o Symbol.explode;
+val print_string = quote o implode o map print_symbol o Symbol.explode;
 val print_strings = print_list print_string;
 
 val print_properties = print_list (print_pair print_string print_string);
@@ -124,7 +125,7 @@
     val body' =
       if length body <= max_len then body
       else take (Int.max (max_len, 0)) body @ ["..."];
-  in Pretty.str (quote (implode (map print_char body'))) end;
+  in Pretty.str (quote (implode (map print_symbol body'))) end;
 
 val _ =
   ML_system_pp (fn depth => fn _ => fn str =>
--- a/src/Pure/PIDE/markup.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Pure/PIDE/markup.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -31,6 +31,7 @@
   val language_prop: bool -> T
   val language_ML: bool -> T
   val language_SML: bool -> T
+  val language_haskell: bool -> T
   val language_document: bool -> T
   val language_antiquotation: T
   val language_text: bool -> T
@@ -118,6 +119,7 @@
   val antiquotedN: string val antiquoted: T
   val antiquoteN: string val antiquote: T
   val ML_antiquotationN: string
+  val haskell_antiquotationN: string
   val document_antiquotationN: string
   val document_antiquotation_optionN: string
   val paragraphN: string val paragraph: T
@@ -302,6 +304,7 @@
 val language_prop = language' {name = "prop", symbols = true, antiquotes = false};
 val language_ML = language' {name = "ML", symbols = false, antiquotes = true};
 val language_SML = language' {name = "SML", symbols = false, antiquotes = false};
+val language_haskell = language' {name = "Haskell", symbols = false, antiquotes = true};
 val language_document = language' {name = "document", symbols = false, antiquotes = true};
 val language_antiquotation =
   language {name = "antiquotation", symbols = true, antiquotes = false, delimited = true};
@@ -477,6 +480,7 @@
 val (antiquoteN, antiquote) = markup_elem "antiquote";
 
 val ML_antiquotationN = "ML_antiquotation";
+val haskell_antiquotationN = "Haskell_antiquotation";
 val document_antiquotationN = "document_antiquotation";
 val document_antiquotation_optionN = "document_antiquotation_option";
 
--- a/src/Pure/ROOT.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Pure/ROOT.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -48,6 +48,7 @@
 ML_file "General/properties.ML";
 ML_file "General/output.ML";
 ML_file "PIDE/markup.ML";
+ML_file "General/utf8.ML";
 ML_file "General/scan.ML";
 ML_file "General/source.ML";
 ML_file "General/symbol.ML";
@@ -345,3 +346,4 @@
 ML_file_no_debug "Tools/debugger.ML";
 ML_file "Tools/named_theorems.ML";
 ML_file "Tools/jedit.ML";
+ML_file "Tools/ghc.ML";
--- a/src/Pure/Thy/markdown.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Pure/Thy/markdown.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -84,7 +84,7 @@
   (case bad_blanks source of
     [] => ()
   | (c, pos) :: _ =>
-      error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos));
+      error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos));
 
 val is_space = Symbol.is_space o Symbol_Pos.symbol;
 val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Tools/ghc.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -0,0 +1,127 @@
+(*  Title:      Pure/Tools/ghc.ML
+    Author:     Makarius
+
+Support for GHC: Glasgow Haskell Compiler.
+*)
+
+signature GHC =
+sig
+  val print_codepoint: UTF8.codepoint -> string
+  val print_symbol: Symbol.symbol -> string
+  val print_string: string -> string
+  val antiquotation: binding -> 'a Token.context_parser ->
+    ({context: Proof.context, source: Token.src, argument: 'a} -> string) -> theory -> theory
+  val read_source: Proof.context -> Input.source -> string
+  val set_result: string -> Proof.context -> Proof.context
+end;
+
+structure GHC: GHC =
+struct
+
+(** string literals **)
+
+fun print_codepoint c =
+  (case c of
+    34 => "\\\""
+  | 39 => "\\'"
+  | 92 => "\\\\"
+  | 7 => "\\a"
+  | 8 => "\\b"
+  | 9 => "\\t"
+  | 10 => "\\n"
+  | 11 => "\\v"
+  | 12 => "\\f"
+  | 13 => "\\r"
+  | c =>
+      if c >= 32 andalso c < 127 then chr c
+      else "\\" ^ string_of_int c ^ "\\&");
+
+fun print_symbol sym =
+  (case Symbol.decode sym of
+    Symbol.Char s => print_codepoint (ord s)
+  | Symbol.UTF8 s => UTF8.decode_permissive s |> map print_codepoint |> implode
+  | Symbol.Sym s => "\\092<" ^ s ^ ">"
+  | Symbol.Control s => "\\092<^" ^ s ^ ">"
+  | _ => translate_string (print_codepoint o ord) sym);
+
+val print_string = quote o implode o map print_symbol o Symbol.explode;
+
+
+(** antiquotations **)
+
+(* theory data *)
+
+structure Antiqs = Theory_Data
+(
+  type T = (Token.src -> Proof.context -> string) Name_Space.table;
+  val empty : T = Name_Space.empty_table Markup.haskell_antiquotationN;
+  val extend = I;
+  fun merge tabs : T = Name_Space.merge_tables tabs;
+);
+
+val get_antiquotations = Antiqs.get o Proof_Context.theory_of;
+
+fun antiquotation name scan body thy =
+  let
+    fun antiq src ctxt =
+      let val (x, ctxt') = Token.syntax scan src ctxt
+      in body {context = ctxt', source = src, argument = x} end;
+  in thy |> Antiqs.map (Name_Space.define (Context.Theory thy) true (name, antiq) #> #2) end;
+
+
+(* read source and expand antiquotations *)
+
+val scan_antiq =
+  Antiquote.scan_control >> Antiquote.Control ||
+  Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> (Antiquote.Text o Symbol_Pos.symbol);
+
+fun read_source ctxt source =
+  let
+    val _ =
+      Context_Position.report ctxt (Input.pos_of source)
+        (Markup.language_haskell (Input.is_delimited source));
+
+    val (input, _) =
+      Input.source_explode source
+      |> Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_antiq));
+
+    val _ = Context_Position.reports ctxt (Antiquote.antiq_reports input);
+
+    fun expand antiq =
+      (case antiq of
+        Antiquote.Text s => s
+      | Antiquote.Control {name, body, ...} =>
+          let
+            val src = Token.make_src name (if null body then [] else [Token.read_cartouche body]);
+            val (src', f) = Token.check_src ctxt get_antiquotations src;
+          in f src' ctxt end
+      | Antiquote.Antiq antiq => error ("Bad antiquotation " ^ Position.here (#1 (#range antiq))));
+  in implode (map expand input) end;
+
+
+(* concrete antiquotation: ML expression as Haskell string literal *)
+
+structure Local_Data = Proof_Data
+(
+  type T = string option;
+  fun init _ = NONE;
+);
+
+val set_result = Local_Data.put o SOME;
+
+fun the_result ctxt =
+  (case Local_Data.get ctxt of
+    SOME s => s
+  | NONE => raise Fail "No result");
+
+val _ =
+  Theory.setup
+    (antiquotation \<^binding>\<open>cartouche\<close> (Scan.lift Args.cartouche_input)
+      (fn {context = ctxt, argument, ...} =>
+        ctxt |> Context.proof_map
+          (ML_Context.expression ("result", Position.thread_data ())
+            "string" "Context.map_proof (GHC.set_result result)"
+            (ML_Lex.read_source argument))
+        |> the_result |> print_string));
+
+end;
--- a/src/Tools/Code/code_haskell.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Tools/Code/code_haskell.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -36,14 +36,9 @@
 (** Haskell serializer **)
 
 val print_haskell_string =
-  let
-    fun char c =
-      let
-        val _ = if Symbol.is_ascii c then ()
-          else error "non-ASCII byte in Haskell string literal";
-        val s = ML_Syntax.print_char c;
-      in if s = "'" then "\\'" else s end;
-  in quote o translate_string char end;
+  quote o translate_string (fn c =>
+    if Symbol.is_ascii c then GHC.print_codepoint (ord c)
+    else error "non-ASCII byte in Haskell string literal");
 
 fun print_haskell_stmt class_syntax tyco_syntax const_syntax
     reserved deresolve deriving_show =
--- a/src/Tools/Code/code_ml.ML	Tue Oct 30 18:11:22 2018 +0100
+++ b/src/Tools/Code/code_ml.ML	Tue Oct 30 22:59:06 2018 +0100
@@ -55,7 +55,7 @@
   if c = "\\"
   then "\\092" (*produce strings suitable for both SML as well as Isabelle/ML*)
   else if Symbol.is_ascii c
-  then ML_Syntax.print_char c
+  then ML_Syntax.print_symbol_char c
   else error "non-ASCII byte in SML string literal";
 
 val print_sml_string = quote o translate_string print_sml_char;