# HG changeset patch # User wenzelm # Date 1540936746 -3600 # Node ID ab98f058f9dcc0af3abdf6bbcc9094b2607672f4 # Parent 9660bbf5ce7e85ee0cf9e185242d259d2504fdf0# Parent be8c70794375eee3a2b93bb2579fe0859295c8c4 merged diff -r 9660bbf5ce7e -r ab98f058f9dc NEWS --- a/NEWS Tue Oct 30 18:11:22 2018 +0100 +++ b/NEWS Tue Oct 30 22:59:06 2018 +0100 @@ -89,6 +89,22 @@ potentially with variations on ML syntax (existing ML_Env.SML_operations observes the official standard). +* GHC.read_source reads Haskell source text with antiquotations (only +the control-cartouche form). The default "cartouche" antiquotation +evaluates an ML expression of type string and inlines the result as +Haskell string literal. This allows to refer to Isabelle items robustly, +e.g. via Isabelle/ML antiquotations or library operations. For example: + +ML_command \ + GHC.read_source \<^context> \ + allConst, impConst, eqConst :: String + allConst = \\<^const_name>\Pure.all\\ + impConst = \\<^const_name>\Pure.imp\\ + eqConst = \\<^const_name>\Pure.eq\\ + \ + |> writeln +\ + *** System *** diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/General/utf8.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/General/utf8.ML Tue Oct 30 22:59:06 2018 +0100 @@ -0,0 +1,57 @@ +(* Title: Pure/General/utf8.ML + Author: Makarius + +Variations on UTF-8. +*) + +signature UTF8 = +sig + type codepoint = int + val decode_permissive: string -> codepoint list +end; + +structure UTF8: UTF8 = +struct + +type codepoint = int; + + +(* permissive UTF-8 decoding *) + +(*see also https://en.wikipedia.org/wiki/UTF-8#Description + overlong encodings enable byte-stuffing of low-ASCII*) + +local + +type state = codepoint list * codepoint * int; + +fun flush ((buf, code, rest): state) : state = + if code <> ~1 then + ((if rest = 0 andalso code <= 0x10FFFF then code else 0xFFFD) :: buf, ~1, 0) + else (buf, code, rest); + +fun init x n (state: state) : state = (#1 (flush state), Word8.toInt x, n); + +fun push x ((buf, code, rest): state) = + if rest <= 0 then init x ~1 (buf, code, rest) + else (buf, code * 64 + Word8.toInt x, rest - 1); + +fun append x ((buf, code, rest): state) : state = (Word8.toInt x :: buf, code, rest); + +fun decode (c, state) = + if c < 0w128 then state |> flush |> append c + else if Word8.andb (c, 0wxC0) = 0wx80 then state |> push (Word8.andb (c, 0wx3F)) + else if Word8.andb (c, 0wxE0) = 0wxC0 then state |> init (Word8.andb (c, 0wx1F)) 1 + else if Word8.andb (c, 0wxF0) = 0wxE0 then state |> init (Word8.andb (c, 0wx0F)) 2 + else if Word8.andb (c, 0wxF8) = 0wxF0 then state |> init (Word8.andb (c, 0wx07)) 3 + else state; + +in + +fun decode_permissive text = + Word8Vector.foldl decode ([], ~1, 0) (Byte.stringToBytes text) + |> flush |> #1 |> rev; + +end; + +end; diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/ML/ml_syntax.ML --- a/src/Pure/ML/ml_syntax.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Pure/ML/ml_syntax.ML Tue Oct 30 22:59:06 2018 +0100 @@ -15,7 +15,8 @@ val print_pair: ('a -> string) -> ('b -> string) -> 'a * 'b -> string val print_list: ('a -> string) -> 'a list -> string val print_option: ('a -> string) -> 'a option -> string - val print_char: string -> string + val print_symbol_char: Symbol.symbol -> string + val print_symbol: Symbol.symbol -> string val print_string: string -> string val print_strings: string list -> string val print_properties: Properties.T -> string @@ -59,7 +60,7 @@ fun print_option f NONE = "NONE" | print_option f (SOME x) = "SOME (" ^ f x ^ ")"; -fun print_chr s = +fun print_symbol_char s = if Symbol.is_char s then (case ord s of 34 => "\\\"" @@ -77,12 +78,12 @@ else "\\" ^ string_of_int c) else error ("Bad character: " ^ quote s); -fun print_char s = - if Symbol.is_char s then print_chr s - else if Symbol.is_utf8 s then translate_string print_chr s +fun print_symbol s = + if Symbol.is_char s then print_symbol_char s + else if Symbol.is_utf8 s then translate_string print_symbol_char s else s; -val print_string = quote o implode o map print_char o Symbol.explode; +val print_string = quote o implode o map print_symbol o Symbol.explode; val print_strings = print_list print_string; val print_properties = print_list (print_pair print_string print_string); @@ -124,7 +125,7 @@ val body' = if length body <= max_len then body else take (Int.max (max_len, 0)) body @ ["..."]; - in Pretty.str (quote (implode (map print_char body'))) end; + in Pretty.str (quote (implode (map print_symbol body'))) end; val _ = ML_system_pp (fn depth => fn _ => fn str => diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/PIDE/markup.ML --- a/src/Pure/PIDE/markup.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Pure/PIDE/markup.ML Tue Oct 30 22:59:06 2018 +0100 @@ -31,6 +31,7 @@ val language_prop: bool -> T val language_ML: bool -> T val language_SML: bool -> T + val language_haskell: bool -> T val language_document: bool -> T val language_antiquotation: T val language_text: bool -> T @@ -118,6 +119,7 @@ val antiquotedN: string val antiquoted: T val antiquoteN: string val antiquote: T val ML_antiquotationN: string + val haskell_antiquotationN: string val document_antiquotationN: string val document_antiquotation_optionN: string val paragraphN: string val paragraph: T @@ -302,6 +304,7 @@ val language_prop = language' {name = "prop", symbols = true, antiquotes = false}; val language_ML = language' {name = "ML", symbols = false, antiquotes = true}; val language_SML = language' {name = "SML", symbols = false, antiquotes = false}; +val language_haskell = language' {name = "Haskell", symbols = false, antiquotes = true}; val language_document = language' {name = "document", symbols = false, antiquotes = true}; val language_antiquotation = language {name = "antiquotation", symbols = true, antiquotes = false, delimited = true}; @@ -477,6 +480,7 @@ val (antiquoteN, antiquote) = markup_elem "antiquote"; val ML_antiquotationN = "ML_antiquotation"; +val haskell_antiquotationN = "Haskell_antiquotation"; val document_antiquotationN = "document_antiquotation"; val document_antiquotation_optionN = "document_antiquotation_option"; diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/ROOT.ML --- a/src/Pure/ROOT.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Pure/ROOT.ML Tue Oct 30 22:59:06 2018 +0100 @@ -48,6 +48,7 @@ ML_file "General/properties.ML"; ML_file "General/output.ML"; ML_file "PIDE/markup.ML"; +ML_file "General/utf8.ML"; ML_file "General/scan.ML"; ML_file "General/source.ML"; ML_file "General/symbol.ML"; @@ -345,3 +346,4 @@ ML_file_no_debug "Tools/debugger.ML"; ML_file "Tools/named_theorems.ML"; ML_file "Tools/jedit.ML"; +ML_file "Tools/ghc.ML"; diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/Thy/markdown.ML --- a/src/Pure/Thy/markdown.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Pure/Thy/markdown.ML Tue Oct 30 22:59:06 2018 +0100 @@ -84,7 +84,7 @@ (case bad_blanks source of [] => () | (c, pos) :: _ => - error ("Bad blank character " ^ quote (ML_Syntax.print_char c) ^ Position.here pos)); + error ("Bad blank character " ^ quote (ML_Syntax.print_symbol c) ^ Position.here pos)); val is_space = Symbol.is_space o Symbol_Pos.symbol; val is_empty = forall (fn Antiquote.Text ss => forall is_space ss | _ => false); diff -r 9660bbf5ce7e -r ab98f058f9dc src/Pure/Tools/ghc.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/Tools/ghc.ML Tue Oct 30 22:59:06 2018 +0100 @@ -0,0 +1,127 @@ +(* Title: Pure/Tools/ghc.ML + Author: Makarius + +Support for GHC: Glasgow Haskell Compiler. +*) + +signature GHC = +sig + val print_codepoint: UTF8.codepoint -> string + val print_symbol: Symbol.symbol -> string + val print_string: string -> string + val antiquotation: binding -> 'a Token.context_parser -> + ({context: Proof.context, source: Token.src, argument: 'a} -> string) -> theory -> theory + val read_source: Proof.context -> Input.source -> string + val set_result: string -> Proof.context -> Proof.context +end; + +structure GHC: GHC = +struct + +(** string literals **) + +fun print_codepoint c = + (case c of + 34 => "\\\"" + | 39 => "\\'" + | 92 => "\\\\" + | 7 => "\\a" + | 8 => "\\b" + | 9 => "\\t" + | 10 => "\\n" + | 11 => "\\v" + | 12 => "\\f" + | 13 => "\\r" + | c => + if c >= 32 andalso c < 127 then chr c + else "\\" ^ string_of_int c ^ "\\&"); + +fun print_symbol sym = + (case Symbol.decode sym of + Symbol.Char s => print_codepoint (ord s) + | Symbol.UTF8 s => UTF8.decode_permissive s |> map print_codepoint |> implode + | Symbol.Sym s => "\\092<" ^ s ^ ">" + | Symbol.Control s => "\\092<^" ^ s ^ ">" + | _ => translate_string (print_codepoint o ord) sym); + +val print_string = quote o implode o map print_symbol o Symbol.explode; + + +(** antiquotations **) + +(* theory data *) + +structure Antiqs = Theory_Data +( + type T = (Token.src -> Proof.context -> string) Name_Space.table; + val empty : T = Name_Space.empty_table Markup.haskell_antiquotationN; + val extend = I; + fun merge tabs : T = Name_Space.merge_tables tabs; +); + +val get_antiquotations = Antiqs.get o Proof_Context.theory_of; + +fun antiquotation name scan body thy = + let + fun antiq src ctxt = + let val (x, ctxt') = Token.syntax scan src ctxt + in body {context = ctxt', source = src, argument = x} end; + in thy |> Antiqs.map (Name_Space.define (Context.Theory thy) true (name, antiq) #> #2) end; + + +(* read source and expand antiquotations *) + +val scan_antiq = + Antiquote.scan_control >> Antiquote.Control || + Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> (Antiquote.Text o Symbol_Pos.symbol); + +fun read_source ctxt source = + let + val _ = + Context_Position.report ctxt (Input.pos_of source) + (Markup.language_haskell (Input.is_delimited source)); + + val (input, _) = + Input.source_explode source + |> Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_antiq)); + + val _ = Context_Position.reports ctxt (Antiquote.antiq_reports input); + + fun expand antiq = + (case antiq of + Antiquote.Text s => s + | Antiquote.Control {name, body, ...} => + let + val src = Token.make_src name (if null body then [] else [Token.read_cartouche body]); + val (src', f) = Token.check_src ctxt get_antiquotations src; + in f src' ctxt end + | Antiquote.Antiq antiq => error ("Bad antiquotation " ^ Position.here (#1 (#range antiq)))); + in implode (map expand input) end; + + +(* concrete antiquotation: ML expression as Haskell string literal *) + +structure Local_Data = Proof_Data +( + type T = string option; + fun init _ = NONE; +); + +val set_result = Local_Data.put o SOME; + +fun the_result ctxt = + (case Local_Data.get ctxt of + SOME s => s + | NONE => raise Fail "No result"); + +val _ = + Theory.setup + (antiquotation \<^binding>\cartouche\ (Scan.lift Args.cartouche_input) + (fn {context = ctxt, argument, ...} => + ctxt |> Context.proof_map + (ML_Context.expression ("result", Position.thread_data ()) + "string" "Context.map_proof (GHC.set_result result)" + (ML_Lex.read_source argument)) + |> the_result |> print_string)); + +end; diff -r 9660bbf5ce7e -r ab98f058f9dc src/Tools/Code/code_haskell.ML --- a/src/Tools/Code/code_haskell.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Tools/Code/code_haskell.ML Tue Oct 30 22:59:06 2018 +0100 @@ -36,14 +36,9 @@ (** Haskell serializer **) val print_haskell_string = - let - fun char c = - let - val _ = if Symbol.is_ascii c then () - else error "non-ASCII byte in Haskell string literal"; - val s = ML_Syntax.print_char c; - in if s = "'" then "\\'" else s end; - in quote o translate_string char end; + quote o translate_string (fn c => + if Symbol.is_ascii c then GHC.print_codepoint (ord c) + else error "non-ASCII byte in Haskell string literal"); fun print_haskell_stmt class_syntax tyco_syntax const_syntax reserved deresolve deriving_show = diff -r 9660bbf5ce7e -r ab98f058f9dc src/Tools/Code/code_ml.ML --- a/src/Tools/Code/code_ml.ML Tue Oct 30 18:11:22 2018 +0100 +++ b/src/Tools/Code/code_ml.ML Tue Oct 30 22:59:06 2018 +0100 @@ -55,7 +55,7 @@ if c = "\\" then "\\092" (*produce strings suitable for both SML as well as Isabelle/ML*) else if Symbol.is_ascii c - then ML_Syntax.print_char c + then ML_Syntax.print_symbol_char c else error "non-ASCII byte in SML string literal"; val print_sml_string = quote o translate_string print_sml_char;