--- a/NEWS Sat Dec 30 14:15:44 2017 +0100
+++ b/NEWS Sat Dec 30 20:04:05 2017 +0100
@@ -81,6 +81,12 @@
* Action "isabelle.draft" is similar to "isabelle.preview", but shows a
plain-text document draft.
+* When loading text files, the Isabelle symbols encoding UTF-8-Isabelle
+is only used if there is no conflict with existing Unicode sequences in
+the file. Otherwise, the fallback encoding is plain UTF-8 and Isabelle
+symbols remain in literal \<symbol> form. This avoids accidental loss of
+Unicode content when saving the file.
+
*** Isabelle/VSCode Prover IDE ***
--- a/src/Doc/JEdit/JEdit.thy Sat Dec 30 14:15:44 2017 +0100
+++ b/src/Doc/JEdit/JEdit.thy Sat Dec 30 20:04:05 2017 +0100
@@ -504,16 +504,26 @@
\<close>
paragraph \<open>Encoding.\<close>
+
text \<open>Technically, the Unicode interpretation of Isabelle symbols is an
\<^emph>\<open>encoding\<close> called \<^verbatim>\<open>UTF-8-Isabelle\<close> in jEdit (\<^emph>\<open>not\<close> in the underlying
JVM). It is provided by the Isabelle Base plugin and enabled by default for
- all source files in Isabelle/jEdit. Sometimes such defaults are reset
- accidentally, or malformed UTF-8 sequences in the text force jEdit to fall
- back on a different encoding like \<^verbatim>\<open>ISO-8859-15\<close>. In that case, verbatim
- ``\<^verbatim>\<open>\<alpha>\<close>'' will be shown in the text buffer instead of its Unicode rendering
- ``\<open>\<alpha>\<close>''. The jEdit menu operation \<^emph>\<open>File~/ Reload with Encoding~/
- UTF-8-Isabelle\<close> helps to resolve such problems (after repairing malformed
- parts of the text). \<close>
+ all source files in Isabelle/jEdit.
+
+ Sometimes such defaults are reset accidentally, or malformed UTF-8 sequences
+ in the text force jEdit to fall back on a different encoding like
+ \<^verbatim>\<open>ISO-8859-15\<close>. In that case, verbatim ``\<^verbatim>\<open>\<alpha>\<close>'' will be shown in the text
+ buffer instead of its Unicode rendering ``\<open>\<alpha>\<close>''. The jEdit menu operation
+ \<^emph>\<open>File~/ Reload with Encoding~/ UTF-8-Isabelle\<close> helps to resolve such
+ problems (after repairing malformed parts of the text).
+
+ If the loaded text already contains Unicode sequences that are in conflict
+ with the Isabelle symbol encoding, the fallback-encoding UTF-8 is used and
+ Isabelle symbols remain in literal \<^verbatim>\<open>\<symbol>\<close> form. The jEdit menu
+ operation \<^emph>\<open>Utilities~/ Buffer Options~/ Character encoding\<close> allows to
+ enforce the UTF-8-Isabelle, but this will also change original Unicode
+ text into Isabelle symbols when saving the file!
+\<close>
paragraph \<open>Font.\<close>
text \<open>Correct rendering via Unicode requires a font that contains glyphs for
--- a/src/Pure/General/symbol.scala Sat Dec 30 14:15:44 2017 +0100
+++ b/src/Pure/General/symbol.scala Sat Dec 30 20:04:05 2017 +0100
@@ -504,6 +504,7 @@
def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs
def codes: List[(Symbol, Int)] = symbols.codes
+ lazy val is_code: Int => Boolean = codes.map(_._2).toSet
def decode(text: String): String = symbols.decode(text)
def encode(text: String): String = symbols.encode(text)
--- a/src/Tools/jEdit/src-base/isabelle_encoding.scala Sat Dec 30 14:15:44 2017 +0100
+++ b/src/Tools/jEdit/src-base/isabelle_encoding.scala Sat Dec 30 20:04:05 2017 +0100
@@ -11,7 +11,7 @@
import org.gjt.sp.jedit.io.Encoding
-import java.nio.charset.{Charset, CodingErrorAction}
+import java.nio.charset.{Charset, CodingErrorAction, CharacterCodingException}
import java.io.{InputStream, OutputStream, Reader, Writer, InputStreamReader, OutputStreamWriter,
CharArrayReader, ByteArrayOutputStream}
@@ -22,20 +22,25 @@
{
private val BUFSIZE = 32768
- private def text_reader(in: InputStream, codec: Codec): Reader =
+ private def text_reader(in: InputStream, codec: Codec, strict: Boolean): Reader =
{
- val source = new BufferedSource(in)(codec)
- new CharArrayReader(Symbol.decode(source.mkString).toArray)
+ val source = (new BufferedSource(in)(codec)).mkString
+
+ if (strict && Codepoint.iterator(source).exists(Symbol.is_code))
+ throw new CharacterCodingException()
+
+ new CharArrayReader(Symbol.decode(source).toArray)
}
- override def getTextReader(in: InputStream): Reader = text_reader(in, UTF8.codec())
+ override def getTextReader(in: InputStream): Reader =
+ text_reader(in, UTF8.codec(), true)
override def getPermissiveTextReader(in: InputStream): Reader =
{
val codec = UTF8.codec()
codec.onMalformedInput(CodingErrorAction.REPLACE)
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
- text_reader(in, codec)
+ text_reader(in, codec, false)
}
override def getTextWriter(out: OutputStream): Writer =