more robust treatment of UTF8 in raw byte sources;
authorwenzelm
Wed, 25 Oct 2017 14:39:22 +0200
changeset 66918 ec2b50aeb0dd
parent 66917 fcf84cd6c94f
child 66919 1f93e376aeb6
more robust treatment of UTF8 in raw byte sources;
src/Pure/General/scan.scala
src/Pure/PIDE/resources.scala
src/Pure/Thy/thy_header.scala
--- a/src/Pure/General/scan.scala	Wed Oct 25 14:36:29 2017 +0200
+++ b/src/Pure/General/scan.scala	Wed Oct 25 14:39:22 2017 +0200
@@ -491,6 +491,15 @@
     make_byte_reader(stream, stream_length)
   }
 
+  def reader_is_utf8(reader: Reader[Char]): Boolean =
+    reader.isInstanceOf[Byte_Reader]
+
+  def reader_decode_utf8(is_utf8: Boolean, s: String): String =
+    if (is_utf8) UTF8.decode_permissive(s) else s
+
+  def reader_decode_utf8(reader: Reader[Char], s: String): String =
+    reader_decode_utf8(reader_is_utf8(reader), s)
+
 
   /* plain text reader */
 
--- a/src/Pure/PIDE/resources.scala	Wed Oct 25 14:36:29 2017 +0200
+++ b/src/Pure/PIDE/resources.scala	Wed Oct 25 14:39:22 2017 +0200
@@ -59,10 +59,11 @@
 
   def loaded_files(syntax: Outer_Syntax, name: Document.Node.Name): () => List[Path] =
   {
-    val raw_text = with_thy_reader(name, reader => reader.source.toString)
+    val (is_utf8, raw_text) =
+      with_thy_reader(name, reader => (Scan.reader_is_utf8(reader), reader.source.toString))
     () => {
       if (syntax.load_commands_in(raw_text)) {
-        val text = Symbol.decode(raw_text)
+        val text = Symbol.decode(Scan.reader_decode_utf8(is_utf8, raw_text))
         val spans = syntax.parse_spans(text)
         val dir = Path.explode(name.master_dir)
         spans.iterator.map(Command.span_files(syntax, _)._1).flatten.
--- a/src/Pure/Thy/thy_header.scala	Wed Oct 25 14:36:29 2017 +0200
+++ b/src/Pure/Thy/thy_header.scala	Wed Oct 25 14:39:22 2017 +0200
@@ -189,10 +189,7 @@
   def read(reader: Reader[Char], start: Token.Pos, strict: Boolean = true): Thy_Header =
   {
     val (_, tokens0) = read_tokens(reader, true)
-    val text =
-      if (reader.isInstanceOf[Scan.Byte_Reader])
-        UTF8.decode_permissive(Token.implode(tokens0))
-      else Token.implode(tokens0)
+    val text = Scan.reader_decode_utf8(reader, Token.implode(tokens0))
 
     val (drop_tokens, tokens) = read_tokens(Scan.char_reader(text), strict)
     val pos = (start /: drop_tokens)(_.advance(_))