merged
authorwenzelm
Wed, 03 Jul 2024 21:54:17 +0200
changeset 80496 7958907b959a
parent 80484 0ca36dcdcbd3 (current diff)
parent 80495 9591af6f6b77 (diff)
child 80497 bd00bdf39c86
child 80503 d59cc10a6888
child 80519 d757f0f98447
merged
--- a/src/Pure/General/bytes.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/bytes.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -10,6 +10,7 @@
 import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
   InputStream, OutputStream, File => JFile}
 import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets.ISO_8859_1
 import java.nio.channels.FileChannel
 import java.nio.file.StandardOpenOption
 import java.util.Arrays
@@ -45,7 +46,11 @@
 
   val empty: Bytes = reuse_array(new Array(0))
 
-  def apply(s: CharSequence): Bytes =
+  def raw(s: String): Bytes =
+    if (s.isEmpty) empty
+    else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) }
+
+  def apply(s: String): Bytes =
     if (s.isEmpty) empty
     else Builder.use(hint = s.length) { builder => builder += s }
 
@@ -131,21 +136,6 @@
   def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
 
 
-  /* vector of short unsigned integers */
-
-  trait Vec {
-    def size: Long
-    def apply(i: Long): Char
-  }
-
-  class Vec_String(string: String) extends Vec {
-    override def size: Long = string.length.toLong
-    override def apply(i: Long): Char =
-      if (0 <= i && i < size) string(i.toInt)
-      else throw new IndexOutOfBoundsException
-  }
-
-
   /* incremental builder: unsynchronized! */
 
   object Builder {
@@ -173,6 +163,8 @@
   }
 
   final class Builder private[Bytes](hint: Long) {
+    builder =>
+
     private var chunks =
       new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt)
 
@@ -240,12 +232,12 @@
       }
     }
 
-    def += (s: CharSequence): Unit =
-      if (s.length > 0) { this += UTF8.bytes(s.toString) }
+    def += (s: String): Unit =
+      if (s.length > 0) { builder += UTF8.bytes(s) }
 
-    def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
+    def += (array: Array[Byte]): Unit = { builder += (array, 0, array.length) }
 
-    def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
+    def += (a: Subarray): Unit = { builder += (a.array, a.offset, a.length) }
 
     private def done(): Bytes = {
       val cs = chunks.toArray
@@ -292,7 +284,9 @@
   protected val chunk0: Array[Byte],
   protected val offset: Long,
   val size: Long
-) extends Bytes.Vec with YXML.Source {
+) extends YXML.Source {
+  bytes =>
+
   assert(
     (chunks.isEmpty ||
       chunks.get.nonEmpty &&
@@ -305,7 +299,7 @@
 
   override def is_empty: Boolean = size == 0
 
-  def proper: Option[Bytes] = if (is_empty) None else Some(this)
+  def proper: Option[Bytes] = if (is_empty) None else Some(bytes)
 
   def is_sliced: Boolean =
     offset != 0L || {
@@ -333,11 +327,13 @@
     }
   }
 
+  // signed byte
   def byte(i: Long): Byte =
     if (0 <= i && i < size) byte_unchecked(i)
     else throw new IndexOutOfBoundsException
 
-  def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
+  // unsigned char
+  def char(i: Long): Char = (byte(i).toInt & 0xff).toChar
 
   protected def subarray_iterator: Iterator[Bytes.Subarray] =
     if (is_empty) Iterator.empty
@@ -378,7 +374,10 @@
         for (a <- subarray_iterator) { builder += a }
       }
     }
-    else this
+    else bytes
+
+  def terminated_line: Boolean =
+    size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)
 
   def trim_line: Bytes =
     if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
@@ -387,7 +386,7 @@
     else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
       slice(0, size - 1)
     }
-    else this
+    else bytes
 
 
   /* separated chunks */
@@ -399,20 +398,20 @@
 
       private def end(i: Long): Long = {
         var j = i
-        while (j < Bytes.this.size && byte_unchecked(j) != sep) { j += 1 }
+        while (j < bytes.size && byte_unchecked(j) != sep) { j += 1 }
         j
       }
 
       // init
-      if (!Bytes.this.is_empty) { start = 0; stop = end(0) }
+      if (!bytes.is_empty) { start = 0; stop = end(0) }
 
       def hasNext: Boolean =
-        0 <= start && start <= stop && stop <= Bytes.this.size
+        0 <= start && start <= stop && stop <= bytes.size
 
       def next(): Bytes =
         if (hasNext) {
-          val chunk = Bytes.this.slice(start, stop)
-          if (stop < Bytes.this.size) { start = stop + 1; stop = end(start) }
+          val chunk = bytes.slice(start, stop)
+          if (stop < bytes.size) { start = stop + 1; stop = end(start) }
           else { start = -1; stop = -1 }
           chunk
         }
@@ -447,7 +446,7 @@
   override def equals(that: Any): Boolean = {
     that match {
       case other: Bytes =>
-        if (this.eq(other)) true
+        if (bytes.eq(other)) true
         else if (size != other.size) false
         else {
           if (chunks.isEmpty && other.chunks.isEmpty) {
@@ -468,7 +467,7 @@
   /* content */
 
   def + (other: Bytes): Bytes =
-    if (other.is_empty) this
+    if (other.is_empty) bytes
     else if (is_empty) other
     else {
       Bytes.Builder.use(hint = size + other.size) { builder =>
@@ -495,14 +494,14 @@
       }
       utf8
 
-      if (utf8) UTF8.decode_permissive_bytes(this)
+      if (utf8) UTF8.decode_permissive(bytes)
       else new String(make_array, UTF8.charset)
     }
 
   def wellformed_text: Option[String] =
     try {
       val s = text
-      if (this == Bytes(s)) Some(s) else None
+      if (bytes == Bytes(s)) Some(s) else None
     }
     catch { case ERROR(_) => None }
 
@@ -622,6 +621,6 @@
     cache: Compress.Cache = Compress.Cache.none
   ) : (Boolean, Bytes) = {
     val compressed = compress(options = options, cache = cache)
-    if (compressed.size < size) (true, compressed) else (false, this)
+    if (compressed.size < size) (true, compressed) else (false, bytes)
   }
 }
--- a/src/Pure/General/scan.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/scan.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -436,7 +436,7 @@
     reader.isInstanceOf[Byte_Reader]
 
   def reader_decode_utf8(is_utf8: Boolean, s: String): String =
-    if (is_utf8) UTF8.decode_permissive(s) else s
+    if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s
 
   def reader_decode_utf8(reader: Reader[Char], s: String): String =
     reader_decode_utf8(reader_is_utf8(reader), s)
--- a/src/Pure/General/utf8.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/utf8.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -17,24 +17,13 @@
 
   def bytes(s: String): Array[Byte] = s.getBytes(charset)
 
-  def relevant(s: CharSequence): Boolean = {
-    var i = 0
-    val n = s.length
-    var found = false
-    while (i < n && !found) {
-      if (s.charAt(i) >= 128) { found = true }
-      i += 1
-    }
-    found
-  }
-
 
   /* permissive UTF-8 decoding */
 
   // see also https://en.wikipedia.org/wiki/UTF-8#Description
   // overlong encodings enable byte-stuffing of low-ASCII
 
-  def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+  def decode_permissive(bytes: Bytes): String = {
     val size = bytes.size
     val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
     var code = -1
@@ -61,7 +50,7 @@
       }
     }
     for (i <- 0L until size) {
-      val c: Char = bytes(i)
+      val c = bytes.char(i)
       if (c < 128) { flush(); buf.append(c) }
       else if ((c & 0xC0) == 0x80) push(c & 0x3F)
       else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
@@ -71,8 +60,4 @@
     flush()
     buf.toString
   }
-
-  def decode_permissive(text: String): String =
-    if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text))
-    else text
 }
--- a/src/Pure/Isar/outer_syntax.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/Isar/outer_syntax.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -26,7 +26,7 @@
       for (s <- Symbol.iterator(str)) {
         if (s.length == 1) {
           val c = s(0)
-          if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
+          if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') {
             result += '\\'
             if (c < 10) result += '0'
             if (c < 100) result += '0'
--- a/src/Pure/PIDE/byte_message.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/PIDE/byte_message.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -85,16 +85,8 @@
     !msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) &&
       Value.Long.unapply(msg.text).isDefined
 
-  private def is_terminated(msg: Bytes): Boolean =
-    msg.size match {
-      case size if size > 0 =>
-        val c = msg(size - 1)
-        c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar)
-      case _ => false
-    }
-
   def write_line_message(stream: OutputStream, msg: Bytes): Unit = {
-    if (is_length(msg) || is_terminated(msg)) {
+    if (is_length(msg) || msg.terminated_line) {
       error ("Bad content for line message:\n" ++ msg.text.take(100))
     }
 
--- a/src/Pure/PIDE/yxml.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/PIDE/yxml.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -17,19 +17,14 @@
   val X_byte: Byte = 5
   val Y_byte: Byte = 6
 
-  val X = X_byte.toChar
-  val Y = Y_byte.toChar
-
-  val is_X: Char => Boolean = _ == X
-  val is_Y: Char => Boolean = _ == Y
+  val X_char: Char = X_byte.toChar
+  val Y_char: Char = Y_byte.toChar
 
-  val X_string: String = X.toString
-  val Y_string: String = Y.toString
-  val XY_string: String = X_string + Y_string
-  val XYX_string: String = XY_string + X_string
+  val X_string: String = X_char.toString
+  val Y_string: String = Y_char.toString
 
-  def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
-  def detect_elem(s: String): Boolean = s.startsWith(XY_string)
+  def detect(s: String): Boolean = s.exists(c => c == X_char || c == Y_char)
+  def detect_elem(s: String): Boolean = s.length >= 2 && s(0) == X_char && s(1) == Y_char
 
 
   /* string representation */
@@ -108,12 +103,12 @@
 
   class Source_String(source: String) extends Source {
     override def is_empty: Boolean = source.isEmpty
-    override def is_X: Boolean = source.length == 1 && source(0) == X
-    override def is_Y: Boolean = source.length == 1 && source(0) == Y
+    override def is_X: Boolean = source.length == 1 && source(0) == X_char
+    override def is_Y: Boolean = source.length == 1 && source(0) == Y_char
     override def iterator_X: Iterator[Source] =
-      Library.separated_chunks(X, source).map(Source.apply)
+      Library.separated_chunks(X_char, source).map(Source.apply)
     override def iterator_Y: Iterator[Source] =
-      Library.separated_chunks(Y, source).map(Source.apply)
+      Library.separated_chunks(Y_char, source).map(Source.apply)
     override def text: String = source
   }
 
--- a/src/Pure/ROOT.ML	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/ROOT.ML	Wed Jul 03 21:54:17 2024 +0200
@@ -372,3 +372,4 @@
 ML_file "Tools/jedit.ML";
 ML_file "Tools/ghc.ML";
 ML_file "Tools/generated_files.ML";
+
--- a/src/Pure/System/web_app.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/System/web_app.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -125,7 +125,7 @@
               var sep_ok = true
               var sep_i = 0
               while (sep_ok && sep_i < sep.length) {
-                if (bytes(bytes_i + sep_i) == sep(sep_i)) {
+                if (bytes.char(bytes_i + sep_i) == sep(sep_i)) {
                   sep_i += 1
                 } else {
                   bytes_i += 1
--- a/src/Pure/Tools/dump.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/Tools/dump.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -29,9 +29,6 @@
     def write(file_name: Path, bytes: Bytes): Unit =
       Bytes.write(write_path(file_name), bytes)
 
-    def write(file_name: Path, text: String): Unit =
-      write(file_name, Bytes(text))
-
     def write(file_name: Path, body: XML.Body): Unit =
       write(file_name, YXML.bytes_of_body(body, recode = Symbol.encode))
   }
--- a/src/Tools/jEdit/jedit_main/scala_console.scala	Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Tools/jEdit/jedit_main/scala_console.scala	Wed Jul 03 21:54:17 2024 +0200
@@ -50,7 +50,7 @@
 
     override def flush(): Unit = {
       val s = buf.synchronized { val s = buf.toString; buf.clear(); s }
-      val str = UTF8.decode_permissive(s)
+      val str = UTF8.decode_permissive(Bytes.raw(s))
       GUI_Thread.later {
         if (global_out == null) java.lang.System.out.print(str)
         else global_out.writeAttrs(null, str)