# HG changeset patch # User wenzelm # Date 1720036457 -7200 # Node ID 7958907b959aabbf8f226c82d6638d2fc63c2b71 # Parent 0ca36dcdcbd311ee4065010bf018fa61ed4d2da3# Parent 9591af6f6b77400a5482f98d05db42249c2c893f merged diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/General/bytes.scala --- a/src/Pure/General/bytes.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/General/bytes.scala Wed Jul 03 21:54:17 2024 +0200 @@ -10,6 +10,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JFile} import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets.ISO_8859_1 import java.nio.channels.FileChannel import java.nio.file.StandardOpenOption import java.util.Arrays @@ -45,7 +46,11 @@ val empty: Bytes = reuse_array(new Array(0)) - def apply(s: CharSequence): Bytes = + def raw(s: String): Bytes = + if (s.isEmpty) empty + else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) } + + def apply(s: String): Bytes = if (s.isEmpty) empty else Builder.use(hint = s.length) { builder => builder += s } @@ -131,21 +136,6 @@ def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes) - /* vector of short unsigned integers */ - - trait Vec { - def size: Long - def apply(i: Long): Char - } - - class Vec_String(string: String) extends Vec { - override def size: Long = string.length.toLong - override def apply(i: Long): Char = - if (0 <= i && i < size) string(i.toInt) - else throw new IndexOutOfBoundsException - } - - /* incremental builder: unsynchronized! */ object Builder { @@ -173,6 +163,8 @@ } final class Builder private[Bytes](hint: Long) { + builder => + private var chunks = new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt) @@ -240,12 +232,12 @@ } } - def += (s: CharSequence): Unit = - if (s.length > 0) { this += UTF8.bytes(s.toString) } + def += (s: String): Unit = + if (s.length > 0) { builder += UTF8.bytes(s) } - def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) } + def += (array: Array[Byte]): Unit = { builder += (array, 0, array.length) } - def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) } + def += (a: Subarray): Unit = { builder += (a.array, a.offset, a.length) } private def done(): Bytes = { val cs = chunks.toArray @@ -292,7 +284,9 @@ protected val chunk0: Array[Byte], protected val offset: Long, val size: Long -) extends Bytes.Vec with YXML.Source { +) extends YXML.Source { + bytes => + assert( (chunks.isEmpty || chunks.get.nonEmpty && @@ -305,7 +299,7 @@ override def is_empty: Boolean = size == 0 - def proper: Option[Bytes] = if (is_empty) None else Some(this) + def proper: Option[Bytes] = if (is_empty) None else Some(bytes) def is_sliced: Boolean = offset != 0L || { @@ -333,11 +327,13 @@ } } + // signed byte def byte(i: Long): Byte = if (0 <= i && i < size) byte_unchecked(i) else throw new IndexOutOfBoundsException - def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar + // unsigned char + def char(i: Long): Char = (byte(i).toInt & 0xff).toChar protected def subarray_iterator: Iterator[Bytes.Subarray] = if (is_empty) Iterator.empty @@ -378,7 +374,10 @@ for (a <- subarray_iterator) { builder += a } } } - else this + else bytes + + def terminated_line: Boolean = + size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10) def trim_line: Bytes = if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) { @@ -387,7 +386,7 @@ else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) { slice(0, size - 1) } - else this + else bytes /* separated chunks */ @@ -399,20 +398,20 @@ private def end(i: Long): Long = { var j = i - while (j < Bytes.this.size && byte_unchecked(j) != sep) { j += 1 } + while (j < bytes.size && byte_unchecked(j) != sep) { j += 1 } j } // init - if (!Bytes.this.is_empty) { start = 0; stop = end(0) } + if (!bytes.is_empty) { start = 0; stop = end(0) } def hasNext: Boolean = - 0 <= start && start <= stop && stop <= Bytes.this.size + 0 <= start && start <= stop && stop <= bytes.size def next(): Bytes = if (hasNext) { - val chunk = Bytes.this.slice(start, stop) - if (stop < Bytes.this.size) { start = stop + 1; stop = end(start) } + val chunk = bytes.slice(start, stop) + if (stop < bytes.size) { start = stop + 1; stop = end(start) } else { start = -1; stop = -1 } chunk } @@ -447,7 +446,7 @@ override def equals(that: Any): Boolean = { that match { case other: Bytes => - if (this.eq(other)) true + if (bytes.eq(other)) true else if (size != other.size) false else { if (chunks.isEmpty && other.chunks.isEmpty) { @@ -468,7 +467,7 @@ /* content */ def + (other: Bytes): Bytes = - if (other.is_empty) this + if (other.is_empty) bytes else if (is_empty) other else { Bytes.Builder.use(hint = size + other.size) { builder => @@ -495,14 +494,14 @@ } utf8 - if (utf8) UTF8.decode_permissive_bytes(this) + if (utf8) UTF8.decode_permissive(bytes) else new String(make_array, UTF8.charset) } def wellformed_text: Option[String] = try { val s = text - if (this == Bytes(s)) Some(s) else None + if (bytes == Bytes(s)) Some(s) else None } catch { case ERROR(_) => None } @@ -622,6 +621,6 @@ cache: Compress.Cache = Compress.Cache.none ) : (Boolean, Bytes) = { val compressed = compress(options = options, cache = cache) - if (compressed.size < size) (true, compressed) else (false, this) + if (compressed.size < size) (true, compressed) else (false, bytes) } } diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/General/scan.scala Wed Jul 03 21:54:17 2024 +0200 @@ -436,7 +436,7 @@ reader.isInstanceOf[Byte_Reader] def reader_decode_utf8(is_utf8: Boolean, s: String): String = - if (is_utf8) UTF8.decode_permissive(s) else s + if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s def reader_decode_utf8(reader: Reader[Char], s: String): String = reader_decode_utf8(reader_is_utf8(reader), s) diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/General/utf8.scala --- a/src/Pure/General/utf8.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/General/utf8.scala Wed Jul 03 21:54:17 2024 +0200 @@ -17,24 +17,13 @@ def bytes(s: String): Array[Byte] = s.getBytes(charset) - def relevant(s: CharSequence): Boolean = { - var i = 0 - val n = s.length - var found = false - while (i < n && !found) { - if (s.charAt(i) >= 128) { found = true } - i += 1 - } - found - } - /* permissive UTF-8 decoding */ // see also https://en.wikipedia.org/wiki/UTF-8#Description // overlong encodings enable byte-stuffing of low-ASCII - def decode_permissive_bytes(bytes: Bytes.Vec): String = { + def decode_permissive(bytes: Bytes): String = { val size = bytes.size val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt) var code = -1 @@ -61,7 +50,7 @@ } } for (i <- 0L until size) { - val c: Char = bytes(i) + val c = bytes.char(i) if (c < 128) { flush(); buf.append(c) } else if ((c & 0xC0) == 0x80) push(c & 0x3F) else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1) @@ -71,8 +60,4 @@ flush() buf.toString } - - def decode_permissive(text: String): String = - if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text)) - else text } diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/Isar/outer_syntax.scala --- a/src/Pure/Isar/outer_syntax.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/Isar/outer_syntax.scala Wed Jul 03 21:54:17 2024 +0200 @@ -26,7 +26,7 @@ for (s <- Symbol.iterator(str)) { if (s.length == 1) { val c = s(0) - if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') { + if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') { result += '\\' if (c < 10) result += '0' if (c < 100) result += '0' diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/PIDE/byte_message.scala --- a/src/Pure/PIDE/byte_message.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/PIDE/byte_message.scala Wed Jul 03 21:54:17 2024 +0200 @@ -85,16 +85,8 @@ !msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) && Value.Long.unapply(msg.text).isDefined - private def is_terminated(msg: Bytes): Boolean = - msg.size match { - case size if size > 0 => - val c = msg(size - 1) - c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar) - case _ => false - } - def write_line_message(stream: OutputStream, msg: Bytes): Unit = { - if (is_length(msg) || is_terminated(msg)) { + if (is_length(msg) || msg.terminated_line) { error ("Bad content for line message:\n" ++ msg.text.take(100)) } diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/PIDE/yxml.scala --- a/src/Pure/PIDE/yxml.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/PIDE/yxml.scala Wed Jul 03 21:54:17 2024 +0200 @@ -17,19 +17,14 @@ val X_byte: Byte = 5 val Y_byte: Byte = 6 - val X = X_byte.toChar - val Y = Y_byte.toChar - - val is_X: Char => Boolean = _ == X - val is_Y: Char => Boolean = _ == Y + val X_char: Char = X_byte.toChar + val Y_char: Char = Y_byte.toChar - val X_string: String = X.toString - val Y_string: String = Y.toString - val XY_string: String = X_string + Y_string - val XYX_string: String = XY_string + X_string + val X_string: String = X_char.toString + val Y_string: String = Y_char.toString - def detect(s: String): Boolean = s.exists(c => c == X || c == Y) - def detect_elem(s: String): Boolean = s.startsWith(XY_string) + def detect(s: String): Boolean = s.exists(c => c == X_char || c == Y_char) + def detect_elem(s: String): Boolean = s.length >= 2 && s(0) == X_char && s(1) == Y_char /* string representation */ @@ -108,12 +103,12 @@ class Source_String(source: String) extends Source { override def is_empty: Boolean = source.isEmpty - override def is_X: Boolean = source.length == 1 && source(0) == X - override def is_Y: Boolean = source.length == 1 && source(0) == Y + override def is_X: Boolean = source.length == 1 && source(0) == X_char + override def is_Y: Boolean = source.length == 1 && source(0) == Y_char override def iterator_X: Iterator[Source] = - Library.separated_chunks(X, source).map(Source.apply) + Library.separated_chunks(X_char, source).map(Source.apply) override def iterator_Y: Iterator[Source] = - Library.separated_chunks(Y, source).map(Source.apply) + Library.separated_chunks(Y_char, source).map(Source.apply) override def text: String = source } diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/ROOT.ML --- a/src/Pure/ROOT.ML Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/ROOT.ML Wed Jul 03 21:54:17 2024 +0200 @@ -372,3 +372,4 @@ ML_file "Tools/jedit.ML"; ML_file "Tools/ghc.ML"; ML_file "Tools/generated_files.ML"; + diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/System/web_app.scala --- a/src/Pure/System/web_app.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/System/web_app.scala Wed Jul 03 21:54:17 2024 +0200 @@ -125,7 +125,7 @@ var sep_ok = true var sep_i = 0 while (sep_ok && sep_i < sep.length) { - if (bytes(bytes_i + sep_i) == sep(sep_i)) { + if (bytes.char(bytes_i + sep_i) == sep(sep_i)) { sep_i += 1 } else { bytes_i += 1 diff -r 0ca36dcdcbd3 -r 7958907b959a src/Pure/Tools/dump.scala --- a/src/Pure/Tools/dump.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Pure/Tools/dump.scala Wed Jul 03 21:54:17 2024 +0200 @@ -29,9 +29,6 @@ def write(file_name: Path, bytes: Bytes): Unit = Bytes.write(write_path(file_name), bytes) - def write(file_name: Path, text: String): Unit = - write(file_name, Bytes(text)) - def write(file_name: Path, body: XML.Body): Unit = write(file_name, YXML.bytes_of_body(body, recode = Symbol.encode)) } diff -r 0ca36dcdcbd3 -r 7958907b959a src/Tools/jEdit/jedit_main/scala_console.scala --- a/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 19:42:13 2024 +0200 +++ b/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 21:54:17 2024 +0200 @@ -50,7 +50,7 @@ override def flush(): Unit = { val s = buf.synchronized { val s = buf.toString; buf.clear(); s } - val str = UTF8.decode_permissive(s) + val str = UTF8.decode_permissive(Bytes.raw(s)) GUI_Thread.later { if (global_out == null) java.lang.System.out.print(str) else global_out.writeAttrs(null, str)