--- a/src/Pure/General/bytes.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/bytes.scala Wed Jul 03 21:54:17 2024 +0200
@@ -10,6 +10,7 @@
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
InputStream, OutputStream, File => JFile}
import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets.ISO_8859_1
import java.nio.channels.FileChannel
import java.nio.file.StandardOpenOption
import java.util.Arrays
@@ -45,7 +46,11 @@
val empty: Bytes = reuse_array(new Array(0))
- def apply(s: CharSequence): Bytes =
+ def raw(s: String): Bytes =
+ if (s.isEmpty) empty
+ else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) }
+
+ def apply(s: String): Bytes =
if (s.isEmpty) empty
else Builder.use(hint = s.length) { builder => builder += s }
@@ -131,21 +136,6 @@
def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
- /* vector of short unsigned integers */
-
- trait Vec {
- def size: Long
- def apply(i: Long): Char
- }
-
- class Vec_String(string: String) extends Vec {
- override def size: Long = string.length.toLong
- override def apply(i: Long): Char =
- if (0 <= i && i < size) string(i.toInt)
- else throw new IndexOutOfBoundsException
- }
-
-
/* incremental builder: unsynchronized! */
object Builder {
@@ -173,6 +163,8 @@
}
final class Builder private[Bytes](hint: Long) {
+ builder =>
+
private var chunks =
new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt)
@@ -240,12 +232,12 @@
}
}
- def += (s: CharSequence): Unit =
- if (s.length > 0) { this += UTF8.bytes(s.toString) }
+ def += (s: String): Unit =
+ if (s.length > 0) { builder += UTF8.bytes(s) }
- def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
+ def += (array: Array[Byte]): Unit = { builder += (array, 0, array.length) }
- def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
+ def += (a: Subarray): Unit = { builder += (a.array, a.offset, a.length) }
private def done(): Bytes = {
val cs = chunks.toArray
@@ -292,7 +284,9 @@
protected val chunk0: Array[Byte],
protected val offset: Long,
val size: Long
-) extends Bytes.Vec with YXML.Source {
+) extends YXML.Source {
+ bytes =>
+
assert(
(chunks.isEmpty ||
chunks.get.nonEmpty &&
@@ -305,7 +299,7 @@
override def is_empty: Boolean = size == 0
- def proper: Option[Bytes] = if (is_empty) None else Some(this)
+ def proper: Option[Bytes] = if (is_empty) None else Some(bytes)
def is_sliced: Boolean =
offset != 0L || {
@@ -333,11 +327,13 @@
}
}
+ // signed byte
def byte(i: Long): Byte =
if (0 <= i && i < size) byte_unchecked(i)
else throw new IndexOutOfBoundsException
- def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
+ // unsigned char
+ def char(i: Long): Char = (byte(i).toInt & 0xff).toChar
protected def subarray_iterator: Iterator[Bytes.Subarray] =
if (is_empty) Iterator.empty
@@ -378,7 +374,10 @@
for (a <- subarray_iterator) { builder += a }
}
}
- else this
+ else bytes
+
+ def terminated_line: Boolean =
+ size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)
def trim_line: Bytes =
if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
@@ -387,7 +386,7 @@
else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
slice(0, size - 1)
}
- else this
+ else bytes
/* separated chunks */
@@ -399,20 +398,20 @@
private def end(i: Long): Long = {
var j = i
- while (j < Bytes.this.size && byte_unchecked(j) != sep) { j += 1 }
+ while (j < bytes.size && byte_unchecked(j) != sep) { j += 1 }
j
}
// init
- if (!Bytes.this.is_empty) { start = 0; stop = end(0) }
+ if (!bytes.is_empty) { start = 0; stop = end(0) }
def hasNext: Boolean =
- 0 <= start && start <= stop && stop <= Bytes.this.size
+ 0 <= start && start <= stop && stop <= bytes.size
def next(): Bytes =
if (hasNext) {
- val chunk = Bytes.this.slice(start, stop)
- if (stop < Bytes.this.size) { start = stop + 1; stop = end(start) }
+ val chunk = bytes.slice(start, stop)
+ if (stop < bytes.size) { start = stop + 1; stop = end(start) }
else { start = -1; stop = -1 }
chunk
}
@@ -447,7 +446,7 @@
override def equals(that: Any): Boolean = {
that match {
case other: Bytes =>
- if (this.eq(other)) true
+ if (bytes.eq(other)) true
else if (size != other.size) false
else {
if (chunks.isEmpty && other.chunks.isEmpty) {
@@ -468,7 +467,7 @@
/* content */
def + (other: Bytes): Bytes =
- if (other.is_empty) this
+ if (other.is_empty) bytes
else if (is_empty) other
else {
Bytes.Builder.use(hint = size + other.size) { builder =>
@@ -495,14 +494,14 @@
}
utf8
- if (utf8) UTF8.decode_permissive_bytes(this)
+ if (utf8) UTF8.decode_permissive(bytes)
else new String(make_array, UTF8.charset)
}
def wellformed_text: Option[String] =
try {
val s = text
- if (this == Bytes(s)) Some(s) else None
+ if (bytes == Bytes(s)) Some(s) else None
}
catch { case ERROR(_) => None }
@@ -622,6 +621,6 @@
cache: Compress.Cache = Compress.Cache.none
) : (Boolean, Bytes) = {
val compressed = compress(options = options, cache = cache)
- if (compressed.size < size) (true, compressed) else (false, this)
+ if (compressed.size < size) (true, compressed) else (false, bytes)
}
}
--- a/src/Pure/General/scan.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/scan.scala Wed Jul 03 21:54:17 2024 +0200
@@ -436,7 +436,7 @@
reader.isInstanceOf[Byte_Reader]
def reader_decode_utf8(is_utf8: Boolean, s: String): String =
- if (is_utf8) UTF8.decode_permissive(s) else s
+ if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s
def reader_decode_utf8(reader: Reader[Char], s: String): String =
reader_decode_utf8(reader_is_utf8(reader), s)
--- a/src/Pure/General/utf8.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/General/utf8.scala Wed Jul 03 21:54:17 2024 +0200
@@ -17,24 +17,13 @@
def bytes(s: String): Array[Byte] = s.getBytes(charset)
- def relevant(s: CharSequence): Boolean = {
- var i = 0
- val n = s.length
- var found = false
- while (i < n && !found) {
- if (s.charAt(i) >= 128) { found = true }
- i += 1
- }
- found
- }
-
/* permissive UTF-8 decoding */
// see also https://en.wikipedia.org/wiki/UTF-8#Description
// overlong encodings enable byte-stuffing of low-ASCII
- def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+ def decode_permissive(bytes: Bytes): String = {
val size = bytes.size
val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
var code = -1
@@ -61,7 +50,7 @@
}
}
for (i <- 0L until size) {
- val c: Char = bytes(i)
+ val c = bytes.char(i)
if (c < 128) { flush(); buf.append(c) }
else if ((c & 0xC0) == 0x80) push(c & 0x3F)
else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
@@ -71,8 +60,4 @@
flush()
buf.toString
}
-
- def decode_permissive(text: String): String =
- if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text))
- else text
}
--- a/src/Pure/Isar/outer_syntax.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/Isar/outer_syntax.scala Wed Jul 03 21:54:17 2024 +0200
@@ -26,7 +26,7 @@
for (s <- Symbol.iterator(str)) {
if (s.length == 1) {
val c = s(0)
- if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
+ if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') {
result += '\\'
if (c < 10) result += '0'
if (c < 100) result += '0'
--- a/src/Pure/PIDE/byte_message.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/PIDE/byte_message.scala Wed Jul 03 21:54:17 2024 +0200
@@ -85,16 +85,8 @@
!msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) &&
Value.Long.unapply(msg.text).isDefined
- private def is_terminated(msg: Bytes): Boolean =
- msg.size match {
- case size if size > 0 =>
- val c = msg(size - 1)
- c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar)
- case _ => false
- }
-
def write_line_message(stream: OutputStream, msg: Bytes): Unit = {
- if (is_length(msg) || is_terminated(msg)) {
+ if (is_length(msg) || msg.terminated_line) {
error ("Bad content for line message:\n" ++ msg.text.take(100))
}
--- a/src/Pure/PIDE/yxml.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/PIDE/yxml.scala Wed Jul 03 21:54:17 2024 +0200
@@ -17,19 +17,14 @@
val X_byte: Byte = 5
val Y_byte: Byte = 6
- val X = X_byte.toChar
- val Y = Y_byte.toChar
-
- val is_X: Char => Boolean = _ == X
- val is_Y: Char => Boolean = _ == Y
+ val X_char: Char = X_byte.toChar
+ val Y_char: Char = Y_byte.toChar
- val X_string: String = X.toString
- val Y_string: String = Y.toString
- val XY_string: String = X_string + Y_string
- val XYX_string: String = XY_string + X_string
+ val X_string: String = X_char.toString
+ val Y_string: String = Y_char.toString
- def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
- def detect_elem(s: String): Boolean = s.startsWith(XY_string)
+ def detect(s: String): Boolean = s.exists(c => c == X_char || c == Y_char)
+ def detect_elem(s: String): Boolean = s.length >= 2 && s(0) == X_char && s(1) == Y_char
/* string representation */
@@ -108,12 +103,12 @@
class Source_String(source: String) extends Source {
override def is_empty: Boolean = source.isEmpty
- override def is_X: Boolean = source.length == 1 && source(0) == X
- override def is_Y: Boolean = source.length == 1 && source(0) == Y
+ override def is_X: Boolean = source.length == 1 && source(0) == X_char
+ override def is_Y: Boolean = source.length == 1 && source(0) == Y_char
override def iterator_X: Iterator[Source] =
- Library.separated_chunks(X, source).map(Source.apply)
+ Library.separated_chunks(X_char, source).map(Source.apply)
override def iterator_Y: Iterator[Source] =
- Library.separated_chunks(Y, source).map(Source.apply)
+ Library.separated_chunks(Y_char, source).map(Source.apply)
override def text: String = source
}
--- a/src/Pure/ROOT.ML Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/ROOT.ML Wed Jul 03 21:54:17 2024 +0200
@@ -372,3 +372,4 @@
ML_file "Tools/jedit.ML";
ML_file "Tools/ghc.ML";
ML_file "Tools/generated_files.ML";
+
--- a/src/Pure/System/web_app.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/System/web_app.scala Wed Jul 03 21:54:17 2024 +0200
@@ -125,7 +125,7 @@
var sep_ok = true
var sep_i = 0
while (sep_ok && sep_i < sep.length) {
- if (bytes(bytes_i + sep_i) == sep(sep_i)) {
+ if (bytes.char(bytes_i + sep_i) == sep(sep_i)) {
sep_i += 1
} else {
bytes_i += 1
--- a/src/Pure/Tools/dump.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Pure/Tools/dump.scala Wed Jul 03 21:54:17 2024 +0200
@@ -29,9 +29,6 @@
def write(file_name: Path, bytes: Bytes): Unit =
Bytes.write(write_path(file_name), bytes)
- def write(file_name: Path, text: String): Unit =
- write(file_name, Bytes(text))
-
def write(file_name: Path, body: XML.Body): Unit =
write(file_name, YXML.bytes_of_body(body, recode = Symbol.encode))
}
--- a/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 19:42:13 2024 +0200
+++ b/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 21:54:17 2024 +0200
@@ -50,7 +50,7 @@
override def flush(): Unit = {
val s = buf.synchronized { val s = buf.toString; buf.clear(); s }
- val str = UTF8.decode_permissive(s)
+ val str = UTF8.decode_permissive(Bytes.raw(s))
GUI_Thread.later {
if (global_out == null) java.lang.System.out.print(str)
else global_out.writeAttrs(null, str)