notable performance tuning: avoid overhead of higher-order functions;
authorwenzelm
Sat, 15 Jun 2024 23:24:24 +0200
changeset 80379 1f1ce661c71c
parent 80378 ab4badc7db7f
child 80380 94d903234f6b
notable performance tuning: avoid overhead of higher-order functions;
src/Pure/General/bytes.scala
src/Pure/General/utf8.scala
--- a/src/Pure/General/bytes.scala	Sat Jun 15 22:43:01 2024 +0200
+++ b/src/Pure/General/bytes.scala	Sat Jun 15 23:24:24 2024 +0200
@@ -410,10 +410,18 @@
 
   def text: String =
     if (is_empty) ""
-    else if (byte_iterator.forall(_ >= 0)) {
-      new String(make_array, UTF8.charset)
+    else {
+      var i = 0L
+      var utf8 = false
+      while (i < size && !utf8) {
+        if (byte_unchecked(i) < 0) { utf8 = true }
+        i += 1
+      }
+      utf8
+
+      if (utf8) UTF8.decode_permissive_bytes(this)
+      else new String(make_array, UTF8.charset)
     }
-    else UTF8.decode_permissive_bytes(this)
 
   def wellformed_text: Option[String] =
     try {
--- a/src/Pure/General/utf8.scala	Sat Jun 15 22:43:01 2024 +0200
+++ b/src/Pure/General/utf8.scala	Sat Jun 15 23:24:24 2024 +0200
@@ -17,8 +17,16 @@
 
   def bytes(s: String): Array[Byte] = s.getBytes(charset)
 
-  def relevant(c: Char): Boolean = c >= 128
-  def relevant(s: CharSequence): Boolean = (0 until s.length).exists(i => relevant(s.charAt(i)))
+  def relevant(s: CharSequence): Boolean = {
+    var i = 0
+    val n = s.length
+    var utf8 = false
+    while (i < n && !utf8) {
+      if (s.charAt(i) >= 128) { utf8 = true }
+      i += 1
+    }
+    utf8
+  }
 
 
   /* permissive UTF-8 decoding */