more systematic text length;
authorwenzelm
Tue, 20 Dec 2016 10:44:36 +0100
changeset 64615 fd0d6de380c6
parent 64614 88211daacf93
child 64616 dc3ec40fe41b
more systematic text length;
src/Pure/General/codepoint.scala
src/Pure/General/symbol.scala
src/Pure/PIDE/line.scala
src/Pure/System/utf8.scala
--- a/src/Pure/General/codepoint.scala	Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/General/codepoint.scala	Tue Dec 20 10:44:36 2016 +0100
@@ -9,17 +9,20 @@
 
 object Codepoint
 {
-  def iterator(str: String): Iterator[Int] =
+  def string(c: Int): String = new String(Array(c), 0, 1)
+
+  def iterator(s: String): Iterator[Int] =
     new Iterator[Int] {
       var offset = 0
-      def hasNext: Boolean = offset < str.length
+      def hasNext: Boolean = offset < s.length
       def next: Int =
       {
-        val c = str.codePointAt(offset)
+        val c = s.codePointAt(offset)
         offset += Character.charCount(c)
         c
       }
     }
 
-  def string(c: Int): String = new String(Array(c), 0, 1)
+  def length(s: String): Int = iterator(s).length
+  object Length extends Line.Length { def apply(s: String): Int = Codepoint.length(s) }
 }
--- a/src/Pure/General/symbol.scala	Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/General/symbol.scala	Tue Dec 20 10:44:36 2016 +0100
@@ -128,6 +128,9 @@
 
   def explode(text: CharSequence): List[Symbol] = iterator(text).toList
 
+  def length(text: CharSequence): Int = iterator(text).length
+  object Length extends Line.Length { def apply(text: String): Int = length(text) }
+
 
   /* decoding offsets */
 
--- a/src/Pure/PIDE/line.scala	Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/PIDE/line.scala	Tue Dec 20 10:44:36 2016 +0100
@@ -12,6 +12,12 @@
 
 object Line
 {
+  /* length wrt. encoding */
+
+  trait Length { def apply(text: String): Int }
+  object Length extends Length { def apply(text: String): Int = text.length }
+
+
   /* position */
 
   object Position
--- a/src/Pure/System/utf8.scala	Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/System/utf8.scala	Tue Dec 20 10:44:36 2016 +0100
@@ -21,6 +21,17 @@
 
   def bytes(s: String): Array[Byte] = s.getBytes(charset)
 
+  def bytes_length(s: String): Int =
+    (0 /: Codepoint.iterator(s)) {
+      case (n, i) =>
+        if (i < 0x80) n + 1
+        else if (i < 0x800) n + 2
+        else if (i < 0x10000) n + 3
+        else n + 4
+    }
+
+  object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) }
+
 
   /* permissive UTF-8 decoding */