--- a/src/Pure/General/codepoint.scala Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/General/codepoint.scala Tue Dec 20 10:44:36 2016 +0100
@@ -9,17 +9,20 @@
object Codepoint
{
- def iterator(str: String): Iterator[Int] =
+ def string(c: Int): String = new String(Array(c), 0, 1)
+
+ def iterator(s: String): Iterator[Int] =
new Iterator[Int] {
var offset = 0
- def hasNext: Boolean = offset < str.length
+ def hasNext: Boolean = offset < s.length
def next: Int =
{
- val c = str.codePointAt(offset)
+ val c = s.codePointAt(offset)
offset += Character.charCount(c)
c
}
}
- def string(c: Int): String = new String(Array(c), 0, 1)
+ def length(s: String): Int = iterator(s).length
+ object Length extends Line.Length { def apply(s: String): Int = Codepoint.length(s) }
}
--- a/src/Pure/General/symbol.scala Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/General/symbol.scala Tue Dec 20 10:44:36 2016 +0100
@@ -128,6 +128,9 @@
def explode(text: CharSequence): List[Symbol] = iterator(text).toList
+ def length(text: CharSequence): Int = iterator(text).length
+ object Length extends Line.Length { def apply(text: String): Int = length(text) }
+
/* decoding offsets */
--- a/src/Pure/PIDE/line.scala Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/PIDE/line.scala Tue Dec 20 10:44:36 2016 +0100
@@ -12,6 +12,12 @@
object Line
{
+ /* length wrt. encoding */
+
+ trait Length { def apply(text: String): Int }
+ object Length extends Length { def apply(text: String): Int = text.length }
+
+
/* position */
object Position
--- a/src/Pure/System/utf8.scala Tue Dec 20 10:06:18 2016 +0100
+++ b/src/Pure/System/utf8.scala Tue Dec 20 10:44:36 2016 +0100
@@ -21,6 +21,17 @@
def bytes(s: String): Array[Byte] = s.getBytes(charset)
+ def bytes_length(s: String): Int =
+ (0 /: Codepoint.iterator(s)) {
+ case (n, i) =>
+ if (i < 0x80) n + 1
+ else if (i < 0x800) n + 2
+ else if (i < 0x10000) n + 3
+ else n + 4
+ }
+
+ object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) }
+
/* permissive UTF-8 decoding */