more systematic text length;
authorwenzelm
Tue Dec 20 10:44:36 2016 +0100 (2016-12-20)
changeset 64615fd0d6de380c6
parent 64614 88211daacf93
child 64616 dc3ec40fe41b
more systematic text length;
src/Pure/General/codepoint.scala
src/Pure/General/symbol.scala
src/Pure/PIDE/line.scala
src/Pure/System/utf8.scala
     1.1 --- a/src/Pure/General/codepoint.scala	Tue Dec 20 10:06:18 2016 +0100
     1.2 +++ b/src/Pure/General/codepoint.scala	Tue Dec 20 10:44:36 2016 +0100
     1.3 @@ -9,17 +9,20 @@
     1.4  
     1.5  object Codepoint
     1.6  {
     1.7 -  def iterator(str: String): Iterator[Int] =
     1.8 +  def string(c: Int): String = new String(Array(c), 0, 1)
     1.9 +
    1.10 +  def iterator(s: String): Iterator[Int] =
    1.11      new Iterator[Int] {
    1.12        var offset = 0
    1.13 -      def hasNext: Boolean = offset < str.length
    1.14 +      def hasNext: Boolean = offset < s.length
    1.15        def next: Int =
    1.16        {
    1.17 -        val c = str.codePointAt(offset)
    1.18 +        val c = s.codePointAt(offset)
    1.19          offset += Character.charCount(c)
    1.20          c
    1.21        }
    1.22      }
    1.23  
    1.24 -  def string(c: Int): String = new String(Array(c), 0, 1)
    1.25 +  def length(s: String): Int = iterator(s).length
    1.26 +  object Length extends Line.Length { def apply(s: String): Int = Codepoint.length(s) }
    1.27  }
     2.1 --- a/src/Pure/General/symbol.scala	Tue Dec 20 10:06:18 2016 +0100
     2.2 +++ b/src/Pure/General/symbol.scala	Tue Dec 20 10:44:36 2016 +0100
     2.3 @@ -128,6 +128,9 @@
     2.4  
     2.5    def explode(text: CharSequence): List[Symbol] = iterator(text).toList
     2.6  
     2.7 +  def length(text: CharSequence): Int = iterator(text).length
     2.8 +  object Length extends Line.Length { def apply(text: String): Int = length(text) }
     2.9 +
    2.10  
    2.11    /* decoding offsets */
    2.12  
     3.1 --- a/src/Pure/PIDE/line.scala	Tue Dec 20 10:06:18 2016 +0100
     3.2 +++ b/src/Pure/PIDE/line.scala	Tue Dec 20 10:44:36 2016 +0100
     3.3 @@ -12,6 +12,12 @@
     3.4  
     3.5  object Line
     3.6  {
     3.7 +  /* length wrt. encoding */
     3.8 +
     3.9 +  trait Length { def apply(text: String): Int }
    3.10 +  object Length extends Length { def apply(text: String): Int = text.length }
    3.11 +
    3.12 +
    3.13    /* position */
    3.14  
    3.15    object Position
     4.1 --- a/src/Pure/System/utf8.scala	Tue Dec 20 10:06:18 2016 +0100
     4.2 +++ b/src/Pure/System/utf8.scala	Tue Dec 20 10:44:36 2016 +0100
     4.3 @@ -21,6 +21,17 @@
     4.4  
     4.5    def bytes(s: String): Array[Byte] = s.getBytes(charset)
     4.6  
     4.7 +  def bytes_length(s: String): Int =
     4.8 +    (0 /: Codepoint.iterator(s)) {
     4.9 +      case (n, i) =>
    4.10 +        if (i < 0x80) n + 1
    4.11 +        else if (i < 0x800) n + 2
    4.12 +        else if (i < 0x10000) n + 3
    4.13 +        else n + 4
    4.14 +    }
    4.15 +
    4.16 +  object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) }
    4.17 +
    4.18  
    4.19    /* permissive UTF-8 decoding */
    4.20