src/Pure/General/codepoint.scala
author haftmann
Mon, 06 Feb 2017 20:56:34 +0100
changeset 64990 c6a7de505796
parent 64682 7e119f32276a
child 65196 e8760a98db78
permissions -rw-r--r--
more explicit errors in pathological cases

/*  Title:      Pure/General/codepoint.scala
    Author:     Makarius

Unicode codepoints vs. Unicode string encoding.
*/

package isabelle


/**
 * Operations on Unicode codepoints, as opposed to the UTF-16 `Char` units
 * that make up a JVM `String`.
 */
object Codepoint
{
  /**
   * The string that consists of the single codepoint `c`.
   *
   * Delegates to the JDK constructor `String(int[], int, int)`, which rejects
   * values that are not valid Unicode codepoints.
   */
  def string(c: Int): String = new String(Array(c), 0, 1)

  /**
   * Iterator over the codepoints of `s`, from left to right.
   *
   * Each step reads the codepoint at the current `Char` offset and advances
   * by the number of `Char`s that codepoint occupies.
   */
  def iterator(s: String): Iterator[Int] =
    new Iterator[Int] {
      // current position within s, counted in Char units (not codepoints)
      private var offset = 0

      def hasNext: Boolean = offset < s.length

      // declared with "()" to match the signature of Iterator.next()
      def next(): Int =
      {
        // explicit iterator-style failure, instead of leaking the
        // StringIndexOutOfBoundsException of codePointAt
        if (!hasNext) throw new NoSuchElementException("next on exhausted codepoint iterator")
        val c = s.codePointAt(offset)
        offset += Character.charCount(c)
        c
      }
    }

  /**
   * Number of codepoints in `s`.
   *
   * Uses the JDK counting primitive directly, which avoids building an
   * iterator of boxed values just to count its elements.
   */
  def length(s: String): Int = s.codePointCount(0, s.length)

  /**
   * Text length measured per codepoint: every codepoint contributes
   * `codepoint_length` units (1 by default).
   */
  trait Length extends Text.Length
  {
    /** Length contribution of a single codepoint `c`; subclasses may override. */
    protected def codepoint_length(c: Int): Int = 1

    /** Total length of `s`: the sum of `codepoint_length` over all its codepoints. */
    def apply(s: String): Int =
      iterator(s).foldLeft(0) { case (n, c) => n + codepoint_length(c) }

    /**
     * The `Char` offset within `s` that corresponds to position `i`, where
     * `i` is measured in units of `codepoint_length`.
     *
     * Non-positive `i` and strings that contain only `Char`s below 0x80 are
     * handed over to `Text.Length.offset` (defined elsewhere in the project).
     * Otherwise the codepoints of `s` are traversed, accumulating their
     * lengths until at least `i` is reached; the result is `None` if the
     * string ends before that.
     *
     * Note that when `codepoint_length` yields values greater than 1, the
     * accumulated length may step over `i`; the result is then the offset
     * directly after the codepoint that crossed `i`.
     */
    def offset(s: String, i: Int): Option[Text.Offset] =
    {
      if (i <= 0 || s.forall(_ < 0x80)) Text.Length.offset(s, i)
      else {
        val length = s.length
        var offset = 0  // position in s, in Char units
        var j = 0       // accumulated length of the codepoints passed so far
        while (offset < length && j < i) {
          val c = s.codePointAt(offset)
          offset += Character.charCount(c)
          j += codepoint_length(c)
        }
        if (j >= i) Some(offset) else None
      }
    }
  }

  /** Default instance: every codepoint counts as length 1. */
  object Length extends Length
}