| 
64610
 | 
     1  | 
/*  Title:      Pure/General/codepoint.scala
  | 
| 
 | 
     2  | 
    Author:     Makarius
  | 
| 
 | 
     3  | 
  | 
| 
 | 
     4  | 
Unicode codepoints vs. Unicode string encoding.
  | 
| 
 | 
     5  | 
*/
  | 
| 
 | 
     6  | 
  | 
| 
 | 
     7  | 
package isabelle
  | 
| 
 | 
     8  | 
  | 
| 
 | 
     9  | 
  | 
| 
 | 
    10  | 
object Codepoint
  | 
| 
 | 
    11  | 
{
 | 
| 
64615
 | 
    12  | 
  def string(c: Int): String = new String(Array(c), 0, 1)
  | 
| 
 | 
    13  | 
  | 
| 
 | 
    14  | 
  def iterator(s: String): Iterator[Int] =
  | 
| 
64610
 | 
    15  | 
    new Iterator[Int] {
 | 
| 
 | 
    16  | 
      var offset = 0
  | 
| 
64615
 | 
    17  | 
      def hasNext: Boolean = offset < s.length
  | 
| 
64610
 | 
    18  | 
      def next: Int =
  | 
| 
 | 
    19  | 
      {
 | 
| 
64615
 | 
    20  | 
        val c = s.codePointAt(offset)
  | 
| 
64610
 | 
    21  | 
        offset += Character.charCount(c)
  | 
| 
 | 
    22  | 
        c
  | 
| 
 | 
    23  | 
      }
  | 
| 
 | 
    24  | 
    }
  | 
| 
 | 
    25  | 
  | 
| 
64615
 | 
    26  | 
  def length(s: String): Int = iterator(s).length
  | 
| 
64617
 | 
    27  | 
  | 
| 
64682
 | 
    28  | 
  trait Length extends Text.Length
  | 
| 
64617
 | 
    29  | 
  {
 | 
| 
 | 
    30  | 
    protected def codepoint_length(c: Int): Int = 1
  | 
| 
 | 
    31  | 
  | 
| 
 | 
    32  | 
    def apply(s: String): Int =
  | 
| 
 | 
    33  | 
      (0 /: iterator(s)) { case (n, c) => n + codepoint_length(c) }
 | 
| 
 | 
    34  | 
  | 
| 
 | 
    35  | 
    def offset(s: String, i: Int): Option[Text.Offset] =
  | 
| 
 | 
    36  | 
    {
 | 
| 
64682
 | 
    37  | 
      if (i <= 0 || s.forall(_ < 0x80)) Text.Length.offset(s, i)
  | 
| 
64617
 | 
    38  | 
      else {
 | 
| 
 | 
    39  | 
        val length = s.length
  | 
| 
 | 
    40  | 
        var offset = 0
  | 
| 
 | 
    41  | 
        var j = 0
  | 
| 
 | 
    42  | 
        while (offset < length && j < i) {
 | 
| 
 | 
    43  | 
          val c = s.codePointAt(offset)
  | 
| 
 | 
    44  | 
          offset += Character.charCount(c)
  | 
| 
 | 
    45  | 
          j += codepoint_length(c)
  | 
| 
 | 
    46  | 
        }
  | 
| 
 | 
    47  | 
        if (j >= i) Some(offset) else None
  | 
| 
 | 
    48  | 
      }
  | 
| 
 | 
    49  | 
    }
  | 
| 
 | 
    50  | 
  }
  | 
| 
 | 
    51  | 
  | 
| 
 | 
    52  | 
  object Length extends Length
  | 
| 
64610
 | 
    53  | 
}
  |