src/Pure/General/codepoint.scala
author wenzelm
Sat, 11 Dec 2021 11:24:48 +0100
changeset 74913 c2a2be496f35
parent 73337 0af9e7e4476f
child 75196 e894577e10d8
permissions -rw-r--r--
tuned;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/codepoint.scala
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     3
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     4
Unicode codepoints vs. Unicode string encoding.
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     5
*/
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     6
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     7
package isabelle
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     8
71933
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
     9
import isabelle.Text.Offset
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    10
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    11
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    12
object Codepoint
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    13
{
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    14
  /* String consisting of the single Unicode codepoint c (one or two UTF-16 chars). */
  def string(c: Int): String =
  {
    val codepoints = Array(c)
    new String(codepoints, 0, codepoints.length)
  }
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    15
71933
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    16
  /* Iterator over the Unicode codepoints of s: each step decodes the codepoint at
     the current UTF-16 offset and yields result(codepoint, offset). */
  private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A)
    extends Iterator[A]
  {
    // index into s (counted in UTF-16 chars) of the next codepoint to decode
    private var offset = 0

    def hasNext: Boolean = offset < s.length

    def next(): A =
    {
      // follow the Iterator contract instead of leaking StringIndexOutOfBoundsException
      if (!hasNext) throw new NoSuchElementException("next on empty iterator")

      val start = offset
      val c = s.codePointAt(start)
      // advance by the number of chars that encode c
      offset = start + Character.charCount(c)
      result(c, start)
    }
  }
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    29
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    30
  /* Codepoints of s, each paired with the offset passed to result by Iterator_Offset. */
  def iterator_offset(s: String): Iterator[(Int, Text.Offset)] =
    new Iterator_Offset[(Int, Text.Offset)](s, (codepoint, start) => (codepoint, start))
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    31
  /* Codepoints of s in order, discarding the offsets. */
  def iterator(s: String): Iterator[Int] =
    new Iterator_Offset[Int](s, (codepoint, _) => codepoint)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    32
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    33
  /* Number of Unicode codepoints in s: a surrogate pair counts as one, and so does
     each unpaired surrogate. Counted directly by the JDK, which avoids allocating
     an iterator and boxing every codepoint. */
  def length(s: String): Int = s.codePointCount(0, s.length)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    34
}