src/Pure/General/codepoint.scala
author wenzelm
Thu, 03 Mar 2022 15:12:38 +0100
changeset 75196 e894577e10d8
parent 73337 0af9e7e4476f
child 75393 87ebf5a50283
permissions -rw-r--r--
tuned imports;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/codepoint.scala
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     3
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     4
Unicode codepoints vs. Unicode string encoding.
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     5
*/
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     6
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     7
package isabelle
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     8
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     9
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    10
object Codepoint
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    11
{
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    12
  /* string that consists of the single Unicode codepoint c */
  def string(c: Int): String =
  {
    val codepoints = Array[Int](c)
    new String(codepoints, 0, codepoints.length)
  }
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    13
71933
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    14
  /* Iterator over the Unicode codepoints of s: each step decodes the codepoint
     at the current Char offset and hands (codepoint, start offset) to result. */
  private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A)
    extends Iterator[A]
  {
    // Char index of the next codepoint to decode; internal cursor only
    private var offset = 0

    def hasNext: Boolean = offset < s.length

    def next(): A =
    {
      // signal exhaustion in the usual Iterator manner, instead of leaking
      // StringIndexOutOfBoundsException from codePointAt
      if (!hasNext) throw new NoSuchElementException("next on empty iterator")

      val c = s.codePointAt(offset)
      val i = offset
      // advance by the number of Chars that encode c (1 or 2)
      offset += Character.charCount(c)
      result(c, i)
    }
  }
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    27
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    28
  /* codepoints of s, each paired with the Char offset where it starts */
  def iterator_offset(s: String): Iterator[(Int, Text.Offset)] =
    new Iterator_Offset[(Int, Text.Offset)](s, (codepoint, start) => (codepoint, start))
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    29
  /* codepoints of s, without their offsets */
  def iterator(s: String): Iterator[Int] =
    new Iterator_Offset[Int](s, (codepoint, _) => codepoint)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    30
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    31
  /* Number of Unicode codepoints in s (not the number of Chars).

     Counted directly by the JDK, without allocating an iterator or boxing each
     codepoint; an unpaired surrogate counts as one codepoint. */
  def length(s: String): Int = s.codePointCount(0, s.length)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    32
}