src/Pure/General/codepoint.scala
author Fabian Huch <huch@in.tum.de>
Thu, 18 Jul 2024 13:08:11 +0200
changeset 80574 90493e889dff
parent 75393 87ebf5a50283
child 81346 0cdd6729a962
permissions -rw-r--r--
clarified: more uniform;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/codepoint.scala
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     3
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     4
Unicode codepoints vs. Unicode string encoding.
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     5
*/
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     6
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     7
package isabelle
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     8
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
     9
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75196
diff changeset
    10
object Codepoint {
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    11
  // Encode a single Unicode codepoint as a String: one char for the BMP,
  // a surrogate pair for supplementary codepoints.
  def string(c: Int): String = new String(Character.toChars(c))
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    12
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75196
diff changeset
    13
  // Iterator over the codepoints of s. For each codepoint, result is applied to
  // the codepoint and the index into s (counted in chars) at which it starts.
  private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A) extends Iterator[A] {
    // index into s of the next codepoint to be produced
    var offset = 0

    def hasNext: Boolean = offset < s.length

    def next(): A = {
      val start = offset
      val codepoint = s.codePointAt(start)
      // advance by 1 or 2 chars, depending on whether this is a surrogate pair
      offset = start + Character.charCount(codepoint)
      result(codepoint, start)
    }
  }
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    23
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    24
  // Codepoints of s, each paired with the offset in s where it starts.
  def iterator_offset(s: String): Iterator[(Int, Text.Offset)] =
    new Iterator_Offset[(Int, Text.Offset)](s, (codepoint, start) => (codepoint, start))
aec0f7b58cc6 proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents: 65196
diff changeset
    25
  // Codepoints of s, in order; the start offsets are discarded.
  def iterator(s: String): Iterator[Int] =
    new Iterator_Offset[Int](s, (codepoint, _) => codepoint)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    26
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64610
diff changeset
    27
  // Number of Unicode codepoints in s (not the number of chars): a surrogate
  // pair counts once, and an unpaired surrogate counts as one codepoint.
  // Uses the JDK primitive directly instead of walking a boxing iterator.
  def length(s: String): Int = s.codePointCount(0, s.length)
64610
1b89608974e9 clarified modules;
wenzelm
parents:
diff changeset
    28
}