author | Fabian Huch <huch@in.tum.de> |
Thu, 18 Jul 2024 13:08:11 +0200 | |
changeset 80574 | 90493e889dff |
parent 75393 | 87ebf5a50283 |
child 81346 | 0cdd6729a962 |
permissions | -rw-r--r-- |
64610 | 1 |
/* Title: Pure/General/codepoint.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Unicode codepoints vs. Unicode string encoding. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
75393 | 10 |
object Codepoint { |
64615 | 11 |
def string(c: Int): String = new String(Array(c), 0, 1) |
12 |
||
75393 | 13 |
private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A) extends Iterator[A] { |
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
14 |
var offset = 0 |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
15 |
def hasNext: Boolean = offset < s.length |
75393 | 16 |
def next(): A = { |
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
17 |
val c = s.codePointAt(offset) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
18 |
val i = offset |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
19 |
offset += Character.charCount(c) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
20 |
result(c, i) |
64610 | 21 |
} |
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
22 |
} |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
23 |
|
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
24 |
def iterator_offset(s: String): Iterator[(Int, Text.Offset)] = new Iterator_Offset(s, (_, _)) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
25 |
def iterator(s: String): Iterator[Int] = new Iterator_Offset(s, (c, _) => c) |
64610 | 26 |
|
64615 | 27 |
def length(s: String): Int = iterator(s).length |
64610 | 28 |
} |