author | wenzelm |
Sat, 11 Dec 2021 11:24:48 +0100 | |
changeset 74913 | c2a2be496f35 |
parent 73337 | 0af9e7e4476f |
child 75196 | e894577e10d8 |
permissions | -rw-r--r-- |
64610 | 1 |
/* Title: Pure/General/codepoint.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Unicode codepoints vs. Unicode string encoding. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
9 |
import isabelle.Text.Offset |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
10 |
|
64610 | 11 |
|
12 |
object Codepoint |
|
13 |
{ |
|
64615 | 14 |
def string(c: Int): String = new String(Array(c), 0, 1) |
15 |
||
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
16 |
private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
17 |
extends Iterator[A] |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
18 |
{ |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
19 |
var offset = 0 |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
20 |
def hasNext: Boolean = offset < s.length |
73337 | 21 |
def next(): A = |
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
22 |
{ |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
23 |
val c = s.codePointAt(offset) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
24 |
val i = offset |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
25 |
offset += Character.charCount(c) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
26 |
result(c, i) |
64610 | 27 |
} |
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
28 |
} |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
29 |
|
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
30 |
def iterator_offset(s: String): Iterator[(Int, Text.Offset)] = new Iterator_Offset(s, (_, _)) |
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
31 |
def iterator(s: String): Iterator[Int] = new Iterator_Offset(s, (c, _) => c) |
64610 | 32 |
|
64615 | 33 |
def length(s: String): Int = iterator(s).length |
64610 | 34 |
} |