| author | wenzelm |
| Thu, 12 Nov 2020 11:46:53 +0100 | |
| changeset 72595 | c806eeb9138c |
| parent 71933 | aec0f7b58cc6 |
| child 73337 | 0af9e7e4476f |
| permissions | -rw-r--r-- |
| 64610 | 1 |
/*  Title:      Pure/General/codepoint.scala
    Author:     Makarius

Unicode codepoints vs. Unicode string encoding.
*/
|
6 |
||
7 |
package isabelle |
|
8 |
||
|
71933
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
9 |
import isabelle.Text.Offset |
|
aec0f7b58cc6
proper rendering of complex codepoints, e.g. \<^url> code: 0x01F310;
wenzelm
parents:
65196
diff
changeset
|
10 |
|
| 64610 | 11 |
|
12 |
/* Unicode codepoints vs. the UTF-16 char encoding of JVM strings. */
object Codepoint
{
  /* The string that consists of the single codepoint c (one or two UTF-16 chars).
     The underlying java.lang.String constructor rejects invalid codepoints with
     IllegalArgumentException. */
  def string(c: Int): String = new String(Array(c), 0, 1)

  /* Iterator over the codepoints of s. Each element is result(c, i), where c is the
     codepoint and i is the char index within s at which that codepoint starts. */
  private class Iterator_Offset[A](s: String, result: (Int, Text.Offset) => A)
    extends Iterator[A]
  {
    // char index of the next codepoint to be delivered; internal cursor only
    private var offset = 0

    def hasNext: Boolean = offset < s.length

    // side-effecting: advances the cursor, hence the explicit parameter list,
    // which also matches the signature of Iterator.next()
    def next(): A =
    {
      // follow the usual Iterator convention instead of leaking the
      // StringIndexOutOfBoundsException of codePointAt
      if (!hasNext) throw new NoSuchElementException("next on empty iterator")

      val c = s.codePointAt(offset)
      val i = offset
      // a supplementary codepoint occupies two chars (surrogate pair), others one
      offset += Character.charCount(c)
      result(c, i)
    }
  }

  /* Codepoints of s, each paired with the char index where it starts. */
  def iterator_offset(s: String): Iterator[(Int, Text.Offset)] = new Iterator_Offset(s, (_, _))

  /* Codepoints of s, in order. */
  def iterator(s: String): Iterator[Int] = new Iterator_Offset(s, (c, _) => c)

  /* Number of codepoints in s. An unpaired surrogate counts as one codepoint,
     exactly as it does when stepping through iterator(s). */
  def length(s: String): Int = s.codePointCount(0, s.length)
}