64610
|
1 |
/* Title: Pure/General/codepoint.scala
|
|
2 |
Author: Makarius
|
|
3 |
|
|
4 |
Unicode codepoints vs. Unicode string encoding.
|
|
5 |
*/
|
|
6 |
|
|
7 |
package isabelle
|
|
8 |
|
|
9 |
|
|
10 |
object Codepoint
|
|
11 |
{
|
64615
|
12 |
def string(c: Int): String = new String(Array(c), 0, 1)
|
|
13 |
|
|
14 |
def iterator(s: String): Iterator[Int] =
|
64610
|
15 |
new Iterator[Int] {
|
|
16 |
var offset = 0
|
64615
|
17 |
def hasNext: Boolean = offset < s.length
|
64610
|
18 |
def next: Int =
|
|
19 |
{
|
64615
|
20 |
val c = s.codePointAt(offset)
|
64610
|
21 |
offset += Character.charCount(c)
|
|
22 |
c
|
|
23 |
}
|
|
24 |
}
|
|
25 |
|
64615
|
26 |
def length(s: String): Int = iterator(s).length
|
64617
|
27 |
|
64682
|
28 |
trait Length extends Text.Length
|
64617
|
29 |
{
|
|
30 |
protected def codepoint_length(c: Int): Int = 1
|
|
31 |
|
|
32 |
def apply(s: String): Int =
|
|
33 |
(0 /: iterator(s)) { case (n, c) => n + codepoint_length(c) }
|
|
34 |
|
|
35 |
def offset(s: String, i: Int): Option[Text.Offset] =
|
|
36 |
{
|
64682
|
37 |
if (i <= 0 || s.forall(_ < 0x80)) Text.Length.offset(s, i)
|
64617
|
38 |
else {
|
|
39 |
val length = s.length
|
|
40 |
var offset = 0
|
|
41 |
var j = 0
|
|
42 |
while (offset < length && j < i) {
|
|
43 |
val c = s.codePointAt(offset)
|
|
44 |
offset += Character.charCount(c)
|
|
45 |
j += codepoint_length(c)
|
|
46 |
}
|
|
47 |
if (j >= i) Some(offset) else None
|
|
48 |
}
|
|
49 |
}
|
|
50 |
}
|
|
51 |
|
|
52 |
object Length extends Length
|
64610
|
53 |
}
|