equal
deleted
inserted
replaced
18 val charset_name: String = "UTF-8" |
18 val charset_name: String = "UTF-8" |
19 val charset: Charset = Charset.forName(charset_name) |
19 val charset: Charset = Charset.forName(charset_name) |
20 def codec(): Codec = Codec(charset) |
20 def codec(): Codec = Codec(charset) |
21 |
21 |
22 def bytes(s: String): Array[Byte] = s.getBytes(charset) |
22 def bytes(s: String): Array[Byte] = s.getBytes(charset) |
|
23 |
|
24 def bytes_length(s: String): Int = |
|
25 (0 /: Codepoint.iterator(s)) { |
|
26 case (n, i) => |
|
27 if (i < 0x80) n + 1 |
|
28 else if (i < 0x800) n + 2 |
|
29 else if (i < 0x10000) n + 3 |
|
30 else n + 4 |
|
31 } |
|
32 |
|
33 object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) } |
23 |
34 |
24 |
35 |
25 /* permissive UTF-8 decoding */ |
36 /* permissive UTF-8 decoding */ |
26 |
37 |
27 // see also http://en.wikipedia.org/wiki/UTF-8#Description |
38 // see also http://en.wikipedia.org/wiki/UTF-8#Description |