equal
deleted
inserted
replaced
34 buf.toString |
34 buf.toString |
35 } |
35 } |
36 } |
36 } |
37 |
37 |
38 |
38 |
|
39 /* permissive UTF-8 decoding */ |
|
40 |
|
41 // see also http://en.wikipedia.org/wiki/UTF-8#Description |
|
42 def decode_permissive_utf8(text: CharSequence): String = |
|
43 { |
|
44 val buf = new java.lang.StringBuilder(text.length) |
|
45 var code = -1 |
|
46 var rest = 0 |
|
47 def flush() |
|
48 { |
|
49 if (code != -1) { |
|
50 if (rest == 0 && Character.isValidCodePoint(code)) |
|
51 buf.appendCodePoint(code) |
|
52 else buf.append('\uFFFD') |
|
53 code = -1 |
|
54 rest = 0 |
|
55 } |
|
56 } |
|
57 def init(x: Int, n: Int) |
|
58 { |
|
59 flush() |
|
60 code = x |
|
61 rest = n |
|
62 } |
|
63 def push(x: Int) |
|
64 { |
|
65 if (rest <= 0) init(x, -1) |
|
66 else { |
|
67 code <<= 6 |
|
68 code += x |
|
69 rest -= 1 |
|
70 } |
|
71 } |
|
72 for (i <- 0 until text.length) { |
|
73 val c = text.charAt(i) |
|
74 if (c < 128) { flush(); buf.append(c) } |
|
75 else if ((c & 0xC0) == 0x80) push(c & 0x3F) |
|
76 else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1) |
|
77 else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2) |
|
78 else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3) |
|
79 } |
|
80 flush() |
|
81 buf.toString |
|
82 } |
|
83 |
|
84 |
39 /* timing */ |
85 /* timing */ |
40 |
86 |
41 def timeit[A](e: => A) = |
87 def timeit[A](e: => A) = |
42 { |
88 { |
43 val start = System.currentTimeMillis() |
89 val start = System.currentTimeMillis() |