equal
deleted
inserted
replaced
21 /* permissive UTF-8 decoding */ |
21 /* permissive UTF-8 decoding */ |
22 |
22 |
23 // see also https://en.wikipedia.org/wiki/UTF-8#Description |
23 // see also https://en.wikipedia.org/wiki/UTF-8#Description |
24 // overlong encodings enable byte-stuffing of low-ASCII |
24 // overlong encodings enable byte-stuffing of low-ASCII |
25 |
25 |
26 def decode_permissive_bytes(bytes: Bytes.Vec): String = { |
26 def decode_permissive(bytes: Bytes): String = { |
27 val size = bytes.size |
27 val size = bytes.size |
28 val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt) |
28 val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt) |
29 var code = -1 |
29 var code = -1 |
30 var rest = 0 |
30 var rest = 0 |
31 def flush(): Unit = { |
31 def flush(): Unit = { |
58 else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3) |
58 else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3) |
59 } |
59 } |
60 flush() |
60 flush() |
61 buf.toString |
61 buf.toString |
62 } |
62 } |
63 |
|
64 def decode_permissive(text: String): String = { |
|
65 val relevant = { |
|
66 var i = 0 |
|
67 val n = text.length |
|
68 var found = false |
|
69 while (i < n && !found) { |
|
70 if (text(i) >= 128) { found = true } |
|
71 i += 1 |
|
72 } |
|
73 found |
|
74 } |
|
75 if (relevant) decode_permissive_bytes(new Bytes.Vec_String(text)) |
|
76 else text |
|
77 } |
|
78 } |
63 } |