src/Pure/General/utf8.scala
changeset 80492 43323d886ea3
parent 80490 dd2f5fb363a5
child 80494 d1240adc30ce
equal deleted inserted replaced
80491:b439582efc8a 80492:43323d886ea3
    21   /* permissive UTF-8 decoding */
    21   /* permissive UTF-8 decoding */
    22 
    22 
    23   // see also https://en.wikipedia.org/wiki/UTF-8#Description
    23   // see also https://en.wikipedia.org/wiki/UTF-8#Description
    24   // overlong encodings enable byte-stuffing of low-ASCII
    24   // overlong encodings enable byte-stuffing of low-ASCII
    25 
    25 
    26   def decode_permissive_bytes(bytes: Bytes.Vec): String = {
    26   def decode_permissive(bytes: Bytes): String = {
    27     val size = bytes.size
    27     val size = bytes.size
    28     val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
    28     val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
    29     var code = -1
    29     var code = -1
    30     var rest = 0
    30     var rest = 0
    31     def flush(): Unit = {
    31     def flush(): Unit = {
    58       else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
    58       else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
    59     }
    59     }
    60     flush()
    60     flush()
    61     buf.toString
    61     buf.toString
    62   }
    62   }
    63 
       
    64   def decode_permissive(text: String): String = {
       
    65     val relevant = {
       
    66       var i = 0
       
    67       val n = text.length
       
    68       var found = false
       
    69       while (i < n && !found) {
       
    70         if (text(i) >= 128) { found = true }
       
    71         i += 1
       
    72       }
       
    73       found
       
    74     }
       
    75     if (relevant) decode_permissive_bytes(new Bytes.Vec_String(text))
       
    76     else text
       
    77   }
       
    78 }
    63 }