src/Pure/System/utf8.scala
changeset 64615 fd0d6de380c6
parent 64370 865b39487b5d
child 64617 01e50039edc9
equal deleted inserted replaced
64614:88211daacf93 64615:fd0d6de380c6
    18   val charset_name: String = "UTF-8"
    18   val charset_name: String = "UTF-8"
    19   val charset: Charset = Charset.forName(charset_name)
    19   val charset: Charset = Charset.forName(charset_name)
    20   def codec(): Codec = Codec(charset)
    20   def codec(): Codec = Codec(charset)
    21 
    21 
    22   def bytes(s: String): Array[Byte] = s.getBytes(charset)
    22   def bytes(s: String): Array[Byte] = s.getBytes(charset)
       
    23 
       
    24   def bytes_length(s: String): Int =
       
    25     (0 /: Codepoint.iterator(s)) {
       
    26       case (n, i) =>
       
    27         if (i < 0x80) n + 1
       
    28         else if (i < 0x800) n + 2
       
    29         else if (i < 0x10000) n + 3
       
    30         else n + 4
       
    31     }
       
    32 
       
    33   object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) }
    23 
    34 
    24 
    35 
    25   /* permissive UTF-8 decoding */
    36   /* permissive UTF-8 decoding */
    26 
    37 
    27   // see also http://en.wikipedia.org/wiki/UTF-8#Description
    38   // see also http://en.wikipedia.org/wiki/UTF-8#Description