clarified directories;
authorwenzelm
Wed Dec 21 11:21:46 2016 +0100 (2016-12-21)
changeset 64639bad5de3f9554
parent 64623 83f012ce2567
child 64640 f9470490e682
clarified directories;
src/Pure/General/utf8.scala
src/Pure/System/utf8.scala
src/Pure/build-jars
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/General/utf8.scala	Wed Dec 21 11:21:46 2016 +0100
     1.3 @@ -0,0 +1,99 @@
     1.4 +/*  Title:      Pure/General/utf8.scala
     1.5 +    Author:     Makarius
     1.6 +
     1.7 +Variations on UTF-8.
     1.8 +*/
     1.9 +
    1.10 +package isabelle
    1.11 +
    1.12 +
    1.13 +import java.nio.charset.Charset
    1.14 +import scala.io.Codec
    1.15 +
    1.16 +
    1.17 +object UTF8
    1.18 +{
    1.19 +  /* charset */
    1.20 +
    1.21 +  val charset_name: String = "UTF-8"
    1.22 +  val charset: Charset = Charset.forName(charset_name)
    1.23 +  def codec(): Codec = Codec(charset)
    1.24 +
    1.25 +  def bytes(s: String): Array[Byte] = s.getBytes(charset)
    1.26 +
    1.27 +  object Length extends Codepoint.Length
    1.28 +  {
    1.29 +    override def codepoint_length(c: Int): Int =
    1.30 +      if (c < 0x80) 1
    1.31 +      else if (c < 0x800) 2
    1.32 +      else if (c < 0x10000) 3
    1.33 +      else 4
    1.34 +  }
    1.35 +
    1.36 +
    1.37 +  /* permissive UTF-8 decoding */
    1.38 +
    1.39 +  // see also http://en.wikipedia.org/wiki/UTF-8#Description
    1.40 +  // overlong encodings enable byte-stuffing of low-ASCII
    1.41 +
    1.42 +  def decode_permissive(text: CharSequence): String =
    1.43 +  {
    1.44 +    val buf = new java.lang.StringBuilder(text.length)
    1.45 +    var code = -1
    1.46 +    var rest = 0
    1.47 +    def flush()
    1.48 +    {
    1.49 +      if (code != -1) {
    1.50 +        if (rest == 0 && Character.isValidCodePoint(code))
    1.51 +          buf.appendCodePoint(code)
    1.52 +        else buf.append('\uFFFD')
    1.53 +        code = -1
    1.54 +        rest = 0
    1.55 +      }
    1.56 +    }
    1.57 +    def init(x: Int, n: Int)
    1.58 +    {
    1.59 +      flush()
    1.60 +      code = x
    1.61 +      rest = n
    1.62 +    }
    1.63 +    def push(x: Int)
    1.64 +    {
    1.65 +      if (rest <= 0) init(x, -1)
    1.66 +      else {
    1.67 +        code <<= 6
    1.68 +        code += x
    1.69 +        rest -= 1
    1.70 +      }
    1.71 +    }
    1.72 +    for (i <- 0 until text.length) {
    1.73 +      val c = text.charAt(i)
    1.74 +      if (c < 128) { flush(); buf.append(c) }
    1.75 +      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
    1.76 +      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
    1.77 +      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
    1.78 +      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
    1.79 +    }
    1.80 +    flush()
    1.81 +    buf.toString
    1.82 +  }
    1.83 +
    1.84 +  private class Decode_Chars(decode: String => String,
    1.85 +    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
    1.86 +  {
    1.87 +    def length: Int = end - start
    1.88 +    def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
    1.89 +    def subSequence(i: Int, j: Int): CharSequence =
    1.90 +      new Decode_Chars(decode, buffer, start + i, start + j)
    1.91 +
    1.92 +    // toString with adhoc decoding: abuse of CharSequence interface
    1.93 +    override def toString: String = decode(decode_permissive(this))
    1.94 +  }
    1.95 +
    1.96 +  def decode_chars(decode: String => String,
    1.97 +    buffer: Array[Byte], start: Int, end: Int): CharSequence =
    1.98 +  {
    1.99 +    require(0 <= start && start <= end && end <= buffer.length)
   1.100 +    new Decode_Chars(decode, buffer, start, end)
   1.101 +  }
   1.102 +}
     2.1 --- a/src/Pure/System/utf8.scala	Tue Dec 20 22:32:04 2016 +0100
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,99 +0,0 @@
     2.4 -/*  Title:      Pure/System/utf8.scala
     2.5 -    Author:     Makarius
     2.6 -
     2.7 -Variations on UTF-8.
     2.8 -*/
     2.9 -
    2.10 -package isabelle
    2.11 -
    2.12 -
    2.13 -import java.nio.charset.Charset
    2.14 -import scala.io.Codec
    2.15 -
    2.16 -
    2.17 -object UTF8
    2.18 -{
    2.19 -  /* charset */
    2.20 -
    2.21 -  val charset_name: String = "UTF-8"
    2.22 -  val charset: Charset = Charset.forName(charset_name)
    2.23 -  def codec(): Codec = Codec(charset)
    2.24 -
    2.25 -  def bytes(s: String): Array[Byte] = s.getBytes(charset)
    2.26 -
    2.27 -  object Length extends Codepoint.Length
    2.28 -  {
    2.29 -    override def codepoint_length(c: Int): Int =
    2.30 -      if (c < 0x80) 1
    2.31 -      else if (c < 0x800) 2
    2.32 -      else if (c < 0x10000) 3
    2.33 -      else 4
    2.34 -  }
    2.35 -
    2.36 -
    2.37 -  /* permissive UTF-8 decoding */
    2.38 -
    2.39 -  // see also http://en.wikipedia.org/wiki/UTF-8#Description
    2.40 -  // overlong encodings enable byte-stuffing of low-ASCII
    2.41 -
    2.42 -  def decode_permissive(text: CharSequence): String =
    2.43 -  {
    2.44 -    val buf = new java.lang.StringBuilder(text.length)
    2.45 -    var code = -1
    2.46 -    var rest = 0
    2.47 -    def flush()
    2.48 -    {
    2.49 -      if (code != -1) {
    2.50 -        if (rest == 0 && Character.isValidCodePoint(code))
    2.51 -          buf.appendCodePoint(code)
    2.52 -        else buf.append('\uFFFD')
    2.53 -        code = -1
    2.54 -        rest = 0
    2.55 -      }
    2.56 -    }
    2.57 -    def init(x: Int, n: Int)
    2.58 -    {
    2.59 -      flush()
    2.60 -      code = x
    2.61 -      rest = n
    2.62 -    }
    2.63 -    def push(x: Int)
    2.64 -    {
    2.65 -      if (rest <= 0) init(x, -1)
    2.66 -      else {
    2.67 -        code <<= 6
    2.68 -        code += x
    2.69 -        rest -= 1
    2.70 -      }
    2.71 -    }
    2.72 -    for (i <- 0 until text.length) {
    2.73 -      val c = text.charAt(i)
    2.74 -      if (c < 128) { flush(); buf.append(c) }
    2.75 -      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
    2.76 -      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
    2.77 -      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
    2.78 -      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
    2.79 -    }
    2.80 -    flush()
    2.81 -    buf.toString
    2.82 -  }
    2.83 -
    2.84 -  private class Decode_Chars(decode: String => String,
    2.85 -    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
    2.86 -  {
    2.87 -    def length: Int = end - start
    2.88 -    def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
    2.89 -    def subSequence(i: Int, j: Int): CharSequence =
    2.90 -      new Decode_Chars(decode, buffer, start + i, start + j)
    2.91 -
    2.92 -    // toString with adhoc decoding: abuse of CharSequence interface
    2.93 -    override def toString: String = decode(decode_permissive(this))
    2.94 -  }
    2.95 -
    2.96 -  def decode_chars(decode: String => String,
    2.97 -    buffer: Array[Byte], start: Int, end: Int): CharSequence =
    2.98 -  {
    2.99 -    require(0 <= start && start <= end && end <= buffer.length)
   2.100 -    new Decode_Chars(decode, buffer, start, end)
   2.101 -  }
   2.102 -}
     3.1 --- a/src/Pure/build-jars	Tue Dec 20 22:32:04 2016 +0100
     3.2 +++ b/src/Pure/build-jars	Wed Dec 21 11:21:46 2016 +0100
     3.3 @@ -70,6 +70,7 @@
     3.4    General/timing.scala
     3.5    General/untyped.scala
     3.6    General/url.scala
     3.7 +  General/utf8.scala
     3.8    General/value.scala
     3.9    General/word.scala
    3.10    General/xz.scala
    3.11 @@ -118,7 +119,6 @@
    3.12    System/process_result.scala
    3.13    System/progress.scala
    3.14    System/system_channel.scala
    3.15 -  System/utf8.scala
    3.16    Thy/html.scala
    3.17    Thy/present.scala
    3.18    Thy/sessions.scala