src/Pure/General/bytes.scala
author wenzelm
Fri May 11 20:05:37 2018 +0200 (13 months ago)
changeset 68150 f0f34cbed539
parent 68149 9a4a6adb95b5
child 68167 327bb0f5f768
permissions -rw-r--r--
clarified output: avoid costly operations on huge blobs;
     1 /*  Title:      Pure/General/bytes.scala
     2     Author:     Makarius
     3 
     4 Immutable byte vectors versus UTF8 strings.
     5 */
     6 
     7 package isabelle
     8 
     9 
    10 import java.io.{File => JFile, ByteArrayOutputStream, ByteArrayInputStream,
    11   OutputStream, InputStream, FileInputStream, FileOutputStream}
    12 import java.net.URL
    13 import java.util.Base64
    14 
    15 import org.tukaani.xz.{XZInputStream, XZOutputStream}
    16 
    17 
    18 object Bytes
    19 {
    20   val empty: Bytes = new Bytes(Array[Byte](), 0, 0)
    21 
    22   def apply(s: CharSequence): Bytes =
    23   {
    24     val str = s.toString
    25     if (str.isEmpty) empty
    26     else {
    27       val b = UTF8.bytes(str)
    28       new Bytes(b, 0, b.length)
    29     }
    30   }
    31 
    32   def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)
    33 
    34   def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
    35     if (length == 0) empty
    36     else {
    37       val b = new Array[Byte](length)
    38       System.arraycopy(a, offset, b, 0, length)
    39       new Bytes(b, 0, b.length)
    40     }
    41 
    42 
    43   def base64(s: String): Bytes =
    44   {
    45     val a = Base64.getDecoder.decode(s)
    46     new Bytes(a, 0, a.length)
    47   }
    48 
    49 
    50   /* read */
    51 
    52   def read_stream(stream: InputStream, limit: Int = Integer.MAX_VALUE, hint: Int = 1024): Bytes =
    53     if (limit == 0) empty
    54     else {
    55       val out = new ByteArrayOutputStream(if (limit == Integer.MAX_VALUE) hint else limit)
    56       val buf = new Array[Byte](8192)
    57       var m = 0
    58 
    59       do {
    60         m = stream.read(buf, 0, buf.size min (limit - out.size))
    61         if (m != -1) out.write(buf, 0, m)
    62       } while (m != -1 && limit > out.size)
    63 
    64       new Bytes(out.toByteArray, 0, out.size)
    65     }
    66 
    67   def read_block(stream: InputStream, length: Int): Option[Bytes] =
    68   {
    69     val bytes = read_stream(stream, limit = length)
    70     if (bytes.length == length) Some(bytes) else None
    71   }
    72 
    73   def read_line(stream: InputStream): Option[Bytes] =
    74   {
    75     val out = new ByteArrayOutputStream(100)
    76     var c = 0
    77     while ({ c = stream.read; c != -1 && c != 10 }) out.write(c)
    78 
    79     if (c == -1 && out.size == 0) None
    80     else {
    81       val a = out.toByteArray
    82       val n = a.length
    83       val b = if (n > 0 && a(n - 1) == 13) a.take(n - 1) else a
    84       Some(new Bytes(b, 0, b.length))
    85     }
    86   }
    87 
    88   def read(file: JFile): Bytes =
    89     using(new FileInputStream(file))(read_stream(_, file.length.toInt))
    90 
    91   def read(path: Path): Bytes = read(path.file)
    92 
    93   def read(url: URL): Bytes = using(url.openStream)(read_stream(_))
    94 
    95 
    96   /* write */
    97 
    98   def write(file: JFile, bytes: Bytes)
    99   {
   100     val stream = new FileOutputStream(file)
   101     try { bytes.write_stream(stream) } finally { stream.close }
   102   }
   103 
   104   def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)
   105 }
   106 
   107 final class Bytes private(
   108   protected val bytes: Array[Byte],
   109   protected val offset: Int,
   110   val length: Int) extends CharSequence
   111 {
   112   /* equality */
   113 
   114   override def equals(that: Any): Boolean =
   115   {
   116     that match {
   117       case other: Bytes =>
   118         if (this eq other) true
   119         else if (length != other.length) false
   120         else (0 until length).forall(i => bytes(offset + i) == other.bytes(other.offset + i))
   121       case _ => false
   122     }
   123   }
   124 
   125   private lazy val hash: Int =
   126   {
   127     var h = 0
   128     for (i <- offset until offset + length) {
   129       val b = bytes(i).asInstanceOf[Int] & 0xFF
   130       h = 31 * h + b
   131     }
   132     h
   133   }
   134 
   135   override def hashCode(): Int = hash
   136 
   137 
   138   /* content */
   139 
   140   lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes)
   141 
   142   def text: String =
   143     UTF8.decode_chars(s => s, bytes, offset, offset + length).toString
   144 
   145   def base64: String =
   146   {
   147     val b =
   148       if (offset == 0 && length == bytes.length) bytes
   149       else Bytes(bytes, offset, length).bytes
   150     Base64.getEncoder.encodeToString(b)
   151   }
   152 
   153   def maybe_base64: (Boolean, String) =
   154   {
   155     val s = text
   156     if (this == Bytes(s)) (false, s) else (true, base64)
   157   }
   158 
   159   override def toString: String = "Bytes(" + length + ")"
   160 
   161   def isEmpty: Boolean = length == 0
   162 
   163   def proper: Option[Bytes] = if (isEmpty) None else Some(this)
   164   def proper_text: Option[String] = if (isEmpty) None else Some(text)
   165 
   166   def +(other: Bytes): Bytes =
   167     if (other.isEmpty) this
   168     else if (isEmpty) other
   169     else {
   170       val new_bytes = new Array[Byte](length + other.length)
   171       System.arraycopy(bytes, offset, new_bytes, 0, length)
   172       System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)
   173       new Bytes(new_bytes, 0, new_bytes.length)
   174     }
   175 
   176 
   177   /* CharSequence operations */
   178 
   179   def charAt(i: Int): Char =
   180     if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
   181     else throw new IndexOutOfBoundsException
   182 
   183   def subSequence(i: Int, j: Int): Bytes =
   184   {
   185     if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
   186     else throw new IndexOutOfBoundsException
   187   }
   188 
   189 
   190   /* streams */
   191 
   192   def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)
   193 
   194   def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)
   195 
   196 
   197   /* XZ data compression */
   198 
   199   def uncompress(cache: XZ.Cache = XZ.cache()): Bytes =
   200     using(new XZInputStream(stream(), cache))(Bytes.read_stream(_, hint = length))
   201 
   202   def compress(options: XZ.Options = XZ.options(), cache: XZ.Cache = XZ.cache()): Bytes =
   203   {
   204     val result = new ByteArrayOutputStream(length)
   205     using(new XZOutputStream(result, options, cache))(write_stream(_))
   206     new Bytes(result.toByteArray, 0, result.size)
   207   }
   208 }