| author | wenzelm | 
| Tue, 11 Dec 2018 23:59:41 +0100 | |
| changeset 69450 | b28b001e7ee8 | 
| parent 69448 | 51e696887b81 | 
| child 69454 | ef051edd4d10 | 
| permissions | -rw-r--r-- | 
| 54439 | 1  | 
/* Title: Pure/General/bytes.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Immutable byte vectors versus UTF8 strings.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
| 64004 | 10  | 
import java.io.{File => JFile, ByteArrayOutputStream, ByteArrayInputStream,
 | 
| 64229 | 11  | 
OutputStream, InputStream, FileInputStream, FileOutputStream}  | 
| 65070 | 12  | 
import java.net.URL  | 
| 68094 | 13  | 
import java.util.Base64  | 
| 64004 | 14  | 
|
15  | 
import org.tukaani.xz.{XZInputStream, XZOutputStream}
 | 
|
| 54440 | 16  | 
|
17  | 
||
| 54439 | 18  | 
object Bytes  | 
19  | 
{
 | 
|
20  | 
val empty: Bytes = new Bytes(Array[Byte](), 0, 0)  | 
|
21  | 
||
22  | 
def apply(s: CharSequence): Bytes =  | 
|
23  | 
  {
 | 
|
24  | 
val str = s.toString  | 
|
25  | 
if (str.isEmpty) empty  | 
|
26  | 
    else {
 | 
|
| 62527 | 27  | 
val b = UTF8.bytes(str)  | 
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
28  | 
new Bytes(b, 0, b.length)  | 
| 54439 | 29  | 
}  | 
30  | 
}  | 
|
| 54440 | 31  | 
|
| 63779 | 32  | 
def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)  | 
33  | 
||
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
34  | 
def apply(a: Array[Byte], offset: Int, length: Int): Bytes =  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
35  | 
if (length == 0) empty  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
36  | 
    else {
 | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
37  | 
val b = new Array[Byte](length)  | 
| 55618 | 38  | 
System.arraycopy(a, offset, b, 0, length)  | 
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
39  | 
new Bytes(b, 0, b.length)  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
40  | 
}  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
41  | 
|
| 54440 | 42  | 
|
| 68108 | 43  | 
def base64(s: String): Bytes =  | 
44  | 
  {
 | 
|
45  | 
val a = Base64.getDecoder.decode(s)  | 
|
46  | 
new Bytes(a, 0, a.length)  | 
|
47  | 
}  | 
|
48  | 
||
| 68087 | 49  | 
|
| 54440 | 50  | 
/* read */  | 
51  | 
||
| 
64005
 
f6e965cf1617
clarified magic values (see also java/io/BufferedInputStream.java);
 
wenzelm 
parents: 
64004 
diff
changeset
 | 
52  | 
def read_stream(stream: InputStream, limit: Int = Integer.MAX_VALUE, hint: Int = 1024): Bytes =  | 
| 64004 | 53  | 
if (limit == 0) empty  | 
54  | 
    else {
 | 
|
| 
64005
 
f6e965cf1617
clarified magic values (see also java/io/BufferedInputStream.java);
 
wenzelm 
parents: 
64004 
diff
changeset
 | 
55  | 
val out = new ByteArrayOutputStream(if (limit == Integer.MAX_VALUE) hint else limit)  | 
| 
 
f6e965cf1617
clarified magic values (see also java/io/BufferedInputStream.java);
 
wenzelm 
parents: 
64004 
diff
changeset
 | 
56  | 
val buf = new Array[Byte](8192)  | 
| 64004 | 57  | 
var m = 0  | 
| 54440 | 58  | 
|
59  | 
      do {
 | 
|
| 64004 | 60  | 
m = stream.read(buf, 0, buf.size min (limit - out.size))  | 
61  | 
if (m != -1) out.write(buf, 0, m)  | 
|
62  | 
} while (m != -1 && limit > out.size)  | 
|
63  | 
||
64  | 
new Bytes(out.toByteArray, 0, out.size)  | 
|
| 54440 | 65  | 
}  | 
| 
64001
 
7ecb22be8f03
more general read_stream: return actual byte count;
 
wenzelm 
parents: 
63779 
diff
changeset
 | 
66  | 
|
| 
 
7ecb22be8f03
more general read_stream: return actual byte count;
 
wenzelm 
parents: 
63779 
diff
changeset
 | 
67  | 
def read(file: JFile): Bytes =  | 
| 64004 | 68  | 
using(new FileInputStream(file))(read_stream(_, file.length.toInt))  | 
| 64229 | 69  | 
|
70  | 
def read(path: Path): Bytes = read(path.file)  | 
|
71  | 
||
| 65070 | 72  | 
def read(url: URL): Bytes = using(url.openStream)(read_stream(_))  | 
73  | 
||
| 64229 | 74  | 
|
75  | 
/* write */  | 
|
76  | 
||
| 
69393
 
ed0824ef337e
static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
 
wenzelm 
parents: 
69365 
diff
changeset
 | 
77  | 
def write(file: JFile, bytes: Bytes): Unit =  | 
| 
 
ed0824ef337e
static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
 
wenzelm 
parents: 
69365 
diff
changeset
 | 
78  | 
using(new FileOutputStream(file))(bytes.write_stream(_))  | 
| 64229 | 79  | 
|
80  | 
def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)  | 
|
| 54439 | 81  | 
}  | 
82  | 
||
83  | 
final class Bytes private(  | 
|
84  | 
protected val bytes: Array[Byte],  | 
|
85  | 
protected val offset: Int,  | 
|
| 60833 | 86  | 
val length: Int) extends CharSequence  | 
| 54439 | 87  | 
{
 | 
88  | 
/* equality */  | 
|
89  | 
||
| 54440 | 90  | 
override def equals(that: Any): Boolean =  | 
91  | 
  {
 | 
|
92  | 
    that match {
 | 
|
93  | 
case other: Bytes =>  | 
|
94  | 
if (this eq other) true  | 
|
95  | 
else if (length != other.length) false  | 
|
96  | 
else (0 until length).forall(i => bytes(offset + i) == other.bytes(other.offset + i))  | 
|
97  | 
case _ => false  | 
|
98  | 
}  | 
|
99  | 
}  | 
|
100  | 
||
| 54439 | 101  | 
private lazy val hash: Int =  | 
102  | 
  {
 | 
|
103  | 
var h = 0  | 
|
104  | 
    for (i <- offset until offset + length) {
 | 
|
105  | 
val b = bytes(i).asInstanceOf[Int] & 0xFF  | 
|
106  | 
h = 31 * h + b  | 
|
107  | 
}  | 
|
108  | 
h  | 
|
109  | 
}  | 
|
110  | 
||
111  | 
override def hashCode(): Int = hash  | 
|
112  | 
||
113  | 
||
114  | 
/* content */  | 
|
115  | 
||
| 54512 | 116  | 
lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes)  | 
| 54440 | 117  | 
|
| 69448 | 118  | 
def is_empty: Boolean = length == 0  | 
119  | 
||
120  | 
def iterator: Iterator[Byte] =  | 
|
121  | 
for (i <- (offset until (offset + length)).iterator)  | 
|
122  | 
yield bytes(i)  | 
|
123  | 
||
| 
69365
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
124  | 
def array: Array[Byte] =  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
125  | 
  {
 | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
126  | 
val a = new Array[Byte](length)  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
127  | 
System.arraycopy(bytes, offset, a, 0, length)  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
128  | 
a  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
129  | 
}  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
130  | 
|
| 
65279
 
fa62e095d8f1
clarified signature (again, see also 3ed43cfc8b14);
 
wenzelm 
parents: 
65070 
diff
changeset
 | 
131  | 
def text: String =  | 
| 
 
fa62e095d8f1
clarified signature (again, see also 3ed43cfc8b14);
 
wenzelm 
parents: 
65070 
diff
changeset
 | 
132  | 
UTF8.decode_chars(s => s, bytes, offset, offset + length).toString  | 
| 
 
fa62e095d8f1
clarified signature (again, see also 3ed43cfc8b14);
 
wenzelm 
parents: 
65070 
diff
changeset
 | 
133  | 
|
| 68094 | 134  | 
def base64: String =  | 
135  | 
  {
 | 
|
136  | 
val b =  | 
|
137  | 
if (offset == 0 && length == bytes.length) bytes  | 
|
138  | 
else Bytes(bytes, offset, length).bytes  | 
|
139  | 
Base64.getEncoder.encodeToString(b)  | 
|
140  | 
}  | 
|
141  | 
||
| 68106 | 142  | 
def maybe_base64: (Boolean, String) =  | 
143  | 
  {
 | 
|
144  | 
val s = text  | 
|
145  | 
if (this == Bytes(s)) (false, s) else (true, base64)  | 
|
146  | 
}  | 
|
147  | 
||
| 
68150
 
f0f34cbed539
clarified output: avoid costly operations on huge blobs;
 
wenzelm 
parents: 
68149 
diff
changeset
 | 
148  | 
  override def toString: String = "Bytes(" + length + ")"
 | 
| 54439 | 149  | 
|
| 54440 | 150  | 
def isEmpty: Boolean = length == 0  | 
| 54439 | 151  | 
|
| 65630 | 152  | 
def proper: Option[Bytes] = if (isEmpty) None else Some(this)  | 
153  | 
def proper_text: Option[String] = if (isEmpty) None else Some(text)  | 
|
154  | 
||
| 54439 | 155  | 
def +(other: Bytes): Bytes =  | 
| 54440 | 156  | 
if (other.isEmpty) this  | 
157  | 
else if (isEmpty) other  | 
|
| 54439 | 158  | 
    else {
 | 
159  | 
val new_bytes = new Array[Byte](length + other.length)  | 
|
| 55618 | 160  | 
System.arraycopy(bytes, offset, new_bytes, 0, length)  | 
161  | 
System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)  | 
|
| 54439 | 162  | 
new Bytes(new_bytes, 0, new_bytes.length)  | 
163  | 
}  | 
|
| 54440 | 164  | 
|
165  | 
||
| 60833 | 166  | 
/* CharSequence operations */  | 
167  | 
||
168  | 
def charAt(i: Int): Char =  | 
|
169  | 
if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]  | 
|
170  | 
else throw new IndexOutOfBoundsException  | 
|
171  | 
||
172  | 
def subSequence(i: Int, j: Int): Bytes =  | 
|
173  | 
  {
 | 
|
174  | 
if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)  | 
|
175  | 
else throw new IndexOutOfBoundsException  | 
|
176  | 
}  | 
|
177  | 
||
| 69448 | 178  | 
def trim_line: Bytes =  | 
179  | 
if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10)  | 
|
180  | 
subSequence(0, length - 2)  | 
|
181  | 
else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10))  | 
|
182  | 
subSequence(0, length - 1)  | 
|
183  | 
else this  | 
|
184  | 
||
| 60833 | 185  | 
|
| 64004 | 186  | 
/* streams */  | 
187  | 
||
188  | 
def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)  | 
|
189  | 
||
190  | 
def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)  | 
|
191  | 
||
192  | 
||
193  | 
/* XZ data compression */  | 
|
| 54440 | 194  | 
|
| 68018 | 195  | 
def uncompress(cache: XZ.Cache = XZ.cache()): Bytes =  | 
196  | 
using(new XZInputStream(stream(), cache))(Bytes.read_stream(_, hint = length))  | 
|
| 64004 | 197  | 
|
| 68018 | 198  | 
def compress(options: XZ.Options = XZ.options(), cache: XZ.Cache = XZ.cache()): Bytes =  | 
| 64004 | 199  | 
  {
 | 
200  | 
val result = new ByteArrayOutputStream(length)  | 
|
| 68018 | 201  | 
using(new XZOutputStream(result, options, cache))(write_stream(_))  | 
| 64004 | 202  | 
new Bytes(result.toByteArray, 0, result.size)  | 
203  | 
}  | 
|
| 68167 | 204  | 
|
205  | 
def maybe_compress(options: XZ.Options = XZ.options(), cache: XZ.Cache = XZ.cache())  | 
|
206  | 
: (Boolean, Bytes) =  | 
|
207  | 
  {
 | 
|
208  | 
val compressed = compress(options = options, cache = cache)  | 
|
209  | 
if (compressed.length < length) (true, compressed) else (false, this)  | 
|
210  | 
}  | 
|
| 54439 | 211  | 
}  |