author | wenzelm |
Tue, 18 Jul 2023 19:51:12 +0200 | |
changeset 78396 | 7853d9072d1b |
parent 78369 | ba71ea02d965 |
child 78509 | 146468e05dd4 |
permissions | -rw-r--r-- |
76991 | 1 |
/* Title: Pure/ML/ml_heap.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
ML heap operations. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import java.nio.ByteBuffer |
|
11 |
import java.nio.channels.FileChannel |
|
12 |
import java.nio.file.StandardOpenOption |
|
13 |
||
14 |
||
15 |
object ML_Heap { |
|
16 |
/** heap file with SHA1 digest **/ |
|
17 |
||
18 |
/* marker text that precedes the digest in the trailer appended to a heap file */
private val sha1_prefix = "SHA1:"

/* Read the SHA1 trailer from the end of a heap file.

   The last (prefix length + SHA1.digest_length) bytes are requested from the
   file.  The result is None if `heap` is not a file, if the bytes obtained do
   not have exactly that length, or if their text does not start with the
   "SHA1:" marker.  Otherwise the text after the marker is wrapped via
   SHA1.fake_digest, i.e. it is taken as given and not recomputed here. */
def read_file_digest(heap: Path): Option[SHA1.Digest] =
  if (!heap.is_file) None
  else {
    val prefix_length = sha1_prefix.length
    val trailer_length = prefix_length + SHA1.digest_length
    val file_length = heap.file.length
    val trailer = Bytes.read_file(heap.file, offset = file_length - trailer_length)

    if (trailer.length != trailer_length) None
    else {
      val text = trailer.text
      if (text.startsWith(sha1_prefix)) Some(SHA1.fake_digest(text.substring(prefix_length)))
      else None
    }
  }
|
35 |
||
78182 | 36 |
/* Ensure that the heap file carries a SHA1 trailer and return its digest.

   An existing trailer (as seen by read_file_digest) is returned unchanged.
   Otherwise the digest of the file is computed and appended to the file as
   "SHA1:" followed by the digest text. */
def write_file_digest(heap: Path): SHA1.Digest =
  read_file_digest(heap) match {
    case Some(existing) => existing
    case None =>
      val computed = SHA1.digest(heap)
      File.append(heap, sha1_prefix + computed.toString)
      computed
  }
77720 | 42 |
|
43 |
||
44 |
/* SQL data model */ |
|
45 |
||
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
46 |
object private_data extends SQL.Data("isabelle_heaps") {
  override lazy val tables = SQL.Tables(Base.table, Slices.table)

  /* the "name" column, declared once and shared by all tables below */
  object Generic {
    val name = SQL.Column.string("name").make_primary_key
  }

  /* one row per heap: name, size, digest */
  object Base {
    val name = Generic.name
    val size = SQL.Column.long("size")
    val digest = SQL.Column.string("digest")

    val table = make_table(List(name, size, digest))
  }

  /* heap content: one row per (name, slice) */
  object Slices {
    val name = Generic.name
    val slice = SQL.Column.int("slice").make_primary_key
    val content = SQL.Column.bytes("content")

    val table = make_table(List(name, slice, content), name = "slices")
  }

  /* derived table over Slices, used with db.create_view in clean_entry;
     NOTE(review): the SELECT uses pg_size_pretty and "::bigint", which looks
     PostgreSQL-specific -- confirm against other database back-ends */
  object Slices_Size {
    val name = Generic.name
    val slice = SQL.Column.int("slice").make_primary_key
    val size = SQL.Column.long("size")

    val table = {
      val view_body =
        "SELECT name, slice, pg_size_pretty(length(content)::bigint) as size FROM " +
          Slices.table.ident
      make_table(List(name, slice, size), body = view_body, name = "slices_size")
    }
  }

  /* Digest recorded in Base for the given name, if any.
     The stored string is passed through proper_string (presumably discarding
     an absent or empty value -- verify) and then wrapped via SHA1.fake_digest. */
  def get_entry(db: SQL.Database, name: String): Option[SHA1.Digest] = {
    val query = Base.table.select(List(Base.digest), sql = Generic.name.where_equal(name))
    for {
      raw <- db.execute_query_statementO[String](query, res => res.string(Base.digest))
      text <- proper_string(raw)
    } yield SHA1.fake_digest(text)
  }

  /* Content of all slices for the given name, ordered by slice number. */
  def read_entry(db: SQL.Database, name: String): List[Bytes] = {
    val query =
      Slices.table.select(List(Slices.content),
        sql = Generic.name.where_equal(name) + SQL.order_by(List(Slices.slice)))
    db.execute_query_statement(query, List.from[Bytes], res => res.bytes(Slices.content))
  }

  /* Delete the rows for the given name from Base and Slices (in that order),
     then issue db.create_view for the Slices_Size table. */
  def clean_entry(db: SQL.Database, name: String): Unit = {
    List(Base.table, Slices.table).foreach { table =>
      db.execute_statement(table.delete(sql = Base.name.where_equal(name)))
    }
    db.create_view(Slices_Size.table)
  }

  /* Insert a fresh Base row for the given name; size and digest are set to
     None here and filled in later by finish_entry. */
  def prepare_entry(db: SQL.Database, name: String): Unit = {
    val insert = Base.table.insert()
    db.execute_statement(insert, body = { st =>
      st.string(1) = name
      st.long(2) = None
      st.string(3) = None
    })
  }

  /* Insert one Slices row: (name, slice number, content). */
  def write_entry(db: SQL.Database, name: String, slice: Int, content: Bytes): Unit = {
    val insert = Slices.table.insert()
    db.execute_statement(insert, body = { st =>
      st.string(1) = name
      st.int(2) = slice
      st.bytes(3) = content
    })
  }

  /* Update size and digest of the Base row for the given name. */
  def finish_entry(db: SQL.Database, name: String, size: Long, digest: SHA1.Digest): Unit = {
    val update =
      Base.table.update(List(Base.size, Base.digest), sql = Base.name.where_equal(name))
    db.execute_statement(update, body = { st =>
      st.long(1) = size
      st.string(2) = digest.toString
    })
  }
}
|
124 |
||
78204 | 125 |
/* Remove the database content of a session heap: delegates to
   private_data.clean_entry inside private_data.transaction_lock
   (invoked with create = true). */
def clean_entry(db: SQL.Database, session_name: String): Unit = {
  private_data.transaction_lock(db, create = true, label = "ML_Heap.clean_entry") {
    private_data.clean_entry(db, session_name)
  }
}
78186 | 129 |
|
78204 | 130 |
/* Digest recorded in the database for a session heap, if any: delegates to
   private_data.get_entry inside private_data.transaction_lock
   (invoked with create = true). */
def get_entry(db: SQL.Database, session_name: String): Option[SHA1.Digest] = {
  private_data.transaction_lock(db, create = true, label = "ML_Heap.get_entry") {
    private_data.get_entry(db, session_name)
  }
}
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
134 |
|
78191 | 135 |
/* Store a heap file, optionally also into the database.

   The file always receives its SHA1 trailer via write_file_digest, and that
   digest is the result.  With a database, the heap content (file size minus
   the trailer) is split into pieces governed by `slice`; each piece is read
   with Bytes.read_file, compressed using `cache`, and written as a separate
   Slices row.  Finally size and digest are recorded in the Base row.

   Arguments:
     database      optional database; None means file-system only
     session_name  key of the database entry
     heap          heap file
     slice         requested slice size in bytes; must be positive when a
                   database is given
     cache         compression cache, passed to Bytes.compress

   Failure: a non-positive `slice` (with database) is rejected via error().
   Any exception during the database phase triggers removal of the partial
   entry, and the original exception is re-thrown. */
def store(
  database: Option[SQL.Database],
  session_name: String,
  heap: Path,
  slice: Long,
  cache: Compress.Cache = Compress.Cache.none
): SHA1.Digest = {
  val digest = write_file_digest(heap)
  database match {
    case None =>
    case Some(db) =>
      // Guard the divisions below: slice == 0 would give an infinite quotient
      // (Int.MaxValue slices), a negative slice would give no slices at all
      // while finish_entry still records size and digest.
      if (slice <= 0) error("Bad heap slice size: " + slice)

      // content size: file size without the appended "SHA1:" + digest trailer
      val size = File.space(heap).bytes - sha1_prefix.length - SHA1.digest_length

      // number of slices for the requested slice size, then an even step
      // such that all slices except possibly the last have the same bounds
      val slices = (size.toDouble / slice.toDouble).ceil.toInt
      val step = (size.toDouble / slices.toDouble).ceil.toLong

      try {
        private_data.transaction_lock(db, create = true, label = "ML_Heap.store1") {
          private_data.prepare_entry(db, session_name)
        }

        // each slice is written in its own locked transaction
        for (i <- 0 until slices) {
          val j = i + 1
          val offset = step * i
          val limit = if (j < slices) step * j else size
          val content =
            Bytes.read_file(heap.file, offset = offset, limit = limit)
              .compress(cache = cache)
          private_data.transaction_lock(db, label = "ML_Heap.store2") {
            private_data.write_entry(db, session_name, i, content)
          }
        }

        private_data.transaction_lock(db, label = "ML_Heap.store3") {
          private_data.finish_entry(db, session_name, size, digest)
        }
      }
      catch { case exn: Throwable =>
        // Remove the partial entry.  A non-fatal failure of this clean-up must
        // not hide the root cause: attach it to the original exception.
        try {
          private_data.transaction_lock(db, create = true, label = "ML_Heap.store4") {
            private_data.clean_entry(db, session_name)
          }
        }
        catch {
          case scala.util.control.NonFatal(cleanup_exn) => exn.addSuppressed(cleanup_exn)
        }
        throw exn
      }
  }
  digest
}
|
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
181 |
|
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
182 |
/* Restore a heap file from the database, which takes precedence over the
   file-system.

   Nothing happens without a database, without a database digest for
   `session_name`, or when that digest equals the one found in the local file
   by read_file_digest.  Otherwise the directory of the heap is created, the
   file is overwritten by the uncompressed slices from the database in order,
   and its digest is written via write_file_digest.  A mismatch between that
   digest and the database digest is reported via error(). */
def restore(
  database: Option[SQL.Database],
  session_name: String,
  heap: Path,
  cache: Compress.Cache = Compress.Cache.none
): Unit =
  for (db <- database) {
    private_data.transaction_lock(db, create = true, label = "ML_Heap.restore") {
      val stored = private_data.get_entry(db, session_name)
      val local = read_file_digest(heap)

      stored match {
        case Some(expected) if !local.contains(expected) =>
          Isabelle_System.make_directory(heap.expand.dir)

          // start from an empty file, then append slice by slice
          Bytes.write(heap, Bytes.empty)
          private_data.read_entry(db, session_name).foreach { part =>
            Bytes.append(heap, part.uncompress(cache = cache))
          }

          val actual = write_file_digest(heap)
          if (expected != actual) error("Incoherent content for file " + heap)

        case _ =>
      }
    }
  }
76991 | 207 |
} |