author | wenzelm |
Sun, 12 Nov 2023 12:26:08 +0100 | |
changeset 78956 | 12abaffb0346 |
parent 78953 | b6116a86d2ac |
child 78958 | c125f75a5144 |
permissions | -rw-r--r-- |
76991 | 1 |
/* Title: Pure/ML/ml_heap.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
ML heap operations. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import java.nio.ByteBuffer |
|
11 |
import java.nio.channels.FileChannel |
|
12 |
import java.nio.file.StandardOpenOption |
|
13 |
||
14 |
||
15 |
object ML_Heap { |
|
16 |
/** heap file with SHA1 digest **/ |
|
17 |
||
18 |
private val sha1_prefix = "SHA1:" |
|
19 |
||
78182 | 20 |
def read_file_digest(heap: Path): Option[SHA1.Digest] = { |
76991 | 21 |
if (heap.is_file) { |
77711
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
22 |
val l = sha1_prefix.length |
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
23 |
val m = l + SHA1.digest_length |
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
24 |
val n = heap.file.length |
78953 | 25 |
val bs = Bytes.read_file(heap, offset = n - m) |
77711
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
26 |
if (bs.length == m) { |
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
27 |
val s = bs.text |
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
28 |
if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(l))) |
76991 | 29 |
else None |
30 |
} |
|
77711
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
wenzelm
parents:
77206
diff
changeset
|
31 |
else None |
76991 | 32 |
} |
33 |
else None |
|
34 |
} |
|
35 |
||
78182 | 36 |
def write_file_digest(heap: Path): SHA1.Digest = |
37 |
read_file_digest(heap) getOrElse { |
|
77206 | 38 |
val digest = SHA1.digest(heap) |
39 |
File.append(heap, sha1_prefix + digest.toString) |
|
40 |
digest |
|
76991 | 41 |
} |
77720 | 42 |
|
43 |
||
44 |
/* SQL data model */ |
|
45 |
||
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
46 |
object private_data extends SQL.Data("isabelle_heaps") { |
78187
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
47 |
override lazy val tables = SQL.Tables(Base.table, Slices.table) |
78183 | 48 |
|
49 |
object Generic { |
|
50 |
val name = SQL.Column.string("name").make_primary_key |
|
51 |
} |
|
52 |
||
53 |
object Base { |
|
54 |
val name = Generic.name |
|
55 |
val size = SQL.Column.long("size") |
|
56 |
val digest = SQL.Column.string("digest") |
|
57 |
||
78266 | 58 |
val table = make_table(List(name, size, digest)) |
78183 | 59 |
} |
60 |
||
61 |
object Slices { |
|
62 |
val name = Generic.name |
|
63 |
val slice = SQL.Column.int("slice").make_primary_key |
|
64 |
val content = SQL.Column.bytes("content") |
|
65 |
||
78266 | 66 |
val table = make_table(List(name, slice, content), name = "slices") |
78183 | 67 |
} |
68 |
||
78278 | 69 |
object Slices_Size { |
70 |
val name = Generic.name |
|
71 |
val slice = SQL.Column.int("slice").make_primary_key |
|
72 |
val size = SQL.Column.long("size") |
|
73 |
||
74 |
val table = make_table(List(name, slice, size), |
|
75 |
body = "SELECT name, slice, pg_size_pretty(length(content)::bigint) as size FROM " + |
|
76 |
Slices.table.ident, |
|
77 |
name = "slices_size") |
|
78 |
} |
|
79 |
||
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
80 |
def get_entries(db: SQL.Database, names: Iterable[String]): Map[String, SHA1.Digest] = { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
81 |
require(names.nonEmpty) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
82 |
|
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
83 |
db.execute_query_statement( |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
84 |
Base.table.select(List(Base.name, Base.digest), |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
85 |
sql = Generic.name.where_member(names)), |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
86 |
List.from[(String, String)], |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
87 |
res => res.string(Base.name) -> res.string(Base.digest) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
88 |
).flatMap({ |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
89 |
case (_, "") => None |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
90 |
case (name, digest) => Some(name -> SHA1.fake_digest(digest)) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
91 |
}).toMap |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
92 |
} |
78183 | 93 |
|
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
94 |
def read_entry(db: SQL.Database, name: String): List[Bytes] = |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
95 |
db.execute_query_statement( |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
96 |
Slices.table.select(List(Slices.content), |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
97 |
sql = Generic.name.where_equal(name) + SQL.order_by(List(Slices.slice))), |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
98 |
List.from[Bytes], _.bytes(Slices.content)) |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
99 |
|
78183 | 100 |
def clean_entry(db: SQL.Database, name: String): Unit = { |
101 |
for (table <- List(Base.table, Slices.table)) { |
|
102 |
db.execute_statement(table.delete(sql = Base.name.where_equal(name))) |
|
103 |
} |
|
78278 | 104 |
db.create_view(Slices_Size.table) |
78183 | 105 |
} |
106 |
||
107 |
def prepare_entry(db: SQL.Database, name: String): Unit = |
|
108 |
db.execute_statement(Base.table.insert(), body = |
|
109 |
{ stmt => |
|
110 |
stmt.string(1) = name |
|
111 |
stmt.long(2) = None |
|
112 |
stmt.string(3) = None |
|
113 |
}) |
|
114 |
||
115 |
def write_entry(db: SQL.Database, name: String, slice: Int, content: Bytes): Unit = |
|
116 |
db.execute_statement(Slices.table.insert(), body = |
|
117 |
{ stmt => |
|
118 |
stmt.string(1) = name |
|
119 |
stmt.int(2) = slice |
|
120 |
stmt.bytes(3) = content |
|
121 |
}) |
|
122 |
||
123 |
def finish_entry(db: SQL.Database, name: String, size: Long, digest: SHA1.Digest): Unit = |
|
124 |
db.execute_statement( |
|
125 |
Base.table.update(List(Base.size, Base.digest), sql = Base.name.where_equal(name)), |
|
126 |
body = |
|
127 |
{ stmt => |
|
128 |
stmt.long(1) = size |
|
129 |
stmt.string(2) = digest.toString |
|
130 |
}) |
|
131 |
} |
|
132 |
||
78204 | 133 |
def clean_entry(db: SQL.Database, session_name: String): Unit = |
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
134 |
private_data.transaction_lock(db, create = true, label = "ML_Heap.clean_entry") { |
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
135 |
private_data.clean_entry(db, session_name) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
136 |
} |
78186 | 137 |
|
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
138 |
def get_entries(db: SQL.Database, names: Iterable[String]): Map[String, SHA1.Digest] = |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
139 |
private_data.transaction_lock(db, create = true, label = "ML_Heap.get_entries") { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
140 |
private_data.get_entries(db, names) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
141 |
} |
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
142 |
|
78191 | 143 |
def store( |
78183 | 144 |
database: Option[SQL.Database], |
78204 | 145 |
session_name: String, |
78183 | 146 |
heap: Path, |
78188 | 147 |
slice: Long, |
78193 | 148 |
cache: Compress.Cache = Compress.Cache.none |
78183 | 149 |
): SHA1.Digest = { |
150 |
val digest = write_file_digest(heap) |
|
151 |
database match { |
|
78204 | 152 |
case None => |
78183 | 153 |
case Some(db) => |
78956 | 154 |
val size = File.size(heap) - sha1_prefix.length - SHA1.digest_length |
78183 | 155 |
|
78188 | 156 |
val slices = (size.toDouble / slice.toDouble).ceil.toInt |
78183 | 157 |
val step = (size.toDouble / slices.toDouble).ceil.toLong |
158 |
||
159 |
try { |
|
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
160 |
private_data.transaction_lock(db, create = true, label = "ML_Heap.store1") { |
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
161 |
private_data.prepare_entry(db, session_name) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
162 |
} |
78183 | 163 |
|
164 |
for (i <- 0 until slices) { |
|
165 |
val j = i + 1 |
|
166 |
val offset = step * i |
|
167 |
val limit = if (j < slices) step * j else size |
|
168 |
val content = |
|
78953 | 169 |
Bytes.read_file(heap, offset = offset, limit = limit) |
78183 | 170 |
.compress(cache = cache) |
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
171 |
private_data.transaction_lock(db, label = "ML_Heap.store2") { |
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
172 |
private_data.write_entry(db, session_name, i, content) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
173 |
} |
78183 | 174 |
} |
175 |
||
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
176 |
private_data.transaction_lock(db, label = "ML_Heap.store3") { |
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
177 |
private_data.finish_entry(db, session_name, size, digest) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
178 |
} |
78183 | 179 |
} |
180 |
catch { case exn: Throwable => |
|
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
181 |
private_data.transaction_lock(db, create = true, label = "ML_Heap.store4") { |
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
182 |
private_data.clean_entry(db, session_name) |
78213
fd0430a7b7a4
avoid repeated open_database_server: synchronized transaction_lock;
wenzelm
parents:
78204
diff
changeset
|
183 |
} |
78183 | 184 |
throw exn |
185 |
} |
|
186 |
} |
|
187 |
digest |
|
188 |
} |
|
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
189 |
|
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
190 |
def restore( |
78204 | 191 |
database: Option[SQL.Database], |
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
192 |
heaps: List[Path], |
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
193 |
cache: Compress.Cache = Compress.Cache.none |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
194 |
): Unit = { |
78204 | 195 |
database match { |
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
196 |
case Some(db) if heaps.nonEmpty => |
78396
7853d9072d1b
renamed object Data to private_data, to emphasize its intended scope (but it is publicly accessible in the database);
wenzelm
parents:
78369
diff
changeset
|
197 |
private_data.transaction_lock(db, create = true, label = "ML_Heap.restore") { |
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
198 |
val db_digests = private_data.get_entries(db, heaps.map(_.file_name)) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
199 |
for (heap <- heaps) { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
200 |
val session_name = heap.file_name |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
201 |
val file_digest = read_file_digest(heap) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
202 |
val db_digest = db_digests.get(session_name) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
203 |
if (db_digest.isDefined && db_digest != file_digest) { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
204 |
val base_dir = Isabelle_System.make_directory(heap.expand.dir) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
205 |
Isabelle_System.with_tmp_file(session_name + "_", base_dir = base_dir.file) { tmp => |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
206 |
Bytes.write(tmp, Bytes.empty) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
207 |
for (slice <- private_data.read_entry(db, session_name)) { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
208 |
Bytes.append(tmp, slice.uncompress(cache = cache)) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
209 |
} |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
210 |
val digest = write_file_digest(tmp) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
211 |
if (db_digest.get == digest) { |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
212 |
Isabelle_System.chmod("a+r", tmp) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
213 |
Isabelle_System.move_file(tmp, heap) |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
214 |
} |
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
215 |
else error("Incoherent content for session heap " + quote(session_name)) |
78204 | 216 |
} |
78509
146468e05dd4
more robust: atomic file-system result via tmp file;
wenzelm
parents:
78396
diff
changeset
|
217 |
} |
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
218 |
} |
78204 | 219 |
} |
78510
8f45302a9ff0
more thorough ML_Heap.restore: include ancestors; prefer simultaneous ML_Heap.get_entries: just one database access for heap hierarchy;
wenzelm
parents:
78509
diff
changeset
|
220 |
case _ => |
78196
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
221 |
} |
140a6f2e3728
restore heaps from database, which takes precedence over file-system;
wenzelm
parents:
78193
diff
changeset
|
222 |
} |
76991 | 223 |
} |