author | wenzelm |
Wed, 21 Jun 2023 15:41:18 +0200 | |
changeset 78190 | 40ae1cd71133 |
parent 78187 | 2df0f3604a67 |
child 78196 | 140a6f2e3728 |
permissions | -rw-r--r-- |
78178 | 1 |
/* Title: Pure/Thy/store.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Persistent store for session content: within file-system and/or SQL database. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import java.sql.SQLException |
|
11 |
||
12 |
||
13 |
object Store { |
|
14 |
def apply(options: Options, cache: Term.Cache = Term.Cache.make()): Store = |
|
15 |
new Store(options, cache) |
|
16 |
||
17 |
||
78179 | 18 |
/* session build info */ |
19 |
||
20 |
sealed case class Build_Info( |
|
21 |
sources: SHA1.Shasum, |
|
22 |
input_heaps: SHA1.Shasum, |
|
23 |
output_heap: SHA1.Shasum, |
|
24 |
return_code: Int, |
|
25 |
uuid: String |
|
26 |
) { |
|
27 |
def ok: Boolean = return_code == 0 |
|
28 |
} |
|
29 |
||
30 |
||
31 |
/* session sources */ |
|
78178 | 32 |
|
33 |
sealed case class Source_File( |
|
34 |
name: String, |
|
35 |
digest: SHA1.Digest, |
|
36 |
compressed: Boolean, |
|
37 |
body: Bytes, |
|
38 |
cache: Compress.Cache |
|
39 |
) { |
|
40 |
override def toString: String = name |
|
41 |
||
42 |
def bytes: Bytes = if (compressed) body.uncompress(cache = cache) else body |
|
43 |
} |
|
44 |
||
45 |
object Sources { |
|
46 |
def load(session_base: Sessions.Base, cache: Compress.Cache = Compress.Cache.none): Sources = |
|
47 |
new Sources( |
|
48 |
session_base.session_sources.foldLeft(Map.empty) { |
|
49 |
case (sources, (path, digest)) => |
|
50 |
def err(): Nothing = error("Incoherent digest for source file: " + path) |
|
51 |
val name = File.symbolic_path(path) |
|
52 |
sources.get(name) match { |
|
53 |
case Some(source_file) => |
|
54 |
if (source_file.digest == digest) sources else err() |
|
55 |
case None => |
|
56 |
val bytes = Bytes.read(path) |
|
57 |
if (bytes.sha1_digest == digest) { |
|
58 |
val (compressed, body) = |
|
59 |
bytes.maybe_compress(Compress.Options_Zstd(), cache = cache) |
|
60 |
val file = Source_File(name, digest, compressed, body, cache) |
|
61 |
sources + (name -> file) |
|
62 |
} |
|
63 |
else err() |
|
64 |
} |
|
65 |
}) |
|
66 |
} |
|
67 |
||
68 |
class Sources private(rep: Map[String, Source_File]) extends Iterable[Source_File] { |
|
69 |
override def toString: String = rep.values.toList.sortBy(_.name).mkString("Sources(", ", ", ")") |
|
70 |
override def iterator: Iterator[Source_File] = rep.valuesIterator |
|
71 |
||
72 |
def get(name: String): Option[Source_File] = rep.get(name) |
|
73 |
def apply(name: String): Source_File = |
|
74 |
get(name).getOrElse(error("Missing session sources entry " + quote(name))) |
|
75 |
} |
|
76 |
||
77 |
||
78179 | 78 |
/* SQL data model */ |
78178 | 79 |
|
78187
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
80 |
object Data extends SQL.Data() { |
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
81 |
override lazy val tables = |
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
82 |
SQL.Tables(Session_Info.table, Sources.table, Export.Data.table, Document_Build.Data.table) |
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
83 |
|
78179 | 84 |
object Session_Info { |
85 |
val session_name = SQL.Column.string("session_name").make_primary_key |
|
86 |
||
87 |
// Build_Log.Session_Info |
|
88 |
val session_timing = SQL.Column.bytes("session_timing") |
|
89 |
val command_timings = SQL.Column.bytes("command_timings") |
|
90 |
val theory_timings = SQL.Column.bytes("theory_timings") |
|
91 |
val ml_statistics = SQL.Column.bytes("ml_statistics") |
|
92 |
val task_statistics = SQL.Column.bytes("task_statistics") |
|
93 |
val errors = SQL.Column.bytes("errors") |
|
94 |
val build_log_columns = |
|
95 |
List(session_name, session_timing, command_timings, theory_timings, |
|
96 |
ml_statistics, task_statistics, errors) |
|
78178 | 97 |
|
78179 | 98 |
// Build_Info |
99 |
val sources = SQL.Column.string("sources") |
|
100 |
val input_heaps = SQL.Column.string("input_heaps") |
|
101 |
val output_heap = SQL.Column.string("output_heap") |
|
102 |
val return_code = SQL.Column.int("return_code") |
|
103 |
val uuid = SQL.Column.string("uuid") |
|
104 |
val build_columns = List(sources, input_heaps, output_heap, return_code, uuid) |
|
105 |
||
106 |
val table = SQL.Table("isabelle_session_info", build_log_columns ::: build_columns) |
|
78178 | 107 |
|
78179 | 108 |
val augment_table: PostgreSQL.Source = |
109 |
"ALTER TABLE IF EXISTS " + table.ident + |
|
110 |
" ADD COLUMN IF NOT EXISTS " + uuid.decl(SQL.sql_type_postgresql) |
|
111 |
} |
|
112 |
||
113 |
object Sources { |
|
114 |
val session_name = SQL.Column.string("session_name").make_primary_key |
|
115 |
val name = SQL.Column.string("name").make_primary_key |
|
116 |
val digest = SQL.Column.string("digest") |
|
117 |
val compressed = SQL.Column.bool("compressed") |
|
118 |
val body = SQL.Column.bytes("body") |
|
78178 | 119 |
|
78179 | 120 |
val table = |
121 |
SQL.Table("isabelle_sources", List(session_name, name, digest, compressed, body)) |
|
122 |
||
123 |
def where_equal(session_name: String, name: String = ""): SQL.Source = |
|
124 |
SQL.where_and( |
|
125 |
Sources.session_name.equal(session_name), |
|
126 |
if_proper(name, Sources.name.equal(name))) |
|
127 |
} |
|
128 |
||
78181 | 129 |
def write_session_info( |
130 |
db: SQL.Database, |
|
131 |
cache: Compress.Cache, |
|
132 |
session_name: String, |
|
133 |
build_log: Build_Log.Session_Info, |
|
134 |
build: Build_Info |
|
135 |
): Unit = { |
|
136 |
db.execute_statement(Store.Data.Session_Info.table.insert(), body = |
|
137 |
{ stmt => |
|
138 |
stmt.string(1) = session_name |
|
139 |
stmt.bytes(2) = Properties.encode(build_log.session_timing) |
|
140 |
stmt.bytes(3) = Properties.compress(build_log.command_timings, cache = cache) |
|
141 |
stmt.bytes(4) = Properties.compress(build_log.theory_timings, cache = cache) |
|
142 |
stmt.bytes(5) = Properties.compress(build_log.ml_statistics, cache = cache) |
|
143 |
stmt.bytes(6) = Properties.compress(build_log.task_statistics, cache = cache) |
|
144 |
stmt.bytes(7) = Build_Log.compress_errors(build_log.errors, cache = cache) |
|
145 |
stmt.string(8) = build.sources.toString |
|
146 |
stmt.string(9) = build.input_heaps.toString |
|
147 |
stmt.string(10) = build.output_heap.toString |
|
148 |
stmt.int(11) = build.return_code |
|
149 |
stmt.string(12) = build.uuid |
|
150 |
}) |
|
151 |
} |
|
152 |
||
78179 | 153 |
def write_sources(db: SQL.Database, session_name: String, sources: Sources): Unit = |
154 |
for (source_file <- sources) { |
|
155 |
db.execute_statement(Sources.table.insert(), body = |
|
156 |
{ stmt => |
|
157 |
stmt.string(1) = session_name |
|
158 |
stmt.string(2) = source_file.name |
|
159 |
stmt.string(3) = source_file.digest.toString |
|
160 |
stmt.bool(4) = source_file.compressed |
|
161 |
stmt.bytes(5) = source_file.body |
|
162 |
}) |
|
163 |
} |
|
78178 | 164 |
|
78179 | 165 |
def read_sources( |
166 |
db: SQL.Database, |
|
78180 | 167 |
cache: Compress.Cache, |
78179 | 168 |
session_name: String, |
169 |
name: String = "" |
|
170 |
): List[Source_File] = { |
|
171 |
db.execute_query_statement( |
|
172 |
Sources.table.select( |
|
173 |
sql = Sources.where_equal(session_name, name = name) + SQL.order_by(List(Sources.name))), |
|
174 |
List.from[Source_File], |
|
175 |
{ res => |
|
176 |
val res_name = res.string(Sources.name) |
|
177 |
val digest = SHA1.fake_digest(res.string(Sources.digest)) |
|
178 |
val compressed = res.bool(Sources.compressed) |
|
179 |
val body = res.bytes(Sources.body) |
|
78180 | 180 |
Source_File(res_name, digest, compressed, body, cache) |
78179 | 181 |
} |
182 |
) |
|
183 |
} |
|
78178 | 184 |
} |
185 |
} |
|
186 |
||
187 |
class Store private(val options: Options, val cache: Term.Cache) { |
|
188 |
store => |
|
189 |
||
190 |
override def toString: String = "Store(output_dir = " + output_dir.absolute + ")" |
|
191 |
||
192 |
||
193 |
/* directories */ |
|
194 |
||
195 |
val system_output_dir: Path = Path.explode("$ISABELLE_HEAPS_SYSTEM/$ML_IDENTIFIER") |
|
196 |
val user_output_dir: Path = Path.explode("$ISABELLE_HEAPS/$ML_IDENTIFIER") |
|
197 |
||
198 |
def system_heaps: Boolean = options.bool("system_heaps") |
|
199 |
||
200 |
val output_dir: Path = |
|
201 |
if (system_heaps) system_output_dir else user_output_dir |
|
202 |
||
203 |
val input_dirs: List[Path] = |
|
204 |
if (system_heaps) List(system_output_dir) |
|
205 |
else List(user_output_dir, system_output_dir) |
|
206 |
||
207 |
def presentation_dir: Path = |
|
208 |
if (system_heaps) Path.explode("$ISABELLE_BROWSER_INFO_SYSTEM") |
|
209 |
else Path.explode("$ISABELLE_BROWSER_INFO") |
|
210 |
||
211 |
||
212 |
/* file names */ |
|
213 |
||
214 |
def heap(name: String): Path = Path.basic(name) |
|
215 |
def database(name: String): Path = Path.basic("log") + Path.basic(name).db |
|
216 |
def log(name: String): Path = Path.basic("log") + Path.basic(name) |
|
217 |
def log_gz(name: String): Path = log(name).gz |
|
218 |
||
219 |
def output_heap(name: String): Path = output_dir + heap(name) |
|
220 |
def output_database(name: String): Path = output_dir + database(name) |
|
221 |
def output_log(name: String): Path = output_dir + log(name) |
|
222 |
def output_log_gz(name: String): Path = output_dir + log_gz(name) |
|
223 |
||
224 |
||
225 |
/* heap */ |
|
226 |
||
227 |
def find_heap(name: String): Option[Path] = |
|
228 |
input_dirs.map(_ + heap(name)).find(_.is_file) |
|
229 |
||
230 |
def find_heap_shasum(name: String): SHA1.Shasum = |
|
231 |
(for { |
|
232 |
path <- find_heap(name) |
|
78182 | 233 |
digest <- ML_Heap.read_file_digest(path) |
78178 | 234 |
} yield SHA1.shasum(digest, name)).getOrElse(SHA1.no_shasum) |
235 |
||
236 |
def the_heap(name: String): Path = |
|
237 |
find_heap(name) getOrElse |
|
238 |
error("Missing heap image for session " + quote(name) + " -- expected in:\n" + |
|
239 |
cat_lines(input_dirs.map(dir => " " + File.standard_path(dir)))) |
|
240 |
||
241 |
||
242 |
/* databases for build process and session content */ |
|
243 |
||
244 |
def find_database(name: String): Option[Path] = |
|
245 |
input_dirs.map(_ + database(name)).find(_.is_file) |
|
246 |
||
247 |
def build_database_server: Boolean = options.bool("build_database_server") |
|
248 |
def build_database_test: Boolean = options.bool("build_database_test") |
|
249 |
||
250 |
def open_database_server(): PostgreSQL.Database = |
|
251 |
PostgreSQL.open_database( |
|
252 |
user = options.string("build_database_user"), |
|
253 |
password = options.string("build_database_password"), |
|
254 |
database = options.string("build_database_name"), |
|
255 |
host = options.string("build_database_host"), |
|
256 |
port = options.int("build_database_port"), |
|
257 |
ssh = |
|
258 |
proper_string(options.string("build_database_ssh_host")).map(ssh_host => |
|
259 |
SSH.open_session(options, |
|
260 |
host = ssh_host, |
|
261 |
user = options.string("build_database_ssh_user"), |
|
262 |
port = options.int("build_database_ssh_port"))), |
|
263 |
ssh_close = true) |
|
264 |
||
265 |
def open_build_database(path: Path): SQL.Database = |
|
266 |
if (build_database_server) open_database_server() |
|
267 |
else SQLite.open_database(path, restrict = true) |
|
268 |
||
269 |
def maybe_open_build_database(path: Path): Option[SQL.Database] = |
|
78184 | 270 |
if (build_database_test) Some(open_build_database(path)) else None |
271 |
||
272 |
def maybe_open_heaps_database(): Option[SQL.Database] = |
|
273 |
if (build_database_test && build_database_server) Some(open_database_server()) else None |
|
78178 | 274 |
|
275 |
def try_open_database( |
|
276 |
name: String, |
|
277 |
output: Boolean = false, |
|
278 |
server: Boolean = build_database_server |
|
279 |
): Option[SQL.Database] = { |
|
280 |
def check(db: SQL.Database): Option[SQL.Database] = |
|
281 |
if (output || session_info_exists(db)) Some(db) else { db.close(); None } |
|
282 |
||
283 |
if (server) check(open_database_server()) |
|
284 |
else if (output) Some(SQLite.open_database(output_database(name))) |
|
285 |
else { |
|
286 |
(for { |
|
287 |
dir <- input_dirs.view |
|
288 |
path = dir + database(name) if path.is_file |
|
289 |
db <- check(SQLite.open_database(path)) |
|
290 |
} yield db).headOption |
|
291 |
} |
|
292 |
} |
|
293 |
||
294 |
def error_database(name: String): Nothing = |
|
295 |
error("Missing build database for session " + quote(name)) |
|
296 |
||
297 |
def open_database(name: String, output: Boolean = false): SQL.Database = |
|
298 |
try_open_database(name, output = output) getOrElse error_database(name) |
|
299 |
||
300 |
def prepare_output(): Unit = Isabelle_System.make_directory(output_dir + Path.basic("log")) |
|
301 |
||
78185 | 302 |
def clean_output(name: String, init: Boolean = false): Option[Boolean] = { |
78178 | 303 |
val relevant_db = |
304 |
build_database_server && |
|
305 |
using_option(try_open_database(name))(init_session_info(_, name)).getOrElse(false) |
|
306 |
||
307 |
val del = |
|
308 |
for { |
|
309 |
dir <- |
|
310 |
(if (system_heaps) List(user_output_dir, system_output_dir) else List(user_output_dir)) |
|
311 |
file <- List(heap(name), database(name), log(name), log_gz(name)) |
|
312 |
path = dir + file if path.is_file |
|
313 |
} yield path.file.delete |
|
314 |
||
78190 | 315 |
for (db <- maybe_open_heaps_database()) { |
316 |
using(db)(ML_Heap.clean_entry(_, name)) |
|
317 |
} |
|
318 |
||
78186 | 319 |
if (init) { |
78190 | 320 |
using(open_database(name, output = true))(init_session_info(_, name)) |
78186 | 321 |
} |
78178 | 322 |
|
78185 | 323 |
if (relevant_db || del.nonEmpty) Some(del.forall(identity)) else None |
78178 | 324 |
} |
325 |
||
326 |
def check_output( |
|
327 |
name: String, |
|
328 |
session_options: Options, |
|
329 |
sources_shasum: SHA1.Shasum, |
|
330 |
input_shasum: SHA1.Shasum, |
|
331 |
fresh_build: Boolean, |
|
332 |
store_heap: Boolean |
|
333 |
): (Boolean, SHA1.Shasum) = { |
|
334 |
try_open_database(name) match { |
|
335 |
case Some(db) => |
|
336 |
using(db)(read_build(_, name)) match { |
|
337 |
case Some(build) => |
|
338 |
val output_shasum = find_heap_shasum(name) |
|
339 |
val current = |
|
340 |
!fresh_build && |
|
341 |
build.ok && |
|
342 |
Sessions.eq_sources(session_options, build.sources, sources_shasum) && |
|
343 |
build.input_heaps == input_shasum && |
|
344 |
build.output_heap == output_shasum && |
|
345 |
!(store_heap && output_shasum.is_empty) |
|
346 |
(current, output_shasum) |
|
347 |
case None => (false, SHA1.no_shasum) |
|
348 |
} |
|
349 |
case None => (false, SHA1.no_shasum) |
|
350 |
} |
|
351 |
} |
|
352 |
||
353 |
||
354 |
/* SQL database content */ |
|
355 |
||
356 |
def read_bytes(db: SQL.Database, name: String, column: SQL.Column): Bytes = |
|
357 |
db.execute_query_statementO[Bytes]( |
|
78179 | 358 |
Store.Data.Session_Info.table.select(List(column), |
359 |
sql = Store.Data.Session_Info.session_name.where_equal(name)), |
|
78178 | 360 |
res => res.bytes(column) |
361 |
).getOrElse(Bytes.empty) |
|
362 |
||
363 |
def read_properties(db: SQL.Database, name: String, column: SQL.Column): List[Properties.T] = |
|
364 |
Properties.uncompress(read_bytes(db, name, column), cache = cache) |
|
365 |
||
366 |
||
367 |
/* session info */ |
|
368 |
||
78181 | 369 |
def session_info_exists(db: SQL.Database): Boolean = { |
370 |
val tables = db.tables |
|
78187
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
371 |
Store.Data.tables.forall(table => tables.contains(table.name)) |
78181 | 372 |
} |
373 |
||
374 |
def session_info_defined(db: SQL.Database, name: String): Boolean = |
|
375 |
db.execute_query_statementB( |
|
376 |
Store.Data.Session_Info.table.select(List(Store.Data.Session_Info.session_name), |
|
377 |
sql = Store.Data.Session_Info.session_name.where_equal(name))) |
|
378 |
||
78178 | 379 |
def init_session_info(db: SQL.Database, name: String): Boolean = |
78187
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
380 |
Store.Data.transaction_lock(db, create = true) { |
78178 | 381 |
val already_defined = session_info_defined(db, name) |
382 |
||
383 |
db.execute_statement( |
|
78179 | 384 |
Store.Data.Session_Info.table.delete( |
385 |
sql = Store.Data.Session_Info.session_name.where_equal(name))) |
|
386 |
if (db.is_postgresql) db.execute_statement(Store.Data.Session_Info.augment_table) |
|
78178 | 387 |
|
78179 | 388 |
db.execute_statement(Store.Data.Sources.table.delete( |
389 |
sql = Store.Data.Sources.where_equal(name))) |
|
78178 | 390 |
|
391 |
db.execute_statement( |
|
392 |
Export.Data.table.delete(sql = Export.Data.session_name.where_equal(name))) |
|
393 |
||
394 |
db.execute_statement( |
|
395 |
Document_Build.Data.table.delete(sql = Document_Build.Data.session_name.where_equal(name))) |
|
396 |
||
397 |
already_defined |
|
398 |
} |
|
399 |
||
400 |
def write_session_info( |
|
401 |
db: SQL.Database, |
|
402 |
session_name: String, |
|
403 |
sources: Store.Sources, |
|
404 |
build_log: Build_Log.Session_Info, |
|
405 |
build: Store.Build_Info |
|
406 |
): Unit = { |
|
78187
2df0f3604a67
clarified signature: more explicit class SQL.Data;
wenzelm
parents:
78186
diff
changeset
|
407 |
Store.Data.transaction_lock(db) { |
78179 | 408 |
Store.Data.write_sources(db, session_name, sources) |
78181 | 409 |
Store.Data.write_session_info(db, cache.compress, session_name, build_log, build) |
78178 | 410 |
} |
411 |
} |
|
412 |
||
413 |
def read_session_timing(db: SQL.Database, name: String): Properties.T = |
|
78179 | 414 |
Properties.decode(read_bytes(db, name, Store.Data.Session_Info.session_timing), cache = cache) |
78178 | 415 |
|
416 |
def read_command_timings(db: SQL.Database, name: String): Bytes = |
|
78179 | 417 |
read_bytes(db, name, Store.Data.Session_Info.command_timings) |
78178 | 418 |
|
419 |
def read_theory_timings(db: SQL.Database, name: String): List[Properties.T] = |
|
78179 | 420 |
read_properties(db, name, Store.Data.Session_Info.theory_timings) |
78178 | 421 |
|
422 |
def read_ml_statistics(db: SQL.Database, name: String): List[Properties.T] = |
|
78179 | 423 |
read_properties(db, name, Store.Data.Session_Info.ml_statistics) |
78178 | 424 |
|
425 |
def read_task_statistics(db: SQL.Database, name: String): List[Properties.T] = |
|
78179 | 426 |
read_properties(db, name, Store.Data.Session_Info.task_statistics) |
78178 | 427 |
|
428 |
def read_theories(db: SQL.Database, name: String): List[String] = |
|
429 |
read_theory_timings(db, name).flatMap(Markup.Name.unapply) |
|
430 |
||
431 |
def read_errors(db: SQL.Database, name: String): List[String] = |
|
78179 | 432 |
Build_Log.uncompress_errors(read_bytes(db, name, Store.Data.Session_Info.errors), cache = cache) |
78178 | 433 |
|
434 |
def read_build(db: SQL.Database, name: String): Option[Store.Build_Info] = { |
|
78179 | 435 |
if (db.tables.contains(Store.Data.Session_Info.table.name)) { |
78178 | 436 |
db.execute_query_statementO[Store.Build_Info]( |
78179 | 437 |
Store.Data.Session_Info.table.select( |
438 |
sql = Store.Data.Session_Info.session_name.where_equal(name)), |
|
78178 | 439 |
{ res => |
440 |
val uuid = |
|
78179 | 441 |
try { Option(res.string(Store.Data.Session_Info.uuid)).getOrElse("") } |
78178 | 442 |
catch { case _: SQLException => "" } |
443 |
Store.Build_Info( |
|
78179 | 444 |
SHA1.fake_shasum(res.string(Store.Data.Session_Info.sources)), |
445 |
SHA1.fake_shasum(res.string(Store.Data.Session_Info.input_heaps)), |
|
446 |
SHA1.fake_shasum(res.string(Store.Data.Session_Info.output_heap)), |
|
447 |
res.int(Store.Data.Session_Info.return_code), |
|
78178 | 448 |
uuid) |
449 |
} |
|
450 |
) |
|
451 |
} |
|
452 |
else None |
|
453 |
} |
|
454 |
||
78179 | 455 |
def read_sources(db: SQL.Database, session: String, name: String = ""): List[Store.Source_File] = |
78180 | 456 |
Store.Data.read_sources(db, cache.compress, session, name = name) |
78178 | 457 |
} |