src/Pure/General/file_store.scala
author wenzelm
Wed, 12 Mar 2025 11:39:00 +0100
changeset 82265 4b875a4c83b0
parent 82162 5ecd0209c0a8
permissions -rw-r--r--
update for release;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/file_store.scala
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     3
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     4
Persistent store for file-system content, backed by SQL database
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     5
(notably SQLite).
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     6
*/
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     7
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     8
package isabelle
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
     9
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    10
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    11
import java.io.{File => JFile}
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    12
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    13
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    14
object File_Store {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    15
  /* main operations */
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    16
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    17
  def make_database(database: Path, dir: Path,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    18
    pred: JFile => Boolean = _ => true,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    19
    name_prefix: String = "",
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    20
    compress_options: Compress.Options = Compress.Options(),
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    21
    compress_cache: Compress.Cache = Compress.Cache.none
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    22
  ): Unit = {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    23
    database.file.delete()
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    24
    using(SQLite.open_database(database)) { db =>
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    25
      private_data.transaction_lock(db, create = true) {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    26
        val root = dir.canonical_file
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    27
        val root_path = File.path(root)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    28
        for {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    29
          file <- File.find_files(root, pred = pred)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    30
          rel_path <- File.relative_path(root_path, File.path(file))
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    31
        } {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    32
          val entry =
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    33
            Entry.read_file(rel_path, name_prefix = name_prefix, dir = root_path,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    34
              compress_options = compress_options, compress_cache = compress_cache)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    35
          private_data.write_entry(db, entry)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    36
        }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    37
      }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    38
    }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    39
  }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    40
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    41
  def database_entry(database: Path, name: String): Option[Entry] =
82149
18709ffb8137 tuned signature: more explicit operations;
wenzelm
parents: 82148
diff changeset
    42
    if (database.is_file) {
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    43
      using(SQLite.open_database(database)) { db =>
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    44
        private_data.transaction_lock(db) {
82149
18709ffb8137 tuned signature: more explicit operations;
wenzelm
parents: 82148
diff changeset
    45
          if (private_data.tables_ok(db)) private_data.read_entry(db, name)
18709ffb8137 tuned signature: more explicit operations;
wenzelm
parents: 82148
diff changeset
    46
          else None
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    47
        }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    48
      }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    49
    }
82149
18709ffb8137 tuned signature: more explicit operations;
wenzelm
parents: 82148
diff changeset
    50
    else None
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    51
82151
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    52
  def database_extract(database: Path, dir: Path,
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    53
    compress_cache: Compress.Cache = Compress.Cache.none
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    54
  ): Unit = {
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    55
    if (database.is_file) {
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    56
      using(SQLite.open_database(database)) { db =>
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    57
        private_data.transaction_lock(db) {
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    58
          if (private_data.tables_ok(db)) {
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    59
            for {
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    60
              name <- private_data.read_names(db)
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    61
              entry <- private_data.read_entry(db, name)
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    62
            } entry.write_file(dir, compress_cache = compress_cache)
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    63
          }
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    64
        }
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    65
      }
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    66
    }
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    67
  }
a42414afe05f more operations;
wenzelm
parents: 82150
diff changeset
    68
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    69
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    70
  /* entry */
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    71
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    72
  object Entry {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    73
    def read_file(path: Path,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    74
      name_prefix: String = "",
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    75
      dir: Path = Path.current,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    76
      compress_options: Compress.Options = Compress.Options(),
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    77
      compress_cache: Compress.Cache = Compress.Cache.none
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    78
    ): Entry = {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    79
      val name = Url.append_path(name_prefix, path.expand.implode)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    80
      val bs = Bytes.read(dir + path)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    81
      val size = bs.size
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
    82
      val digest = SHA1.digest(bs).toString
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    83
      val executable = File.is_executable(dir + path)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    84
      val (compressed, body) = bs.maybe_compress(options = compress_options, cache = compress_cache)
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
    85
      Entry(name, size, digest, executable, compressed, body)
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    86
    }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    87
  }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    88
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    89
  sealed case class Entry(
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    90
    name: String,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    91
    size: Long,
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
    92
    digest: String,
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    93
    executable: Boolean,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    94
    compressed: Boolean,
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    95
    body: Bytes
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    96
  ) {
82161
843c2205b076 proper treatment of empty content, which is never compressed;
wenzelm
parents: 82151
diff changeset
    97
    require(name.nonEmpty && size >= 0 && (size > 0 || !compressed))
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
    98
82150
2eb2aa0375fb tuned signature;
wenzelm
parents: 82149
diff changeset
    99
    def content(compress_cache: Compress.Cache = Compress.Cache.none): Bytes =
2eb2aa0375fb tuned signature;
wenzelm
parents: 82149
diff changeset
   100
      if (compressed) body.uncompress(cache = compress_cache) else body
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   101
82150
2eb2aa0375fb tuned signature;
wenzelm
parents: 82149
diff changeset
   102
    def write_file(dir: Path, compress_cache: Compress.Cache = Compress.Cache.none): Unit = {
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   103
      val path = Path.explode(name)
82150
2eb2aa0375fb tuned signature;
wenzelm
parents: 82149
diff changeset
   104
      File.content(path, content(compress_cache = compress_cache)).write(dir)
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   105
      if (executable) File.set_executable(dir + path)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   106
    }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   107
  }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   108
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   109
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   110
  /* SQL data model */
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   111
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   112
  object private_data extends SQL.Data() {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   113
    override lazy val tables: SQL.Tables = SQL.Tables(Base.table)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   114
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   115
    object Base {
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   116
      val name = SQL.Column.string("name").make_primary_key
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   117
      val size = SQL.Column.long("size")
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   118
      val digest = SQL.Column.string("digest")
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   119
      val executable = SQL.Column.bool("executable")
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   120
      val compressed = SQL.Column.bool("compressed")
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   121
      val body = SQL.Column.bytes("body")
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   122
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   123
      val table =
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   124
        SQL.Table("isabelle_file_store", List(name, size, digest, executable, compressed, body))
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   125
    }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   126
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   127
    def read_names(db: SQL.Database): List[String] =
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   128
      db.execute_query_statement(
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   129
        Base.table.select(List(Base.name)),
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   130
        List.from[String], _.string(Base.name)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   131
      ).sorted
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   132
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   133
    def read_entry(db: SQL.Database, name: String): Option[Entry] =
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   134
      db.execute_query_statementO[Entry](
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   135
        Base.table.select(Base.table.columns, sql = Base.name.where_equal(name)),
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   136
        { res =>
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   137
          val size = res.long(Base.size)
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   138
          val digest = proper_string(res.string(Base.digest)).getOrElse(SHA1.digest_empty.toString)
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   139
          val executable = res.bool(Base.executable)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   140
          val compressed = res.bool(Base.compressed)
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   141
          val body = res.bytes(Base.body)
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   142
          Entry(name, size, digest, executable, compressed, body)
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   143
        })
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   144
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   145
    def write_entry(db: SQL.Database, entry: Entry): Unit =
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   146
      db.execute_statement(Base.table.insert(), body = { stmt =>
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   147
        stmt.string(1) = entry.name
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   148
        stmt.long(2) = entry.size
82162
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   149
        stmt.string(3) = if (entry.digest == SHA1.digest_empty.toString) None else Some(entry.digest)
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   150
        stmt.bool(4) = entry.executable
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   151
        stmt.bool(5) = entry.compressed
5ecd0209c0a8 clarified persistent data: digest may help to synchronize files, without requiring read / uncompress;
wenzelm
parents: 82161
diff changeset
   152
        stmt.bytes(6) = entry.body
82148
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   153
      })
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   154
  }
b387a9099b72 support persistent store for file-system content (notably SQLite);
wenzelm
parents:
diff changeset
   155
}