| author | desharna | 
| Sat, 15 Mar 2025 20:17:03 +0100 | |
| changeset 82282 | 919eb0e67930 | 
| parent 82119 | b7929e1dc4fb | 
| permissions | -rw-r--r-- | 
| 64161 | 1 | /* Title: Pure/Admin/check_sources.scala | 
| 56791 | 2 | Author: Makarius | 
| 3 | ||
| 4 | Some sanity checks for Isabelle sources. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 75393 | 10 | object Check_Sources {
 | 
| 11 |   def check_file(path: Path): Unit = {
 | |
| 56791 | 12 | val file_name = path.implode | 
| 76884 | 13 | val file_pos = Position.File(File.symbolic_path(path)) | 
| 56791 | 14 | def line_pos(i: Int) = Position.Line_File(i + 1, file_name) | 
| 15 | ||
| 69367 | 16 |     if (space_explode('/', Word.lowercase(path.expand.drop_ext.implode)).contains("aux"))
 | 
| 64324 
416f4d031afd
check Windows file names, e.g. relavant for native Mercurial, but also for Isabelle/Scala;
 wenzelm parents: 
64162diff
changeset | 17 |       Output.warning("Illegal file-name on Windows" + Position.here(file_pos))
 | 
| 
416f4d031afd
check Windows file names, e.g. relavant for native Mercurial, but also for Isabelle/Scala;
 wenzelm parents: 
64162diff
changeset | 18 | |
| 72254 | 19 | val bytes = Bytes.read(path) | 
| 20 | val content = bytes.text | |
| 21 | ||
| 22 |     if (Bytes(content) != bytes) {
 | |
| 23 |       Output.warning("Bad UTF8 encoding" + Position.here(file_pos))
 | |
| 24 | } | |
| 56791 | 25 | |
| 75393 | 26 |     for { (line, i) <- split_lines(content).iterator.zipWithIndex } {
 | 
| 56791 | 27 |       try {
 | 
| 28 | Symbol.decode_strict(line) | |
| 29 | ||
| 75393 | 30 |         for { c <- Codepoint.iterator(line); if c > 128 && !Character.isAlphabetic(c) } {
 | 
| 64610 | 31 |           Output.warning("Suspicious Unicode character " + quote(Codepoint.string(c)) +
 | 
| 56791 | 32 | Position.here(line_pos(i))) | 
| 33 | } | |
| 34 | } | |
| 62814 | 35 |       catch { case ERROR(msg) => Output.warning(msg + Position.here(line_pos(i))) }
 | 
| 56791 | 36 | |
| 37 |       if (line.contains('\t'))
 | |
| 56792 | 38 |         Output.warning("TAB character" + Position.here(line_pos(i)))
 | 
| 56791 | 39 | } | 
| 40 | ||
| 41 |     if (content.contains('\r'))
 | |
| 56792 | 42 |       Output.warning("CR character" + Position.here(file_pos))
 | 
| 62814 | 43 | |
| 44 | if (Word.bidi_detect(content)) | |
| 64368 | 45 |       Output.warning("Bidirectional Unicode text" + Position.here(file_pos))
 | 
| 56791 | 46 | } | 
| 56829 | 47 | |
| 75393 | 48 |   def check_hg(root: Path): Unit = {
 | 
| 56830 
e760242101fc
tuned signature -- channels for diagnostic output for system tools means stderr;
 wenzelm parents: 
56829diff
changeset | 49 |     Output.writeln("Checking " + root + " ...")
 | 
| 64162 | 50 | val hg = Mercurial.repository(root) | 
| 51 |     for {
 | |
| 65822 
17b8528c2f53
clarified notion of known files (before actual commit);
 wenzelm parents: 
64610diff
changeset | 52 | file <- hg.known_files() | 
| 82119 | 53 |       if File.is_scala(file) || File.is_thy(file) || File.is_ML(file) || file.endsWith("/ROOT")
 | 
| 64162 | 54 | } check_file(root + Path.explode(file)) | 
| 56829 | 55 | } | 
| 56 | ||
| 57 | ||
| 62834 | 58 | /* Isabelle tool wrapper */ | 
| 56829 | 59 | |
| 62834 | 60 | val isabelle_tool = | 
| 72763 | 61 |     Isabelle_Tool("check_sources", "some sanity checks for Isabelle sources",
 | 
| 75394 | 62 | Scala_Project.here, | 
| 63 |       { args =>
 | |
| 64 |         val getopts = Getopts("""
 | |
| 62452 | 65 | Usage: isabelle check_sources [ROOT_DIRS...] | 
| 66 | ||
| 82119 | 67 | Check .scala, .thy, .ML, ROOT against known files of Mercurial ROOT_DIRS. | 
| 62452 | 68 | """) | 
| 69 | ||
| 75394 | 70 | val specs = getopts(args) | 
| 71 | if (specs.isEmpty) getopts.usage() | |
| 62452 | 72 | |
| 75394 | 73 | for (root <- specs) check_hg(Path.explode(root)) | 
| 74 | }) | |
| 56791 | 75 | } |