src/Pure/Admin/check_sources.scala
author wenzelm
Sun, 22 Jan 2023 21:52:58 +0100
changeset 77040 96879e303ea3
parent 76884 a004c5322ea4
permissions -rw-r--r--
clarified modules (again, in contrast to f8f065e20837);
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
64161
2b1128e95dfb explicit indication of Admin tools;
wenzelm
parents: 63997
diff changeset
     1
/*  Title:      Pure/Admin/check_sources.scala
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     3
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     4
Some sanity checks for Isabelle sources.
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     5
*/
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     6
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     7
package isabelle
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     8
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
     9
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73340
diff changeset
    10
object Check_Sources {
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73340
diff changeset
    11
  def check_file(path: Path): Unit = {
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    12
    val file_name = path.implode
76884
a004c5322ea4 clarified modules;
wenzelm
parents: 76831
diff changeset
    13
    val file_pos = Position.File(File.symbolic_path(path))
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    14
    def line_pos(i: Int) = Position.Line_File(i + 1, file_name)
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    15
69367
34b7550b66c7 tuned signature;
wenzelm
parents: 69277
diff changeset
    16
    if (space_explode('/', Word.lowercase(path.expand.drop_ext.implode)).contains("aux"))
64324
416f4d031afd check Windows file names, e.g. relavant for native Mercurial, but also for Isabelle/Scala;
wenzelm
parents: 64162
diff changeset
    17
      Output.warning("Illegal file-name on Windows" + Position.here(file_pos))
416f4d031afd check Windows file names, e.g. relavant for native Mercurial, but also for Isabelle/Scala;
wenzelm
parents: 64162
diff changeset
    18
72254
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    19
    val bytes = Bytes.read(path)
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    20
    val content = bytes.text
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    21
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    22
    if (Bytes(content) != bytes) {
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    23
      Output.warning("Bad UTF8 encoding" + Position.here(file_pos))
8c5b8d7999bd more checks;
wenzelm
parents: 69367
diff changeset
    24
    }
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    25
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73340
diff changeset
    26
    for { (line, i) <- split_lines(content).iterator.zipWithIndex } {
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    27
      try {
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    28
        Symbol.decode_strict(line)
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    29
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73340
diff changeset
    30
        for { c <- Codepoint.iterator(line); if c > 128 && !Character.isAlphabetic(c) } {
64610
1b89608974e9 clarified modules;
wenzelm
parents: 64368
diff changeset
    31
          Output.warning("Suspicious Unicode character " + quote(Codepoint.string(c)) +
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    32
            Position.here(line_pos(i)))
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    33
        }
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    34
      }
62814
29ca4cdd998d clarified check_sources;
wenzelm
parents: 62615
diff changeset
    35
      catch { case ERROR(msg) => Output.warning(msg + Position.here(line_pos(i))) }
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    36
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    37
      if (line.contains('\t'))
56792
wenzelm
parents: 56791
diff changeset
    38
        Output.warning("TAB character" + Position.here(line_pos(i)))
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    39
    }
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    40
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    41
    if (content.contains('\r'))
56792
wenzelm
parents: 56791
diff changeset
    42
      Output.warning("CR character" + Position.here(file_pos))
62814
29ca4cdd998d clarified check_sources;
wenzelm
parents: 62615
diff changeset
    43
29ca4cdd998d clarified check_sources;
wenzelm
parents: 62615
diff changeset
    44
    if (Word.bidi_detect(content))
64368
364d74ea985f tuned message;
wenzelm
parents: 64324
diff changeset
    45
      Output.warning("Bidirectional Unicode text" + Position.here(file_pos))
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    46
  }
56829
f151ade98b15 proper tool wrap-up;
wenzelm
parents: 56792
diff changeset
    47
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73340
diff changeset
    48
  def check_hg(root: Path): Unit = {
56830
e760242101fc tuned signature -- channels for diagnostic output for system tools means stderr;
wenzelm
parents: 56829
diff changeset
    49
    Output.writeln("Checking " + root + " ...")
64162
03057a8fdd1f simplified: no internal state for Mercurial;
wenzelm
parents: 64161
diff changeset
    50
    val hg = Mercurial.repository(root)
03057a8fdd1f simplified: no internal state for Mercurial;
wenzelm
parents: 64161
diff changeset
    51
    for {
65822
17b8528c2f53 clarified notion of known files (before actual commit);
wenzelm
parents: 64610
diff changeset
    52
      file <- hg.known_files()
75906
2167b9e3157a clarified signature: support for adhoc file types;
wenzelm
parents: 75394
diff changeset
    53
      if File.is_thy(file) || File.is_ML(file) || file.endsWith("/ROOT")
64162
03057a8fdd1f simplified: no internal state for Mercurial;
wenzelm
parents: 64161
diff changeset
    54
    } check_file(root + Path.explode(file))
56829
f151ade98b15 proper tool wrap-up;
wenzelm
parents: 56792
diff changeset
    55
  }
f151ade98b15 proper tool wrap-up;
wenzelm
parents: 56792
diff changeset
    56
f151ade98b15 proper tool wrap-up;
wenzelm
parents: 56792
diff changeset
    57
62834
970cedec9748 prefer internal tool;
wenzelm
parents: 62814
diff changeset
    58
  /* Isabelle tool wrapper */
56829
f151ade98b15 proper tool wrap-up;
wenzelm
parents: 56792
diff changeset
    59
62834
970cedec9748 prefer internal tool;
wenzelm
parents: 62814
diff changeset
    60
  val isabelle_tool =
72763
3cc73d00553c added document antiquotation @{tool};
wenzelm
parents: 72254
diff changeset
    61
    Isabelle_Tool("check_sources", "some sanity checks for Isabelle sources",
75394
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    62
      Scala_Project.here,
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    63
      { args =>
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    64
        val getopts = Getopts("""
62452
f25b67245699 more official "isabelle check_sources";
wenzelm
parents: 62303
diff changeset
    65
Usage: isabelle check_sources [ROOT_DIRS...]
f25b67245699 more official "isabelle check_sources";
wenzelm
parents: 62303
diff changeset
    66
65822
17b8528c2f53 clarified notion of known files (before actual commit);
wenzelm
parents: 64610
diff changeset
    67
  Check .thy, .ML, ROOT against known files of Mercurial ROOT_DIRS.
62452
f25b67245699 more official "isabelle check_sources";
wenzelm
parents: 62303
diff changeset
    68
""")
f25b67245699 more official "isabelle check_sources";
wenzelm
parents: 62303
diff changeset
    69
75394
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    70
        val specs = getopts(args)
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    71
        if (specs.isEmpty) getopts.usage()
62452
f25b67245699 more official "isabelle check_sources";
wenzelm
parents: 62303
diff changeset
    72
75394
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    73
        for (root <- specs) check_hg(Path.explode(root))
42267c650205 tuned formatting;
wenzelm
parents: 75393
diff changeset
    74
      })
56791
23883e1879c5 some sanity checks for Isabelle sources;
wenzelm
parents:
diff changeset
    75
}