src/Pure/Tools/prismjs.scala
author Fabian Huch <huch@in.tum.de>
Thu, 09 Nov 2023 15:45:51 +0100
changeset 78931 26841d3c568c
parent 78592 fdfe9b91d96e
permissions -rw-r--r--
performance tuning for build schedule: explicit schedule generation, without mixing heuristics;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/Tools/prismjs.scala
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     3
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     4
Support for the Prism.js syntax highlighter -- via external Node.js process.
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     5
*/
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     6
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     7
package isabelle
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     8
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     9
import scala.collection.mutable
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    10
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    11
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    12
object Prismjs {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    13
  /* component resources */
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    14
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    15
  val HOME: Path = Path.explode("$ISABELLE_PRISMJS_HOME")
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    16
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    17
  sealed case class Language(names: List[String]) {
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    18
    require(names.nonEmpty)
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    19
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    20
    def name: String = names.head
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    21
    def alias: List[String] = names.tail
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    22
    override def toString: String = name
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    23
  }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    24
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    25
  lazy val languages: List[Language] = {
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    26
    val components_path = HOME + Path.explode("components.json")
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    27
    val components_json = JSON.parse(File.read(components_path))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    28
    JSON.value(components_json, "languages") match {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    29
      case Some(JSON.Object(langs)) =>
78592
fdfe9b91d96e misc tuning: support "scalac -source 3.3";
wenzelm
parents: 76516
diff changeset
    30
        (for (case (name, JSON.Object(info)) <- langs.iterator if name != "meta") yield {
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    31
          val alias =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    32
            JSON.Value.List.unapply(info.getOrElse("alias", Nil), JSON.Value.String.unapply)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    33
              .getOrElse(Nil)
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    34
          Language(name :: alias)
76516
ca88e5496553 prefer sorted result;
wenzelm
parents: 76514
diff changeset
    35
        }).toList.sortBy(_.name)
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    36
      case _ => error("Failed to determine languages from " + components_path)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    37
    }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    38
  }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    39
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    40
  def check_language(name: String): Unit =
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    41
    if (!languages.exists(_.names.contains(name))) error("Unknown language " + quote(name))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    42
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    43
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    44
  /* generate JavaScript */
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    45
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    46
  def prelude(lang: JS.Source): JS.Source =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    47
    cat_lines(List(
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    48
      Nodejs.require_fs,
76510
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    49
      Nodejs.require_path("const prismjs", HOME),
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    50
      Nodejs.require_path("prismjs.load", HOME + Path.explode("components"), dir = true),
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    51
      JS.function("prismjs.load", lang),
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    52
      """
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    53
function prismjs_encode(t) {
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    54
  if (t instanceof prismjs.Token) {
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    55
    var types = [t.type]
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    56
    if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    57
    else if (t.alias !== undefined) { types.push(t.alias) }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    58
    return {"types": types, "content": prismjs_encode(t.content) }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    59
  }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    60
  else if (Array.isArray(t)) { return t.map(prismjs_encode) }
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    61
  else { return t }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    62
}
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    63
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    64
function prismjs_tokenize(text, lang) {
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    65
  return prismjs.tokenize(text, prismjs.languages[lang]).map(prismjs_encode)
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    66
}
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    67
"""))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    68
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    69
  def main(lang: JS.Source, input: Path, output: Path): JS.Source =
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    70
    Nodejs.write_file(output,
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    71
      JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang)))
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    72
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    73
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    74
  /* tokenize */
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    75
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    76
  sealed case class Token(types: List[String], content: Content) {
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    77
    def string: String = content.string
76514
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    78
    def yxml: String = YXML.string_of_body(Encode.token(this))
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    79
  }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    80
  sealed abstract class Content { def string: String }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    81
  case class Atom(string: String) extends Content
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    82
  case class Nested(tokens: List[Token]) extends Content {
76514
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    83
    def string: String = tokens.map(_.string).mkString
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    84
  }
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    85
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    86
  object Encode {
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    87
    import XML.Encode._
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    88
    def token(tok: Token): XML.Body =
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    89
      pair(list(string), content)(tok.types, tok.content)
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    90
    def content: T[Content] = {
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    91
      variant[Content](List(
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    92
        { case Atom(a) => (List(a), Nil) },
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    93
        { case Nested(a) => (Nil, list(token)(a)) }
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    94
      ))
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
    95
    }
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    96
  }
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    97
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    98
  def decode(json: JSON.T): Option[Token] =
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
    99
    JSON.Value.String.unapply(json).map(string => Token(Nil, Atom(string))) orElse
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   100
      (for {
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   101
        types <- JSON.strings(json, "types")
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   102
        content_json <- JSON.value(json, "content")
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   103
        content <-
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   104
          JSON.Value.String.unapply(content_json).map(Atom.apply) orElse
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   105
          JSON.Value.List.unapply(content_json, decode).map(Nested.apply)
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   106
      } yield Token(types, content))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   107
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   108
  def tokenize(text: String, language: String): List[Token] = {
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   109
    check_language(language)
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   110
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   111
    val json =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   112
      Isabelle_System.with_tmp_file("input", ext = "json") { input =>
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   113
        Isabelle_System.with_tmp_file("output", ext = "json") { output =>
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   114
          File.write(input, text)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   115
          val lang = JS.value(language)
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   116
          Nodejs.execute(prelude(lang) + "\n" + main(lang, input, output))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   117
          JSON.parse(File.read(output))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   118
        }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   119
      }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   120
76513
5a9a82522266 retain data structures more accurately;
wenzelm
parents: 76512
diff changeset
   121
    JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   122
  }
76514
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   123
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   124
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   125
  /* Scala functions in ML */
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   126
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   127
  object Languages extends Scala.Fun_Strings("Prismjs.languages") {
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   128
    val here = Scala_Project.here
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   129
    def apply(args: List[String]): List[String] =
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   130
      languages.map(language => cat_lines(language.names))
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   131
  }
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   132
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   133
  object Tokenize extends Scala.Fun_Strings("Prismjs.tokenize", thread = true) {
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   134
    val here = Scala_Project.here
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   135
    def apply(args: List[String]): List[String] =
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   136
      args match {
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   137
        case List(text, language) => tokenize(text, language).map(_.yxml)
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   138
        case _ => error("Bad arguments")
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   139
      }
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   140
  }
2615cf68f6f4 ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
wenzelm
parents: 76513
diff changeset
   141
}