--- a/src/Pure/Tools/prismjs.scala Sat Nov 12 19:09:41 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala Sun Nov 13 14:56:24 2022 +0100
@@ -14,23 +14,34 @@
val HOME: Path = Path.explode("$ISABELLE_PRISMJS_HOME")
- lazy val available_languages: List[String] = {
+ sealed case class Language(names: List[String]) {
+ require(names.nonEmpty)
+
+ def name: String = names.head
+ def alias: List[String] = names.tail
+ override def toString: String = name
+ }
+
+ lazy val languages: List[Language] = {
val components_path = HOME + Path.explode("components.json")
val components_json = JSON.parse(File.read(components_path))
JSON.value(components_json, "languages") match {
case Some(JSON.Object(langs)) =>
- (for ((lang, JSON.Object(info)) <- langs.iterator if lang != "meta") yield {
+ (for ((name, JSON.Object(info)) <- langs.iterator if name != "meta") yield {
val alias =
JSON.Value.List.unapply(info.getOrElse("alias", Nil), JSON.Value.String.unapply)
.getOrElse(Nil)
- lang :: alias
- }).toList.flatten.sorted
+ Language(name :: alias)
+ }).toList
case _ => error("Failed to determine languages from " + components_path)
}
}
+ def check_language(name: String): Unit =
+ if (!languages.exists(_.names.contains(name))) error("Unknown language " + quote(name))
- /* JavaScript prelude */
+
+ /* generate JavaScript */
def prelude(lang: JS.Source): JS.Source =
cat_lines(List(
@@ -39,72 +50,61 @@
Nodejs.require_path("prismjs.load", HOME + Path.explode("components"), dir = true),
JS.function("prismjs.load", lang),
"""
-function prismjs_content(t) {
- if (t instanceof prismjs.Token) { return prismjs_content(t.content) }
- else if (Array.isArray(t)) { return t.map(prismjs_content).join("") }
+function prismjs_encode(t) {
+ if (t instanceof prismjs.Token) {
+ var types = [t.type]
+ if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
+ else if (t.alias !== undefined) { types.push(t.alias) }
+ return {"types": types, "content": prismjs_encode(t.content) }
+ }
+ else if (Array.isArray(t)) { return t.map(prismjs_encode) }
else { return t }
}
function prismjs_tokenize(text, lang) {
- return prismjs.tokenize(text, prismjs.languages[lang]).map(function (t) {
- var types = []
- var content = ""
- if (t instanceof prismjs.Token) {
- types.push(t.type)
- if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
- else if (t.alias !== undefined) { types.push(t.alias) }
- content = prismjs_content(t)
- }
- else { content = t }
- return {"types": types, "content": content}
- })
+ return prismjs.tokenize(text, prismjs.languages[lang]).map(prismjs_encode)
}
"""))
+ def main(lang: JS.Source, input: Path, output: Path): JS.Source =
+ Nodejs.write_file(output,
+ JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang)))
+
/* tokenize */
- sealed case class Token(
- types: List[String],
- content: String,
- range: Text.Range)
+ sealed case class Token(types: List[String], content: Content) {
+ def string: String = content.string
+ }
+ sealed abstract class Content { def string: String }
+ case class Atom(string: String) extends Content
+ case class Nested(tokens: List[Token]) extends Content {
+ def string: String = tokens.map(_.content.string).mkString
+ }
+
+ def decode(json: JSON.T): Option[Token] =
+ JSON.Value.String.unapply(json).map(string => Token(Nil, Atom(string))) orElse
+ (for {
+ types <- JSON.strings(json, "types")
+ content_json <- JSON.value(json, "content")
+ content <-
+ JSON.Value.String.unapply(content_json).map(Atom.apply) orElse
+ JSON.Value.List.unapply(content_json, decode).map(Nested.apply)
+ } yield Token(types, content))
def tokenize(text: String, language: String): List[Token] = {
- if (!available_languages.contains(language)) {
- error("Unknown language " + quote(language))
- }
+ check_language(language)
val json =
Isabelle_System.with_tmp_file("input", ext = "json") { input =>
Isabelle_System.with_tmp_file("output", ext = "json") { output =>
File.write(input, text)
val lang = JS.value(language)
- Nodejs.execute(
- prelude(lang) + "\n" +
- Nodejs.write_file(output,
- JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang))))
+ Nodejs.execute(prelude(lang) + "\n" + main(lang, input, output))
JSON.parse(File.read(output))
}
}
- def parse_token(t: JSON.T): Token =
- (for {
- types <- JSON.strings(t, "types")
- content <- JSON.string(t, "content")
- } yield Token(types, content, Text.Range.zero))
- .getOrElse(error("Malformed JSON token: " + t))
-
- val tokens0 =
- JSON.Value.List.unapply(json, t => Some(parse_token(t)))
- .getOrElse(error("Malformed JSON: array expected"))
-
- var stop = 0
- val tokens = new mutable.ListBuffer[Token]
- for (tok <- tokens0) {
- val start = stop
- stop += tok.content.length
- tokens += tok.copy(range = Text.Range(start, stop))
- }
- tokens.toList
+ JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
}
}