retain data structures more accurately;
authorwenzelm
Sun, 13 Nov 2022 14:56:24 +0100
changeset 76513 5a9a82522266
parent 76512 c3b4e5e4c4e5
child 76514 2615cf68f6f4
retain data structures more accurately; tuned signature;
src/Pure/Tools/prismjs.scala
--- a/src/Pure/Tools/prismjs.scala	Sat Nov 12 19:09:41 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala	Sun Nov 13 14:56:24 2022 +0100
@@ -14,23 +14,34 @@
 
   val HOME: Path = Path.explode("$ISABELLE_PRISMJS_HOME")
 
-  lazy val available_languages: List[String] = {
+  sealed case class Language(names: List[String]) {
+    require(names.nonEmpty)
+
+    def name: String = names.head
+    def alias: List[String] = names.tail
+    override def toString: String = name
+  }
+
+  lazy val languages: List[Language] = {
     val components_path = HOME + Path.explode("components.json")
     val components_json = JSON.parse(File.read(components_path))
     JSON.value(components_json, "languages") match {
       case Some(JSON.Object(langs)) =>
-        (for ((lang, JSON.Object(info)) <- langs.iterator if lang != "meta") yield {
+        (for ((name, JSON.Object(info)) <- langs.iterator if name != "meta") yield {
           val alias =
             JSON.Value.List.unapply(info.getOrElse("alias", Nil), JSON.Value.String.unapply)
               .getOrElse(Nil)
-          lang :: alias
-        }).toList.flatten.sorted
+          Language(name :: alias)
+        }).toList
       case _ => error("Failed to determine languages from " + components_path)
     }
   }
 
+  def check_language(name: String): Unit =
+    if (!languages.exists(_.names.contains(name))) error("Unknown language " + quote(name))
 
-  /* JavaScript prelude */
+
+  /* generate JavaScript */
 
   def prelude(lang: JS.Source): JS.Source =
     cat_lines(List(
@@ -39,72 +50,61 @@
       Nodejs.require_path("prismjs.load", HOME + Path.explode("components"), dir = true),
       JS.function("prismjs.load", lang),
       """
-function prismjs_content(t) {
-  if (t instanceof prismjs.Token) { return prismjs_content(t.content) }
-  else if (Array.isArray(t)) { return t.map(prismjs_content).join("") }
+function prismjs_encode(t) {
+  if (t instanceof prismjs.Token) {
+    var types = [t.type]
+    if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
+    else if (t.alias !== undefined) { types.push(t.alias) }
+    return {"types": types, "content": prismjs_encode(t.content) }
+  }
+  else if (Array.isArray(t)) { return t.map(prismjs_encode) }
   else { return t }
 }
 
 function prismjs_tokenize(text, lang) {
-  return prismjs.tokenize(text, prismjs.languages[lang]).map(function (t) {
-    var types = []
-    var content = ""
-    if (t instanceof prismjs.Token) {
-      types.push(t.type)
-      if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
-      else if (t.alias !== undefined) { types.push(t.alias) }
-      content = prismjs_content(t)
-    }
-    else { content = t }
-    return {"types": types, "content": content}
-  })
+  return prismjs.tokenize(text, prismjs.languages[lang]).map(prismjs_encode)
 }
 """))
 
+  def main(lang: JS.Source, input: Path, output: Path): JS.Source =
+    Nodejs.write_file(output,
+      JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang)))
+
 
   /* tokenize */
 
-  sealed case class Token(
-    types: List[String],
-    content: String,
-    range: Text.Range)
+  sealed case class Token(types: List[String], content: Content) {
+    def string: String = content.string
+  }
+  sealed abstract class Content { def string: String }
+  case class Atom(string: String) extends Content
+  case class Nested(tokens: List[Token]) extends Content {
+    def string: String = tokens.map(_.content.string).mkString
+  }
+
+  def decode(json: JSON.T): Option[Token] =
+    JSON.Value.String.unapply(json).map(string => Token(Nil, Atom(string))) orElse
+      (for {
+        types <- JSON.strings(json, "types")
+        content_json <- JSON.value(json, "content")
+        content <-
+          JSON.Value.String.unapply(content_json).map(Atom.apply) orElse
+          JSON.Value.List.unapply(content_json, decode).map(Nested.apply)
+      } yield Token(types, content))
 
   def tokenize(text: String, language: String): List[Token] = {
-    if (!available_languages.contains(language)) {
-      error("Unknown language " + quote(language))
-    }
+    check_language(language)
 
     val json =
       Isabelle_System.with_tmp_file("input", ext = "json") { input =>
         Isabelle_System.with_tmp_file("output", ext = "json") { output =>
           File.write(input, text)
           val lang = JS.value(language)
-          Nodejs.execute(
-            prelude(lang) + "\n" +
-            Nodejs.write_file(output,
-              JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang))))
+          Nodejs.execute(prelude(lang) + "\n" + main(lang, input, output))
           JSON.parse(File.read(output))
         }
       }
 
-    def parse_token(t: JSON.T): Token =
-      (for {
-        types <- JSON.strings(t, "types")
-        content <- JSON.string(t, "content")
-      } yield Token(types, content, Text.Range.zero))
-        .getOrElse(error("Malformed JSON token: " + t))
-
-    val tokens0 =
-      JSON.Value.List.unapply(json, t => Some(parse_token(t)))
-        .getOrElse(error("Malformed JSON: array expected"))
-
-    var stop = 0
-    val tokens = new mutable.ListBuffer[Token]
-    for (tok <- tokens0) {
-      val start = stop
-      stop += tok.content.length
-      tokens += tok.copy(range = Text.Range(start, stop))
-    }
-    tokens.toList
+    JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
   }
 }