more accurate token types;
authorwenzelm
Sat, 12 Nov 2022 19:04:28 +0100
changeset 76511 ec8c04dac257
parent 76510 b0ad975cd25b
child 76512 c3b4e5e4c4e5
more accurate token types; keep untyped tokens for complete covering;
src/Pure/Tools/prismjs.scala
--- a/src/Pure/Tools/prismjs.scala	Sat Nov 12 17:21:38 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala	Sat Nov 12 19:04:28 2022 +0100
@@ -40,17 +40,23 @@
       JS.function("prismjs.load", lang),
       """
 function prismjs_content(t) {
-  if (Array.isArray(t)) { return t.map(prismjs_content).join() }
-  else if (t instanceof prismjs.Token) { return prismjs_content(t.content) }
+  if (t instanceof prismjs.Token) { return prismjs_content(t.content) }
+  else if (Array.isArray(t)) { return t.map(prismjs_content).join() }
   else { return t }
 }
 
 function prismjs_tokenize(text, lang) {
   return prismjs.tokenize(text, prismjs.languages[lang]).map(function (t) {
+    var types = []
+    var content = ""
     if (t instanceof prismjs.Token) {
-      return {"kind": t.type, "content": prismjs_content(t)}
+      types.push(t.type)
+      if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
+      else if (t.alias !== undefined) { types.push(t.alias) }
+      content = prismjs_content(t)
     }
-    else { return {"kind": "", "content": t} }
+    else { content = t }
+    return {"types": types, "content": content}
   })
 }
 """))
@@ -59,9 +65,9 @@
   /* tokenize */
 
   sealed case class Token(
-    kind: String,
+    types: List[String],
     content: String,
-    range: Text.Range = Text.Range.zero)
+    range: Text.Range)
 
   def tokenize(text: String, language: String): List[Token] = {
     if (!available_languages.contains(language)) {
@@ -83,9 +89,10 @@
 
     def parse_token(t: JSON.T): Token =
       (for {
-        kind <- JSON.string(t, "kind")
+        types <- JSON.strings(t, "types")
         content <- JSON.string(t, "content")
-      } yield Token(kind, content)).getOrElse(error("Malformed JSON token: " + t))
+      } yield Token(types, content, Text.Range.zero))
+        .getOrElse(error("Malformed JSON token: " + t))
 
     val tokens0 =
       JSON.Value.List.unapply(json, t => Some(parse_token(t)))
@@ -96,9 +103,7 @@
     for (tok <- tokens0) {
       val start = stop
       stop += tok.content.length
-      if (tok.kind.nonEmpty) {
-        tokens += tok.copy(range = Text.Range(start, stop))
-      }
+      tokens += tok.copy(range = Text.Range(start, stop))
     }
     tokens.toList
   }