ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
authorwenzelm
Sun, 13 Nov 2022 20:28:39 +0100
changeset 76514 2615cf68f6f4
parent 76513 5a9a82522266
child 76515 3e3541e79219
ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;
src/Pure/PIDE/markup.ML
src/Pure/ROOT.ML
src/Pure/System/scala.scala
src/Pure/Tools/prismjs.ML
src/Pure/Tools/prismjs.scala
--- a/src/Pure/PIDE/markup.ML	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/PIDE/markup.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -141,6 +141,7 @@
   val ML_antiquotationN: string
   val document_antiquotationN: string
   val document_antiquotation_optionN: string
+  val prismjs_languageN: string
   val raw_textN: string val raw_text: T
   val plain_textN: string val plain_text: T
   val paragraphN: string val paragraph: T
@@ -567,6 +568,8 @@
 
 (* document text *)
 
+val prismjs_languageN = "prismjs_language";
+
 val (raw_textN, raw_text) = markup_elem "raw_text";
 val (plain_textN, plain_text) = markup_elem "plain_text";
 
--- a/src/Pure/ROOT.ML	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/ROOT.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -349,6 +349,7 @@
 ML_file "General/base64.ML";
 ML_file "General/xz.ML";
 ML_file "General/zstd.ML";
+ML_file "Tools/prismjs.ML";
 ML_file "Tools/build.ML";
 ML_file "Tools/named_thms.ML";
 ML_file "Tools/print_operation.ML";
--- a/src/Pure/System/scala.scala	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/System/scala.scala	Sun Nov 13 20:28:39 2022 +0100
@@ -383,4 +383,6 @@
   Isabelle_System.Isabelle_Id,
   Isabelle_Tool.Isabelle_Tools,
   isabelle.atp.SystemOnTPTP.List_Systems,
-  isabelle.atp.SystemOnTPTP.Run_System)
+  isabelle.atp.SystemOnTPTP.Run_System,
+  Prismjs.Languages,
+  Prismjs.Tokenize)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Tools/prismjs.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -0,0 +1,79 @@
+(*  Title:      Pure/Tools/prismjs.ML
+    Author:     Makarius
+
+Support for the Prism.js syntax highlighter -- via Isabelle/Scala.
+*)
+
+signature PRISMJS =
+sig
+  type language
+  val languages: unit -> language list
+  val language_names: language -> string list
+  val language_name: language -> string
+  val language_alias: language -> string list
+  val check_language: Proof.context -> string * Position.T -> string
+  datatype token = Token of string list * content
+  and content = Atom of string | Nested of token list
+  val decode_token: token XML.Decode.T
+  val decode_content: content XML.Decode.T
+  val token_types: token -> string list
+  val token_content: token -> content
+  val token_string: token -> string
+  val content_string: content -> string
+  val tokenize: {text: string, language: string} -> token list
+end;
+
+structure Prismjs: PRISMJS =
+struct
+
+(* languages *)
+
+abstype language = Language of string list
+with
+
+fun language_names (Language names) = names;
+val language_name = hd o language_names;
+val language_alias = tl o language_names;
+
+fun languages () =
+  \<^scala>\<open>Prismjs.languages\<close> []
+  |> map (split_lines #> Language);
+
+fun check_language ctxt arg =
+  let
+    val langs = languages () |> maps language_names |> sort_strings |> map (rpair ());
+    val name =
+      Completion.check_item Markup.prismjs_languageN
+        (fn (name, _) => Markup.entity Markup.prismjs_languageN name)
+        langs ctxt arg;
+  in name end;
+
+end;
+
+
+(* tokenize *)
+
+datatype token = Token of string list * content
+and content = Atom of string | Nested of token list;
+
+local open XML.Decode in
+
+fun decode_token body = body |> pair (list string) decode_content |> Token
+and decode_content body =
+  body |> variant
+   [fn ([a], []) => Atom a,
+    fn ([], a) => Nested (list decode_token a)]
+
+end;
+
+fun token_types (Token (types, _)) = types;
+fun token_content (Token (_, content)) = content;
+fun token_string tok = content_string (token_content tok)
+and content_string (Atom string) = string
+  | content_string (Nested tokens) = implode (map token_string tokens);
+
+fun tokenize {text, language} =
+  \<^scala>\<open>Prismjs.tokenize\<close> [text, language]
+  |> map (YXML.parse_body #> decode_token);
+
+end;
--- a/src/Pure/Tools/prismjs.scala	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala	Sun Nov 13 20:28:39 2022 +0100
@@ -75,11 +75,24 @@
 
   sealed case class Token(types: List[String], content: Content) {
     def string: String = content.string
+    def yxml: String = YXML.string_of_body(Encode.token(this))
   }
   sealed abstract class Content { def string: String }
   case class Atom(string: String) extends Content
   case class Nested(tokens: List[Token]) extends Content {
-    def string: String = tokens.map(_.content.string).mkString
+    def string: String = tokens.map(_.string).mkString
+  }
+
+  object Encode {
+    import XML.Encode._
+    def token(tok: Token): XML.Body =
+      pair(list(string), content)(tok.types, tok.content)
+    def content: T[Content] = {
+      variant[Content](List(
+        { case Atom(a) => (List(a), Nil) },
+        { case Nested(a) => (Nil, list(token)(a)) }
+      ))
+    }
   }
 
   def decode(json: JSON.T): Option[Token] =
@@ -107,4 +120,22 @@
 
     JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
   }
-}
+
+
+  /* Scala functions in ML */
+
+  object Languages extends Scala.Fun_Strings("Prismjs.languages") {
+    val here = Scala_Project.here
+    def apply(args: List[String]): List[String] =
+      languages.map(language => cat_lines(language.names))
+  }
+
+  object Tokenize extends Scala.Fun_Strings("Prismjs.tokenize", thread = true) {
+    val here = Scala_Project.here
+    def apply(args: List[String]): List[String] =
+      args match {
+        case List(text, language) => tokenize(text, language).map(_.yxml)
+        case _ => error("Bad arguments")
+      }
+  }
+}
\ No newline at end of file