--- a/src/Pure/PIDE/markup.ML Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/PIDE/markup.ML Sun Nov 13 20:28:39 2022 +0100
@@ -141,6 +141,7 @@
val ML_antiquotationN: string
val document_antiquotationN: string
val document_antiquotation_optionN: string
+ val prismjs_languageN: string
val raw_textN: string val raw_text: T
val plain_textN: string val plain_text: T
val paragraphN: string val paragraph: T
@@ -567,6 +568,8 @@
(* document text *)
+val prismjs_languageN = "prismjs_language";
+
val (raw_textN, raw_text) = markup_elem "raw_text";
val (plain_textN, plain_text) = markup_elem "plain_text";
--- a/src/Pure/ROOT.ML Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/ROOT.ML Sun Nov 13 20:28:39 2022 +0100
@@ -349,6 +349,7 @@
ML_file "General/base64.ML";
ML_file "General/xz.ML";
ML_file "General/zstd.ML";
+ML_file "Tools/prismjs.ML";
ML_file "Tools/build.ML";
ML_file "Tools/named_thms.ML";
ML_file "Tools/print_operation.ML";
--- a/src/Pure/System/scala.scala Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/System/scala.scala Sun Nov 13 20:28:39 2022 +0100
@@ -383,4 +383,6 @@
Isabelle_System.Isabelle_Id,
Isabelle_Tool.Isabelle_Tools,
isabelle.atp.SystemOnTPTP.List_Systems,
- isabelle.atp.SystemOnTPTP.Run_System)
+ isabelle.atp.SystemOnTPTP.Run_System,
+ Prismjs.Languages,
+ Prismjs.Tokenize)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Tools/prismjs.ML Sun Nov 13 20:28:39 2022 +0100
@@ -0,0 +1,79 @@
+(* Title: Pure/Tools/prismjs.ML
+ Author: Makarius
+
+Support for the Prism.js syntax highlighter -- via Isabelle/Scala.
+*)
+
+signature PRISMJS =
+sig
+ type language
+ val languages: unit -> language list
+ val language_names: language -> string list
+ val language_name: language -> string
+ val language_alias: language -> string list
+ val check_language: Proof.context -> string * Position.T -> string
+ datatype token = Token of string list * content
+ and content = Atom of string | Nested of token list
+ val decode_token: token XML.Decode.T
+ val decode_content: content XML.Decode.T
+ val token_types: token -> string list
+ val token_content: token -> content
+ val token_string: token -> string
+ val content_string: content -> string
+ val tokenize: {text: string, language: string} -> token list
+end;
+
+structure Prismjs: PRISMJS =
+struct
+
+(* languages *)
+
+abstype language = Language of string list
+with
+
+fun language_names (Language names) = names;
+val language_name = hd o language_names;
+val language_alias = tl o language_names;
+
+fun languages () =
+ \<^scala>\<open>Prismjs.languages\<close> []
+ |> map (split_lines #> Language);
+
+fun check_language ctxt arg =
+ let
+ val langs = languages () |> maps language_names |> sort_strings |> map (rpair ());
+ val name =
+ Completion.check_item Markup.prismjs_languageN
+ (fn (name, _) => Markup.entity Markup.prismjs_languageN name)
+ langs ctxt arg;
+ in name end;
+
+end;
+
+
+(* tokenize *)
+
+datatype token = Token of string list * content
+and content = Atom of string | Nested of token list;
+
+local open XML.Decode in
+
+fun decode_token body = body |> pair (list string) decode_content |> Token
+and decode_content body =
+ body |> variant
+ [fn ([a], []) => Atom a,
+ fn ([], a) => Nested (list decode_token a)]
+
+end;
+
+fun token_types (Token (types, _)) = types;
+fun token_content (Token (_, content)) = content;
+fun token_string tok = content_string (token_content tok)
+and content_string (Atom string) = string
+ | content_string (Nested tokens) = implode (map token_string tokens);
+
+fun tokenize {text, language} =
+ \<^scala>\<open>Prismjs.tokenize\<close> [text, language]
+ |> map (YXML.parse_body #> decode_token);
+
+end;
--- a/src/Pure/Tools/prismjs.scala Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala Sun Nov 13 20:28:39 2022 +0100
@@ -75,11 +75,24 @@
sealed case class Token(types: List[String], content: Content) {
def string: String = content.string
+ def yxml: String = YXML.string_of_body(Encode.token(this))
}
sealed abstract class Content { def string: String }
case class Atom(string: String) extends Content
case class Nested(tokens: List[Token]) extends Content {
- def string: String = tokens.map(_.content.string).mkString
+ def string: String = tokens.map(_.string).mkString
+ }
+
+ object Encode {
+ import XML.Encode._
+ def token(tok: Token): XML.Body =
+ pair(list(string), content)(tok.types, tok.content)
+ def content: T[Content] = {
+ variant[Content](List(
+ { case Atom(a) => (List(a), Nil) },
+ { case Nested(a) => (Nil, list(token)(a)) }
+ ))
+ }
}
def decode(json: JSON.T): Option[Token] =
@@ -107,4 +120,22 @@
JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
}
-}
+
+
+ /* Scala functions in ML */
+
+ object Languages extends Scala.Fun_Strings("Prismjs.languages") {
+ val here = Scala_Project.here
+ def apply(args: List[String]): List[String] =
+ languages.map(language => cat_lines(language.names))
+ }
+
+ object Tokenize extends Scala.Fun_Strings("Prismjs.tokenize", thread = true) {
+ val here = Scala_Project.here
+ def apply(args: List[String]): List[String] =
+ args match {
+ case List(text, language) => tokenize(text, language).map(_.yxml)
+ case _ => error("Bad arguments")
+ }
+ }
+}
\ No newline at end of file