# HG changeset patch
# User wenzelm
# Date 1668367719 -3600
# Node ID 2615cf68f6f4402acae6064df3b0e4833a33a731
# Parent  5a9a82522266c304b94ce9b13485a5fb5dbd44a8
ML support for the Prism.js syntax highlighter -- via Isabelle/Scala;

diff -r 5a9a82522266 -r 2615cf68f6f4 src/Pure/PIDE/markup.ML
--- a/src/Pure/PIDE/markup.ML	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/PIDE/markup.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -141,6 +141,7 @@
   val ML_antiquotationN: string
   val document_antiquotationN: string
   val document_antiquotation_optionN: string
+  val prismjs_languageN: string
   val raw_textN: string val raw_text: T
   val plain_textN: string val plain_text: T
   val paragraphN: string val paragraph: T
@@ -567,6 +568,8 @@
 
 (* document text *)
 
+val prismjs_languageN = "prismjs_language";
+
 val (raw_textN, raw_text) = markup_elem "raw_text";
 val (plain_textN, plain_text) = markup_elem "plain_text";
 
diff -r 5a9a82522266 -r 2615cf68f6f4 src/Pure/ROOT.ML
--- a/src/Pure/ROOT.ML	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/ROOT.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -349,6 +349,7 @@
 ML_file "General/base64.ML";
 ML_file "General/xz.ML";
 ML_file "General/zstd.ML";
+ML_file "Tools/prismjs.ML";
 ML_file "Tools/build.ML";
 ML_file "Tools/named_thms.ML";
 ML_file "Tools/print_operation.ML";
diff -r 5a9a82522266 -r 2615cf68f6f4 src/Pure/System/scala.scala
--- a/src/Pure/System/scala.scala	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/System/scala.scala	Sun Nov 13 20:28:39 2022 +0100
@@ -383,4 +383,6 @@
   Isabelle_System.Isabelle_Id,
   Isabelle_Tool.Isabelle_Tools,
   isabelle.atp.SystemOnTPTP.List_Systems,
-  isabelle.atp.SystemOnTPTP.Run_System)
+  isabelle.atp.SystemOnTPTP.Run_System,
+  Prismjs.Languages,
+  Prismjs.Tokenize)
diff -r 5a9a82522266 -r 2615cf68f6f4 src/Pure/Tools/prismjs.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Tools/prismjs.ML	Sun Nov 13 20:28:39 2022 +0100
@@ -0,0 +1,79 @@
+(*  Title:      Pure/Tools/prismjs.ML
+    Author:     Makarius
+
+Support for the Prism.js syntax highlighter -- via Isabelle/Scala.
+*)
+
+signature PRISMJS =
+sig
+  type language
+  val languages: unit -> language list
+  val language_names: language -> string list
+  val language_name: language -> string
+  val language_alias: language -> string list
+  val check_language: Proof.context -> string * Position.T -> string
+  datatype token = Token of string list * content
+  and content = Atom of string | Nested of token list
+  val decode_token: token XML.Decode.T
+  val decode_content: content XML.Decode.T
+  val token_types: token -> string list
+  val token_content: token -> content
+  val token_string: token -> string
+  val content_string: content -> string
+  val tokenize: {text: string, language: string} -> token list
+end;
+
+structure Prismjs: PRISMJS =
+struct
+
+(* languages *)
+
+abstype language = Language of string list
+with
+
+fun language_names (Language names) = names;
+val language_name = hd o language_names;
+val language_alias = tl o language_names;
+
+fun languages () =
+  \<^scala>\<open>Prismjs.languages\<close> []
+  |> map (split_lines #> Language);
+
+fun check_language ctxt arg =
+  let
+    val langs = languages () |> maps language_names |> sort_strings |> map (rpair ());
+    val name =
+      Completion.check_item Markup.prismjs_languageN
+        (fn (name, _) => Markup.entity Markup.prismjs_languageN name)
+        langs ctxt arg;
+  in name end;
+
+end;
+
+
+(* tokenize *)
+
+datatype token = Token of string list * content
+and content = Atom of string | Nested of token list;
+
+local open XML.Decode in
+
+fun decode_token body = body |> pair (list string) decode_content |> Token
+and decode_content body =
+  body |> variant
+   [fn ([a], []) => Atom a,
+    fn ([], a) => Nested (list decode_token a)]
+
+end;
+
+fun token_types (Token (types, _)) = types;
+fun token_content (Token (_, content)) = content;
+fun token_string tok = content_string (token_content tok)
+and content_string (Atom string) = string
+  | content_string (Nested tokens) = implode (map token_string tokens);
+
+fun tokenize {text, language} =
+  \<^scala>\<open>Prismjs.tokenize\<close> [text, language]
+  |> map (YXML.parse_body #> decode_token);
+
+end;
diff -r 5a9a82522266 -r 2615cf68f6f4 src/Pure/Tools/prismjs.scala
--- a/src/Pure/Tools/prismjs.scala	Sun Nov 13 14:56:24 2022 +0100
+++ b/src/Pure/Tools/prismjs.scala	Sun Nov 13 20:28:39 2022 +0100
@@ -75,11 +75,24 @@
 
   sealed case class Token(types: List[String], content: Content) {
     def string: String = content.string
+    def yxml: String = YXML.string_of_body(Encode.token(this))
   }
   sealed abstract class Content { def string: String }
   case class Atom(string: String) extends Content
   case class Nested(tokens: List[Token]) extends Content {
-    def string: String = tokens.map(_.content.string).mkString
+    def string: String = tokens.map(_.string).mkString
+  }
+
+  object Encode {
+    import XML.Encode._
+    def token(tok: Token): XML.Body =
+      pair(list(string), content)(tok.types, tok.content)
+    def content: T[Content] = {
+      variant[Content](List(
+        { case Atom(a) => (List(a), Nil) },
+        { case Nested(a) => (Nil, list(token)(a)) }
+      ))
+    }
   }
 
   def decode(json: JSON.T): Option[Token] =
@@ -107,4 +120,22 @@
 
       JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json))
   }
-}
+
+
+  /* Scala functions in ML */
+
+  object Languages extends Scala.Fun_Strings("Prismjs.languages") {
+    val here = Scala_Project.here
+    def apply(args: List[String]): List[String] =
+      languages.map(language => cat_lines(language.names))
+  }
+
+  object Tokenize extends Scala.Fun_Strings("Prismjs.tokenize", thread = true) {
+    val here = Scala_Project.here
+    def apply(args: List[String]): List[String] =
+      args match {
+        case List(text, language) => tokenize(text, language).map(_.yxml)
+        case _ => error("Bad arguments")
+      }
+  }
+}
\ No newline at end of file
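
For illustration only (not part of the changeset above): a minimal Isabelle/ML sketch of how the new Prismjs interface might be used. It assumes a running Isabelle/Scala process, since the \<^scala> antiquotations are evaluated there, and it assumes that the bundled Prism.js distribution provides a "markdown" component.

(*illustrative sketch, assuming Isabelle/Scala is available and Prism.js knows "markdown"*)
val langs = Prismjs.languages ();
val all_names = maps Prismjs.language_names langs;

val tokens =
  Prismjs.tokenize {text = "# Title\nSome *emphasis* here.", language = "markdown"};
val types = map Prismjs.token_types tokens;
val text' = implode (map Prismjs.token_string tokens);

All data is routed through the existing Scala function protocol: Prismjs.Languages and Prismjs.Tokenize are registered as Scala.Fun_Strings instances in scala.scala, and the ML side decodes their YXML-encoded results via XML.Decode, mirroring the XML.Encode added in prismjs.scala.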