src/Pure/Tools/prismjs.scala
author wenzelm
Sat, 12 Nov 2022 19:09:41 +0100
changeset 76512 c3b4e5e4c4e5
parent 76511 ec8c04dac257
child 76513 5a9a82522266
permissions -rw-r--r--
proper join without delimiter;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/Tools/prismjs.scala
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     3
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     4
Support for the Prism.js syntax highlighter -- via external Node.js process.
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     5
*/
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     6
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     7
package isabelle
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     8
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
     9
import scala.collection.mutable
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    10
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    11
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    12
object Prismjs {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    13
  /* component resources */
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    14
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    15
  val HOME: Path = Path.explode("$ISABELLE_PRISMJS_HOME")
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    16
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    17
  lazy val available_languages: List[String] = {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    18
    val components_path = HOME + Path.explode("components.json")
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    19
    val components_json = JSON.parse(File.read(components_path))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    20
    JSON.value(components_json, "languages") match {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    21
      case Some(JSON.Object(langs)) =>
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    22
        (for ((lang, JSON.Object(info)) <- langs.iterator if lang != "meta") yield {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    23
          val alias =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    24
            JSON.Value.List.unapply(info.getOrElse("alias", Nil), JSON.Value.String.unapply)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    25
              .getOrElse(Nil)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    26
          lang :: alias
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    27
        }).toList.flatten.sorted
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    28
      case _ => error("Failed to determine languages from " + components_path)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    29
    }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    30
  }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    31
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    32
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    33
  /* JavaScript prelude */
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    34
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    35
  def prelude(lang: JS.Source): JS.Source =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    36
    cat_lines(List(
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    37
      Nodejs.require_fs,
76510
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    38
      Nodejs.require_path("const prismjs", HOME),
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    39
      Nodejs.require_path("prismjs.load", HOME + Path.explode("components"), dir = true),
b0ad975cd25b clarified JS namespace;
wenzelm
parents: 76508
diff changeset
    40
      JS.function("prismjs.load", lang),
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    41
      """
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    42
function prismjs_content(t) {
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    43
  if (t instanceof prismjs.Token) { return prismjs_content(t.content) }
76512
c3b4e5e4c4e5 proper join without delimiter;
wenzelm
parents: 76511
diff changeset
    44
  else if (Array.isArray(t)) { return t.map(prismjs_content).join("") }
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    45
  else { return t }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    46
}
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    47
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    48
function prismjs_tokenize(text, lang) {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    49
  return prismjs.tokenize(text, prismjs.languages[lang]).map(function (t) {
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    50
    var types = []
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    51
    var content = ""
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    52
    if (t instanceof prismjs.Token) {
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    53
      types.push(t.type)
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    54
      if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } }
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    55
      else if (t.alias !== undefined) { types.push(t.alias) }
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    56
      content = prismjs_content(t)
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    57
    }
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    58
    else { content = t }
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    59
    return {"types": types, "content": content}
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    60
  })
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    61
}
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    62
"""))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    63
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    64
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    65
  /* tokenize */
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    66
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    67
  sealed case class Token(
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    68
    types: List[String],
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    69
    content: String,
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    70
    range: Text.Range)
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    71
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    72
  def tokenize(text: String, language: String): List[Token] = {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    73
    if (!available_languages.contains(language)) {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    74
      error("Unknown language " + quote(language))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    75
    }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    76
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    77
    val json =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    78
      Isabelle_System.with_tmp_file("input", ext = "json") { input =>
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    79
        Isabelle_System.with_tmp_file("output", ext = "json") { output =>
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    80
          File.write(input, text)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    81
          val lang = JS.value(language)
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    82
          Nodejs.execute(
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    83
            prelude(lang) + "\n" +
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    84
            Nodejs.write_file(output,
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    85
              JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang))))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    86
          JSON.parse(File.read(output))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    87
        }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    88
      }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    89
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    90
    def parse_token(t: JSON.T): Token =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    91
      (for {
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    92
        types <- JSON.strings(t, "types")
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    93
        content <- JSON.string(t, "content")
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    94
      } yield Token(types, content, Text.Range.zero))
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
    95
        .getOrElse(error("Malformed JSON token: " + t))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    96
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    97
    val tokens0 =
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    98
      JSON.Value.List.unapply(json, t => Some(parse_token(t)))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
    99
        .getOrElse(error("Malformed JSON: array expected"))
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   100
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   101
    var stop = 0
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   102
    val tokens = new mutable.ListBuffer[Token]
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   103
    for (tok <- tokens0) {
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   104
      val start = stop
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   105
      stop += tok.content.length
76511
ec8c04dac257 more accurate token types;
wenzelm
parents: 76510
diff changeset
   106
      tokens += tok.copy(range = Text.Range(start, stop))
76508
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   107
    }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   108
    tokens.toList
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   109
  }
ecb9e6d29698 support for the Prism.js syntax highlighter -- via external Node.js process;
wenzelm
parents:
diff changeset
   110
}