minor performance tuning: allow recode operation during YXML parsing;
authorwenzelm
Fri, 28 Jun 2024 16:51:55 +0200
changeset 80447 325907d85977
parent 80446 996b5eb7c89e
child 80448 acbd22e7e3ec
minor performance tuning: allow recode operation during YXML parsing;
src/Pure/Build/build.scala
src/Pure/Build/export.scala
src/Pure/Build/export_theory.scala
src/Pure/General/symbol.scala
src/Pure/PIDE/yxml.scala
--- a/src/Pure/Build/build.scala	Fri Jun 28 15:59:45 2024 +0200
+++ b/src/Pure/Build/build.scala	Fri Jun 28 16:51:55 2024 +0200
@@ -714,13 +714,12 @@
     theory_context: Export.Theory_Context,
     unicode_symbols: Boolean = false
   ): Option[Document.Snapshot] = {
-    def decode_bytes(bytes: Bytes): String =
-      Symbol.output(unicode_symbols, bytes.text)
+    def decode(str: String): String = Symbol.output(unicode_symbols, str)
 
     def read(name: String): Export.Entry = theory_context(name, permissive = true)
 
     def read_xml(name: String): XML.Body =
-      YXML.parse_body(decode_bytes(read(name).bytes), cache = theory_context.cache)
+      YXML.parse_body(read(name).bytes.text, recode = decode, cache = theory_context.cache)
 
     def read_source_file(name: String): Store.Source_File =
       theory_context.session_context.source_file(name)
@@ -741,14 +740,14 @@
 
           val file = read_source_file(name)
           val bytes = file.bytes
-          val text = decode_bytes(bytes)
+          val text = decode(bytes.text)
           val chunk = Symbol.Text_Chunk(text)
 
           Command.Blob(Document.Node.Name(name), src_path, Some((file.digest, chunk))) ->
             Document.Blobs.Item(bytes, text, chunk, changed = false)
         }
 
-      val thy_source = decode_bytes(read_source_file(thy_file).bytes)
+      val thy_source = decode(read_source_file(thy_file).bytes.text)
       val thy_xml = read_xml(Export.MARKUP)
       val blobs_xml =
         for (i <- (1 to blobs.length).toList)
--- a/src/Pure/Build/export.scala	Fri Jun 28 15:59:45 2024 +0200
+++ b/src/Pure/Build/export.scala	Fri Jun 28 16:51:55 2024 +0200
@@ -194,7 +194,8 @@
 
     def text: String = bytes.text
 
-    def yxml: XML.Body = YXML.parse_body(bytes.text, cache = cache)
+    def yxml(recode: String => String = identity): XML.Body =
+      YXML.parse_body(bytes.text, recode = recode, cache = cache)
   }
 
   def make_regex(pattern: String): Regex = {
@@ -536,9 +537,9 @@
     def apply(name: String, permissive: Boolean = false): Entry =
       session_context.apply(theory, name, permissive = permissive)
 
-    def yxml(name: String): XML.Body =
+    def yxml(name: String, recode: String => String = identity): XML.Body =
       get(name) match {
-        case Some(entry) => entry.yxml
+        case Some(entry) => entry.yxml(recode = recode)
         case None => Nil
       }
 
--- a/src/Pure/Build/export_theory.scala	Fri Jun 28 15:59:45 2024 +0200
+++ b/src/Pure/Build/export_theory.scala	Fri Jun 28 16:51:55 2024 +0200
@@ -391,7 +391,7 @@
 
     for { entry <- theory_context.get(Export.PROOFS_PREFIX + id.serial) }
     yield {
-      val body = entry.yxml
+      val body = entry.yxml()
       val (typargs, (args, (prop_body, proof_body))) = {
         import XML.Decode._
         import Term_XML.Decode._
--- a/src/Pure/General/symbol.scala	Fri Jun 28 15:59:45 2024 +0200
+++ b/src/Pure/General/symbol.scala	Fri Jun 28 16:51:55 2024 +0200
@@ -541,10 +541,10 @@
   def encode(text: String): String = symbols.encode(text)
 
   def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body =
-    YXML.parse_body(decode(text), cache = cache)
+    YXML.parse_body(text, recode = decode, cache = cache)
 
   def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body =
-    YXML.parse_body_failsafe(decode(text), cache = cache)
+    YXML.parse_body_failsafe(text, recode = decode, cache = cache)
 
   def encode_yxml(body: XML.Body): String =
     YXML.string_of_body(body, recode = encode)
--- a/src/Pure/PIDE/yxml.scala	Fri Jun 28 15:59:45 2024 +0200
+++ b/src/Pure/PIDE/yxml.scala	Fri Jun 28 16:51:55 2024 +0200
@@ -93,11 +93,19 @@
     if (name == "") err("unbalanced element")
     else err("unbalanced element " + quote(name))
 
-  private def parse_attrib(source: CharSequence): (String, String) =
-    Properties.Eq.unapply(source.toString) getOrElse err_attribute()
+  def parse_body(
+    source: CharSequence,
+    recode: String => String = identity,
+    cache: XML.Cache = XML.Cache.none
+  ): XML.Body = {
+    /* parse + recode */
+
+    def parse_string(s: CharSequence): String = recode(s.toString)
+
+    def parse_attrib(s: CharSequence): (String, String) =
+      Properties.Eq.unapply(parse_string(s)) getOrElse err_attribute()
 
 
-  def parse_body(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Body = {
     /* stack operations */
 
     def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
@@ -126,8 +134,9 @@
       else {
         Library.separated_chunks(is_Y, chunk).toList match {
           case ch :: name :: atts if ch.length == 0 =>
-            push(name.toString, atts.map(parse_attrib))
-          case txts => for (txt <- txts) add(cache.tree0(XML.Text(cache.string(txt.toString))))
+            push(parse_string(name), atts.map(parse_attrib))
+          case txts =>
+            for (txt <- txts) add(cache.tree0(XML.Text(cache.string(parse_string(txt)))))
         }
       }
     }
@@ -137,15 +146,23 @@
     }
   }
 
-  def parse(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree =
-    parse_body(source, cache = cache) match {
+  def parse(
+    source: CharSequence,
+    recode: String => String = identity,
+    cache: XML.Cache = XML.Cache.none
+  ): XML.Tree =
+    parse_body(source, recode = recode, cache = cache) match {
       case List(result) => result
       case Nil => XML.no_text
       case _ => err("multiple XML trees")
     }
 
-  def parse_elem(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree =
-    parse_body(source, cache = cache) match {
+  def parse_elem(
+    source: CharSequence,
+    recode: String => String = identity,
+    cache: XML.Cache = XML.Cache.none
+  ): XML.Tree =
+    parse_body(source, recode = recode, cache = cache) match {
       case List(elem: XML.Elem) => elem
       case _ => err("single XML element expected")
     }
@@ -156,13 +173,21 @@
   private def markup_broken(source: CharSequence) =
     XML.Elem(Markup.Broken, List(XML.Text(source.toString)))
 
-  def parse_body_failsafe(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Body = {
-    try { parse_body(source, cache = cache) }
+  def parse_body_failsafe(
+    source: CharSequence,
+    recode: String => String = identity,
+    cache: XML.Cache = XML.Cache.none
+  ): XML.Body = {
+    try { parse_body(source, recode = recode, cache = cache) }
     catch { case ERROR(_) => List(markup_broken(source)) }
   }
 
-  def parse_failsafe(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree = {
-    try { parse(source, cache = cache) }
+  def parse_failsafe(
+    source: CharSequence,
+    recode: String => String = identity,
+    cache: XML.Cache = XML.Cache.none
+  ): XML.Tree = {
+    try { parse(source, recode = recode, cache = cache) }
     catch { case ERROR(_) => markup_broken(source) }
   }
 }