merged
authornipkow
Tue, 08 Mar 2022 09:35:39 +0100
changeset 75242 810d16927cdc
parent 75240 83197a0ac6df (diff)
parent 75241 21b2e37e0300 (current diff)
child 75243 a2b8394ce1f1
merged
--- a/lib/Tools/vscode	Mon Mar 07 15:28:53 2022 +0100
+++ b/lib/Tools/vscode	Tue Mar 08 09:35:39 2022 +0100
@@ -4,6 +4,8 @@
 #
 # DESCRIPTION: run Isabelle/VSCode using local VSCodium installation
 
+export ISABELLE_VSCODE_SYMBOLS="$(platform_path "$ISABELLE_VSCODE_WORKSPACE/symbols.json")"
+
 DIR="$(isabelle vscode_setup -C)" || exit "$?"
 exec "$DIR/bin/codium" \
   --locale en-US \
--- a/src/Pure/General/scan.scala	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Pure/General/scan.scala	Tue Mar 08 09:35:39 2022 +0100
@@ -61,9 +61,8 @@
           var count = 0
           var finished = false
           while (!finished && i < end && count < max_count) {
-            val n = matcher(i, end)
-            val sym = in.source.subSequence(i, i + n).toString
-            if (pred(sym)) { i += n; count += 1 }
+            val sym = matcher.match_symbol(i)
+            if (pred(sym)) { i += sym.length; count += 1 }
             else finished = true
           }
           if (count < min_count) Failure("bad input", in)
@@ -151,8 +150,8 @@
         var d = depth
         var finished = false
         while (!finished && i < end) {
-          val n = matcher(i, end)
-          val sym = in.source.subSequence(i, i + n).toString
+          val sym = matcher.match_symbol(i)
+          val n = sym.length
           if (Symbol.is_open(sym)) { i += n; d += 1 }
           else if (Symbol.is_close(sym) && d > 0) { i += n; d -= 1; if (d == 0) finished = true }
           else if (d > 0) i += n
--- a/src/Pure/General/symbol.scala	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Pure/General/symbol.scala	Tue Mar 08 09:35:39 2022 +0100
@@ -36,6 +36,16 @@
   }
 
 
+  /* char symbols */
+
+  private val char_symbols: Array[Symbol] =
+    (0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray
+
+  def char_symbol(c: Char): String =
+    if (c < char_symbols.length) char_symbols(c)
+    else c.toString
+
+
   /* ASCII characters */
 
   def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~'
@@ -62,7 +72,7 @@
   def ascii(c: Char): Symbol =
   {
     if (c > 127) error("Non-ASCII character: " + quote(c.toString))
-    else char_symbols(c.toInt)
+    else char_symbol(c)
   }
 
   def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128
@@ -70,13 +80,6 @@
 
   /* symbol matching */
 
-  private val symbol_total =
-    new Regex("(?xs) [\ud800-\udbff][\udc00-\udfff] " +
-      """ | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
-
-  private def is_plain(c: Char): Boolean =
-    !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
-
   def is_malformed(s: Symbol): Boolean =
     s.length match {
       case 1 =>
@@ -94,24 +97,37 @@
 
   class Matcher(text: CharSequence)
   {
-    private val matcher = symbol_total.pattern.matcher(text)
-    def apply(start: Int, end: Int): Int =
+    private def ok(i: Int): Boolean = 0 <= i && i < text.length
+    private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0
+    private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i
+
+    @tailrec private def many_ascii_letdig(i: Int): Int =
+      if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i
+    private def maybe_ascii_id(i: Int): Int =
+      if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i
+
+    def match_length(i: Int): Int =
     {
-      require(0 <= start && start < end && end <= text.length, "bad matcher range")
-      if (is_plain(text.charAt(start))) 1
-      else {
-        matcher.region(start, end).lookingAt
-        matcher.group.length
+      val a = char(i)
+      val b = char(i + 1)
+
+      if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2
+      else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i
+      else if (ok(i)) 1
+      else 0
+    }
+
+    def match_symbol(i: Int): String =
+      match_length(i) match {
+        case 0 => ""
+        case 1 => char_symbol(text.charAt(i))
+        case n => text.subSequence(i, i + n).toString
       }
-    }
   }
 
 
   /* iterator */
 
-  private val char_symbols: Array[Symbol] =
-    (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
-
   def iterator(text: CharSequence): Iterator[Symbol] =
     new Iterator[Symbol]
     {
@@ -120,16 +136,8 @@
       def hasNext: Boolean = i < text.length
       def next(): Symbol =
       {
-        val n = matcher(i, text.length)
-        val s =
-          if (n == 0) ""
-          else if (n == 1) {
-            val c = text.charAt(i)
-            if (c < char_symbols.length) char_symbols(c)
-            else text.subSequence(i, i + n).toString
-          }
-          else text.subSequence(i, i + n).toString
-        i += n
+        val s = matcher.match_symbol(i)
+        i += s.length
         s
       }
     }
@@ -163,7 +171,7 @@
       var chr = 0
       var sym = 0
       while (chr < text.length) {
-        val n = matcher(chr, text.length)
+        val n = matcher.match_length(chr)
         chr += n
         sym += 1
         if (n > 1) buf += Entry(chr, sym)
@@ -276,16 +284,15 @@
     def recode(text: String): String =
     {
       val len = text.length
-      val matcher = symbol_total.pattern.matcher(text)
+      val matcher = new Symbol.Matcher(text)
       val result = new StringBuilder(len)
       var i = 0
       while (i < len) {
         val c = text(i)
         if (min <= c && c <= max) {
-          matcher.region(i, len).lookingAt
-          val x = matcher.group
-          result.append(table.getOrElse(x, x))
-          i = matcher.end
+          val s = matcher.match_symbol(i)
+          result.append(table.getOrElse(s, s))
+          i += s.length
         }
         else { result.append(c); i += 1 }
       }
--- a/src/Tools/VSCode/etc/settings	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Tools/VSCode/etc/settings	Tue Mar 08 09:35:39 2022 +0100
@@ -1,6 +1,6 @@
 # -*- shell-script -*- :mode=shellscript:
 
-ISABELLE_VSCODE_VERSION="1.64.2"
+ISABELLE_VSCODE_VERSION="1.65.0"
 ISABELLE_VSCODE_HOME="$ISABELLE_HOME/src/Tools/VSCode"
 ISABELLE_VSCODE_SETTINGS="$ISABELLE_HOME_USER/vscode"
 ISABELLE_VSCODE_WORKSPACE="$ISABELLE_VSCODE_SETTINGS/workspace"
--- a/src/Tools/VSCode/extension/package-lock.json	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Tools/VSCode/extension/package-lock.json	Tue Mar 08 09:35:39 2022 +0100
@@ -15,12 +15,12 @@
             "devDependencies": {
                 "@types/mocha": "^9.1.0",
                 "@types/node": "^17.0.19",
-                "@types/vscode": "1.64.0",
+                "@types/vscode": "1.65.0",
                 "mocha": "^9.2.1",
                 "typescript": "^4.5.5"
             },
             "engines": {
-                "vscode": "1.64.0"
+                "vscode": "1.65.0"
             }
         },
         "node_modules/@types/mocha": {
@@ -36,9 +36,9 @@
             "dev": true
         },
         "node_modules/@types/vscode": {
-            "version": "1.64.0",
-            "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.64.0.tgz",
-            "integrity": "sha512-bSlAWz5WtcSL3cO9tAT/KpEH9rv5OBnm93OIIFwdCshaAiqr2bp1AUyEwW9MWeCvZBHEXc3V0fTYVdVyzDNwHA==",
+            "version": "1.65.0",
+            "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.65.0.tgz",
+            "integrity": "sha512-wQhExnh2nEzpjDMSKhUvnNmz3ucpd3E+R7wJkOhBNK3No6fG3VUdmVmMOKD0A8NDZDDDiQcLNxe3oGmX5SjJ5w==",
             "dev": true
         },
         "node_modules/@ungap/promise-all-settled": {
@@ -1048,9 +1048,9 @@
             "dev": true
         },
         "@types/vscode": {
-            "version": "1.64.0",
-            "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.64.0.tgz",
-            "integrity": "sha512-bSlAWz5WtcSL3cO9tAT/KpEH9rv5OBnm93OIIFwdCshaAiqr2bp1AUyEwW9MWeCvZBHEXc3V0fTYVdVyzDNwHA==",
+            "version": "1.65.0",
+            "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.65.0.tgz",
+            "integrity": "sha512-wQhExnh2nEzpjDMSKhUvnNmz3ucpd3E+R7wJkOhBNK3No6fG3VUdmVmMOKD0A8NDZDDDiQcLNxe3oGmX5SjJ5w==",
             "dev": true
         },
         "@ungap/promise-all-settled": {
--- a/src/Tools/VSCode/extension/package.json	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Tools/VSCode/extension/package.json	Tue Mar 08 09:35:39 2022 +0100
@@ -17,7 +17,7 @@
         "url": "https://isabelle-dev.sketis.net"
     },
     "engines": {
-        "vscode": "1.64.0"
+        "vscode": "1.65.0"
     },
     "categories": [
         "Programming Languages"
@@ -318,7 +318,7 @@
     "devDependencies": {
         "@types/mocha": "^9.1.0",
         "@types/node": "^17.0.19",
-        "@types/vscode": "1.64.0",
+        "@types/vscode": "1.65.0",
         "mocha": "^9.2.1",
         "typescript": "^4.5.5"
     },
--- a/src/Tools/VSCode/extension/src/extension.ts	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Tools/VSCode/extension/src/extension.ts	Tue Mar 08 09:35:39 2022 +0100
@@ -33,8 +33,7 @@
   try {
     const isabelle_home = library.getenv_strict("ISABELLE_HOME")
 
-    const symbols = await symbol.load_symbols(library.workspace_path("symbols.json"))
-    const workspace_dir = await Isabelle_Workspace.register(context, symbols)
+    const workspace_dir = await Isabelle_Workspace.register(context, symbol.symbols)
     const roots = workspace.workspaceFile === undefined ? await workspace.findFiles("{ROOT,ROOTS}") : []
 
     const isabelle_tool = isabelle_home + "/bin/isabelle"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tools/VSCode/extension/src/isabelle_encoding.ts	Tue Mar 08 09:35:39 2022 +0100
@@ -0,0 +1,64 @@
+/*  Author:     Makarius
+
+UTF-8-Isabelle symbol encoding: minimal dependencies, for use inside VSCode.
+*/
+
+'use strict';
+
+import * as fs from 'fs'
+import { TextEncoder, TextDecoder } from 'util'
+
+
+/* defined symbols */
+
+export interface Symbol_Entry
+{
+  symbol: string,
+  name: string,
+  code: number,
+  abbrevs: string[]
+}
+
+const symbols_file = process.env["ISABELLE_VSCODE_SYMBOLS"]
+
+export const symbols: [Symbol_Entry] =
+  symbols_file ? JSON.parse(fs.readFileSync(symbols_file).toString()) : []
+
+export const symbols_decode: Map<string, string> =
+  new Map(symbols.map((s: Symbol_Entry) => [s.symbol, String.fromCharCode(s.code)]))
+
+export const symbols_encode: Map<string, string> =
+  new Map(symbols.map((s: Symbol_Entry) => [String.fromCharCode(s.code), s.symbol]))
+
+
+/* encoding */
+
+export const UTF8_Isabelle = 'utf8-isabelle'
+
+export interface Options {
+  stripBOM?: boolean;
+  addBOM?: boolean;
+  defaultEncoding?: string;
+}
+
+export interface EncoderStream {
+  write(str: string): Uint8Array
+  end(): Uint8Array | undefined
+}
+
+export interface DecoderStream {
+  write(buf: Uint8Array): string
+  end(): string | undefined
+}
+
+export function getEncoder(encoding: string, options?: Options): EncoderStream
+{
+  const utf8_encoder = new TextEncoder
+  return null
+}
+
+export function getDecoder(encoding: string, options?: Options): DecoderStream
+{
+  const utf8_decoder = new TextDecoder
+  return null
+}
--- a/src/Tools/VSCode/extension/src/symbol.ts	Mon Mar 07 15:28:53 2022 +0100
+++ b/src/Tools/VSCode/extension/src/symbol.ts	Tue Mar 08 09:35:39 2022 +0100
@@ -1,12 +1,16 @@
 /*  Author:     Makarius
 
-Isabelle text symbols (see Pure/General/symbol.scala).
+Isabelle text symbols versus UTF-8/Unicode encoding. See also:
+
+  - Pure/General/symbol.scala
+  - Pure/General/utf8.scala
+  - https://encoding.spec.whatwg.org
 */
 
 'use strict';
 
-import * as library from './library'
 import * as file from './file'
+import * as isabelle_encoding from './isabelle_encoding'
 
 
 /* ASCII characters */
@@ -41,13 +45,7 @@
 
 /* defined symbols */
 
-export interface Entry
-{
-  symbol: Symbol,
-  name: string,
-  code: number,
-  abbrevs: string[]
-}
+export type Entry = isabelle_encoding.Symbol_Entry
 
 export class Symbols
 {
@@ -72,17 +70,6 @@
   {
     return this.entries_map.has(sym)
   }
-
-  public decode(sym: Symbol): string | undefined
-  {
-    const entry = this.get(sym)
-    const code = entry ? entry.code : undefined
-    return code ? String.fromCharCode(code) : undefined
-  }
 }
 
-export async function load_symbols(path: string): Promise<Symbols>
-{
-  const entries = await file.read_json<[Entry]>(path)
-  return new Symbols(entries)
-}
+export const symbols: Symbols = new Symbols(isabelle_encoding.symbols)