# HG changeset patch # User nipkow # Date 1646728539 -3600 # Node ID 810d16927cdc575d0f0230cb158bf689de64e77e # Parent 83197a0ac6df75cabb1c8d45090db5adad802663# Parent 21b2e37e03007caa90ee85ebb2ad9d5b46df0635 merged diff -r 21b2e37e0300 -r 810d16927cdc lib/Tools/vscode --- a/lib/Tools/vscode Mon Mar 07 15:28:53 2022 +0100 +++ b/lib/Tools/vscode Tue Mar 08 09:35:39 2022 +0100 @@ -4,6 +4,8 @@ # # DESCRIPTION: run Isabelle/VSCode using local VSCodium installation +export ISABELLE_VSCODE_SYMBOLS="$(platform_path "$ISABELLE_VSCODE_WORKSPACE/symbols.json")" + DIR="$(isabelle vscode_setup -C)" || exit "$?" exec "$DIR/bin/codium" \ --locale en-US \ diff -r 21b2e37e0300 -r 810d16927cdc src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Pure/General/scan.scala Tue Mar 08 09:35:39 2022 +0100 @@ -61,9 +61,8 @@ var count = 0 var finished = false while (!finished && i < end && count < max_count) { - val n = matcher(i, end) - val sym = in.source.subSequence(i, i + n).toString - if (pred(sym)) { i += n; count += 1 } + val sym = matcher.match_symbol(i) + if (pred(sym)) { i += sym.length; count += 1 } else finished = true } if (count < min_count) Failure("bad input", in) @@ -151,8 +150,8 @@ var d = depth var finished = false while (!finished && i < end) { - val n = matcher(i, end) - val sym = in.source.subSequence(i, i + n).toString + val sym = matcher.match_symbol(i) + val n = sym.length if (Symbol.is_open(sym)) { i += n; d += 1 } else if (Symbol.is_close(sym) && d > 0) { i += n; d -= 1; if (d == 0) finished = true } else if (d > 0) i += n diff -r 21b2e37e0300 -r 810d16927cdc src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Pure/General/symbol.scala Tue Mar 08 09:35:39 2022 +0100 @@ -36,6 +36,16 @@ } + /* char symbols */ + + private val char_symbols: Array[Symbol] = + (0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray + + def char_symbol(c: Char): String = + if (c < char_symbols.length) char_symbols(c) + else c.toString + + /* ASCII characters */ def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' @@ -62,7 +72,7 @@ def ascii(c: Char): Symbol = { if (c > 127) error("Non-ASCII character: " + quote(c.toString)) - else char_symbols(c.toInt) + else char_symbol(c) } def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 @@ -70,13 +80,6 @@ /* symbol matching */ - private val symbol_total = - new Regex("(?xs) [\ud800-\udbff][\udc00-\udfff] " + - """ | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") - - private def is_plain(c: Char): Boolean = - !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) - def is_malformed(s: Symbol): Boolean = s.length match { case 1 => @@ -94,24 +97,37 @@ class Matcher(text: CharSequence) { - private val matcher = symbol_total.pattern.matcher(text) - def apply(start: Int, end: Int): Int = + private def ok(i: Int): Boolean = 0 <= i && i < text.length + private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0 + private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i + + @tailrec private def many_ascii_letdig(i: Int): Int = + if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i + private def maybe_ascii_id(i: Int): Int = + if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i + + def match_length(i: Int): Int = { - require(0 <= start && start < end && end <= text.length, "bad matcher range") - if (is_plain(text.charAt(start))) 1 - else { - matcher.region(start, end).lookingAt - matcher.group.length + val a = char(i) + val b = char(i + 1) + + if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2 + else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i + else if (ok(i)) 1 + else 0 + } + + def match_symbol(i: Int): String = + match_length(i) match { + case 0 => "" + case 1 => char_symbol(text.charAt(i)) + case n => text.subSequence(i, i + n).toString } - } } /* iterator */ - private val char_symbols: Array[Symbol] = - (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray - def iterator(text: CharSequence): Iterator[Symbol] = new Iterator[Symbol] { @@ -120,16 +136,8 @@ def hasNext: Boolean = i < text.length def next(): Symbol = { - val n = matcher(i, text.length) - val s = - if (n == 0) "" - else if (n == 1) { - val c = text.charAt(i) - if (c < char_symbols.length) char_symbols(c) - else text.subSequence(i, i + n).toString - } - else text.subSequence(i, i + n).toString - i += n + val s = matcher.match_symbol(i) + i += s.length s } } @@ -163,7 +171,7 @@ var chr = 0 var sym = 0 while (chr < text.length) { - val n = matcher(chr, text.length) + val n = matcher.match_length(chr) chr += n sym += 1 if (n > 1) buf += Entry(chr, sym) @@ -276,16 +284,15 @@ def recode(text: String): String = { val len = text.length - val matcher = symbol_total.pattern.matcher(text) + val matcher = new Symbol.Matcher(text) val result = new StringBuilder(len) var i = 0 while (i < len) { val c = text(i) if (min <= c && c <= max) { - matcher.region(i, len).lookingAt - val x = matcher.group - result.append(table.getOrElse(x, x)) - i = matcher.end + val s = matcher.match_symbol(i) + result.append(table.getOrElse(s, s)) + i += s.length } else { result.append(c); i += 1 } } diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/etc/settings --- a/src/Tools/VSCode/etc/settings Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Tools/VSCode/etc/settings Tue Mar 08 09:35:39 2022 +0100 @@ -1,6 +1,6 @@ # -*- shell-script -*- :mode=shellscript: -ISABELLE_VSCODE_VERSION="1.64.2" +ISABELLE_VSCODE_VERSION="1.65.0" ISABELLE_VSCODE_HOME="$ISABELLE_HOME/src/Tools/VSCode" ISABELLE_VSCODE_SETTINGS="$ISABELLE_HOME_USER/vscode" ISABELLE_VSCODE_WORKSPACE="$ISABELLE_VSCODE_SETTINGS/workspace" diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/extension/package-lock.json --- a/src/Tools/VSCode/extension/package-lock.json Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Tools/VSCode/extension/package-lock.json Tue Mar 08 09:35:39 2022 +0100 @@ -15,12 +15,12 @@ "devDependencies": { "@types/mocha": "^9.1.0", "@types/node": "^17.0.19", - "@types/vscode": "1.64.0", + "@types/vscode": "1.65.0", "mocha": "^9.2.1", "typescript": "^4.5.5" }, "engines": { - "vscode": "1.64.0" + "vscode": "1.65.0" } }, "node_modules/@types/mocha": { @@ -36,9 +36,9 @@ "dev": true }, "node_modules/@types/vscode": { - "version": "1.64.0", - "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.64.0.tgz", - "integrity": "sha512-bSlAWz5WtcSL3cO9tAT/KpEH9rv5OBnm93OIIFwdCshaAiqr2bp1AUyEwW9MWeCvZBHEXc3V0fTYVdVyzDNwHA==", + "version": "1.65.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.65.0.tgz", + "integrity": "sha512-wQhExnh2nEzpjDMSKhUvnNmz3ucpd3E+R7wJkOhBNK3No6fG3VUdmVmMOKD0A8NDZDDDiQcLNxe3oGmX5SjJ5w==", "dev": true }, "node_modules/@ungap/promise-all-settled": { @@ -1048,9 +1048,9 @@ "dev": true }, "@types/vscode": { - "version": "1.64.0", - "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.64.0.tgz", - "integrity": "sha512-bSlAWz5WtcSL3cO9tAT/KpEH9rv5OBnm93OIIFwdCshaAiqr2bp1AUyEwW9MWeCvZBHEXc3V0fTYVdVyzDNwHA==", + "version": "1.65.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.65.0.tgz", + "integrity": "sha512-wQhExnh2nEzpjDMSKhUvnNmz3ucpd3E+R7wJkOhBNK3No6fG3VUdmVmMOKD0A8NDZDDDiQcLNxe3oGmX5SjJ5w==", "dev": true }, "@ungap/promise-all-settled": { diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/extension/package.json --- a/src/Tools/VSCode/extension/package.json Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Tools/VSCode/extension/package.json Tue Mar 08 09:35:39 2022 +0100 @@ -17,7 +17,7 @@ "url": "https://isabelle-dev.sketis.net" }, "engines": { - "vscode": "1.64.0" + "vscode": "1.65.0" }, "categories": [ "Programming Languages" @@ -318,7 +318,7 @@ "devDependencies": { "@types/mocha": "^9.1.0", "@types/node": "^17.0.19", - "@types/vscode": "1.64.0", + "@types/vscode": "1.65.0", "mocha": "^9.2.1", "typescript": "^4.5.5" }, diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/extension/src/extension.ts --- a/src/Tools/VSCode/extension/src/extension.ts Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Tools/VSCode/extension/src/extension.ts Tue Mar 08 09:35:39 2022 +0100 @@ -33,8 +33,7 @@ try { const isabelle_home = library.getenv_strict("ISABELLE_HOME") - const symbols = await symbol.load_symbols(library.workspace_path("symbols.json")) - const workspace_dir = await Isabelle_Workspace.register(context, symbols) + const workspace_dir = await Isabelle_Workspace.register(context, symbol.symbols) const roots = workspace.workspaceFile === undefined ? await workspace.findFiles("{ROOT,ROOTS}") : [] const isabelle_tool = isabelle_home + "/bin/isabelle" diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/extension/src/isabelle_encoding.ts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Tools/VSCode/extension/src/isabelle_encoding.ts Tue Mar 08 09:35:39 2022 +0100 @@ -0,0 +1,64 @@ +/* Author: Makarius + +UTF-8-Isabelle symbol encoding: minimal dependencies, for use inside VSCode. +*/ + +'use strict'; + +import * as fs from 'fs' +import { TextEncoder, TextDecoder } from 'util' + + +/* defined symbols */ + +export interface Symbol_Entry +{ + symbol: string, + name: string, + code: number, + abbrevs: string[] +} + +const symbols_file = process.env["ISABELLE_VSCODE_SYMBOLS"] + +export const symbols: [Symbol_Entry] = + symbols_file ? JSON.parse(fs.readFileSync(symbols_file).toString()) : [] + +export const symbols_decode: Map = + new Map(symbols.map((s: Symbol_Entry) => [s.symbol, String.fromCharCode(s.code)])) + +export const symbols_encode: Map = + new Map(symbols.map((s: Symbol_Entry) => [String.fromCharCode(s.code), s.symbol])) + + +/* encoding */ + +export const UTF8_Isabelle = 'utf8-isabelle' + +export interface Options { + stripBOM?: boolean; + addBOM?: boolean; + defaultEncoding?: string; +} + +export interface EncoderStream { + write(str: string): Uint8Array + end(): Uint8Array | undefined +} + +export interface DecoderStream { + write(buf: Uint8Array): string + end(): string | undefined +} + +export function getEncoder(encoding: string, options?: Options): EncoderStream +{ + const utf8_encoder = new TextEncoder + return null +} + +export function getDecoder(encoding: string, options?: Options): DecoderStream +{ + const utf8_decoder = new TextDecoder + return null +} diff -r 21b2e37e0300 -r 810d16927cdc src/Tools/VSCode/extension/src/symbol.ts --- a/src/Tools/VSCode/extension/src/symbol.ts Mon Mar 07 15:28:53 2022 +0100 +++ b/src/Tools/VSCode/extension/src/symbol.ts Tue Mar 08 09:35:39 2022 +0100 @@ -1,12 +1,16 @@ /* Author: Makarius -Isabelle text symbols (see Pure/General/symbol.scala). +Isabelle text symbols versus UTF-8/Unicode encoding. See also: + + - Pure/General/symbol.scala + - Pure/General/utf8.scala + - https://encoding.spec.whatwg.org */ 'use strict'; -import * as library from './library' import * as file from './file' +import * as isabelle_encoding from './isabelle_encoding' /* ASCII characters */ @@ -41,13 +45,7 @@ /* defined symbols */ -export interface Entry -{ - symbol: Symbol, - name: string, - code: number, - abbrevs: string[] -} +export type Entry = isabelle_encoding.Symbol_Entry export class Symbols { @@ -72,17 +70,6 @@ { return this.entries_map.has(sym) } - - public decode(sym: Symbol): string | undefined - { - const entry = this.get(sym) - const code = entry ? entry.code : undefined - return code ? String.fromCharCode(code) : undefined - } } -export async function load_symbols(path: string): Promise -{ - const entries = await file.read_json<[Entry]>(path) - return new Symbols(entries) -} +export const symbols: Symbols = new Symbols(isabelle_encoding.symbols)