# HG changeset patch # User wenzelm # Date 1646939779 -3600 # Node ID ed83c58c612ad177f43d9ae533a9475996a32aaa # Parent 5a15a2ceafdfdf9c98f0e022cb171297e42f9c62 actually decode/encode symbols; diff -r 5a15a2ceafdf -r ed83c58c612a src/Tools/VSCode/patches/isabelle_encoding.ts --- a/src/Tools/VSCode/patches/isabelle_encoding.ts Thu Mar 10 12:34:02 2022 +0100 +++ b/src/Tools/VSCode/patches/isabelle_encoding.ts Thu Mar 10 20:16:19 2022 +0100 @@ -6,26 +6,7 @@ 'use strict'; -/* defined symbols */ - -export interface Symbol_Code { - symbol: string; - code: number; -} - -export const static_symbols: Symbol_Code[] = [/*symbols*/]; - -export const symbols_decode: Map = - new Map(static_symbols.map((s: Symbol_Code) => [s.symbol, String.fromCharCode(s.code)])); - -export const symbols_encode: Map = - new Map(static_symbols.map((s: Symbol_Code) => [String.fromCharCode(s.code), s.symbol])); - - -/* encoding */ - -export const ENCODING = 'utf8isabelle'; -export const LABEL = 'UTF-8-Isabelle'; +/* VSCode interfaces */ export interface Options { stripBOM?: boolean; @@ -33,36 +14,191 @@ defaultEncoding?: string; } -export interface IEncoderStream { - write(input: string): Uint8Array; - end(): Uint8Array | undefined; -} - export interface IDecoderStream { write(input: Uint8Array): string; end(): string | undefined; } -export function getEncoder(): IEncoderStream { - const utf8_encoder = new TextEncoder(); +export interface IEncoderStream { + write(input: string): Uint8Array; + end(): Uint8Array | undefined; +} + + +/* ASCII characters */ + +function is_ascii(c: number): boolean { + return 0 <= c && c <= 127; +} + +function is_ascii_letter(c: number): boolean { + return 65 <= c && c <= 90 || 97 <= c && c <= 122; +} + +function is_ascii_digit(c: number): boolean { + return 48 <= c && c <= 57; +} + +function is_ascii_quasi(c: number): boolean { + return c === 95 || c === 39; +} + +function is_ascii_letdig(c: number): boolean { + return is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c); +} + + +/* string buffer */ + +class String_Buffer { + state: string[] + + constructor() { + this.state = []; + } + + add(s: string) { + this.state.push(s); + } + + end(): string { + const s = this.state.join(''); + this.state = []; + return s; + } +} + + +/* Isabelle symbol codes */ + +function codepoint_string(c: number): string { + return String.fromCodePoint(c); +} + +const backslash: number = 92; +const caret: number = 94; +const bg_symbol: number = 60; +const en_symbol: number = 62; + +interface Symbol_Code { + symbol: string; + code: number; +} + +export class Symbol_Codes { + decode_table: Map; + encode_table: Map; + + constructor(symbols: Symbol_Code[]) { + this.decode_table = new Map(symbols.map(s => [s.symbol, codepoint_string(s.code)])); + this.encode_table = new Map(symbols.map(s => [codepoint_string(s.code), s.symbol])); + } + + recode(str: string, do_encode: boolean): string { + function ok(i: number): boolean { + return 0 <= i && i < str.length; + } + function char(i: number): number { + return ok(i) ? str.charCodeAt(i) : 0; + } + function maybe_char(c: number, i: number): number { + return char(i) === c ? i + 1 : i; + } + + function many_ascii_letdig(i: number): number { + let k = i; + while (is_ascii_letdig(char(k))) { k += 1 }; + return k; + } + function maybe_ascii_id(i: number): number { + return is_ascii_letter(char(i)) ? many_ascii_letdig(i + 1) : i + } + + function scan_ascii(start: number): string { + let i = start; + while (ok(i)) { + const a = char(i); + const b = char(i + 1); + if (!is_ascii(a) || a === backslash && b === bg_symbol) { break; } + else { i += 1; } + } + return str.substring(start, i); + } + + function scan_symbol(i: number): string { + const a = char(i); + const b = char(i + 1); + if (a === backslash && b === bg_symbol) { + const j = maybe_char(en_symbol, maybe_ascii_id(maybe_char(caret, i + 2))); + return str.substring(i, j); + } + else { return ''; } + } + + function scan_codepoint(i: number): string { + const c = str.codePointAt(i); + return c === undefined ? '' : codepoint_string(c); + } + + + const table = do_encode ? this.encode_table : this.decode_table; + const result = new String_Buffer(); + let i = 0; + while (ok(i)) { + const ascii = scan_ascii(i) + if (ascii) { + result.add(ascii) + i += ascii.length + } + else { + const s = scan_symbol(i) || scan_codepoint(i); + if (s) { + const r = table.get(s); + result.add(r === undefined ? s : r); + i += s.length; + } + } + } + return result.end(); + } +} + +const symbol_codes: Symbol_Codes = new Symbol_Codes([/*symbols*/]); + + +/* main API */ + +export const ENCODING = 'utf8isabelle'; +export const LABEL = 'UTF-8-Isabelle'; + +export function getDecoder(): IDecoderStream { + const utf8_decoder = new TextDecoder(); + const buffer = new String_Buffer(); return { - write(input: string): Uint8Array { - return utf8_encoder.encode(input); + write(input: Uint8Array): string { + buffer.add(utf8_decoder.decode(input, { stream: true })); + return ''; }, - end(): Uint8Array | undefined { - return utf8_encoder.encode(); + end(): string | undefined { + buffer.add(utf8_decoder.decode()); + const s = buffer.end(); + return symbol_codes.recode(s, false); } }; } -export function getDecoder(): IDecoderStream { - const utf8TextDecoder = new TextDecoder(); +export function getEncoder(): IEncoderStream { + const utf8_encoder = new TextEncoder(); + const empty = new Uint8Array(0); + const buffer = new String_Buffer(); return { - write(input: Uint8Array): string { - return utf8TextDecoder.decode(input, { stream: true }); + write(input: string): Uint8Array { + buffer.add(input); + return empty; }, - end(): string | undefined { - return utf8TextDecoder.decode(); + end(): Uint8Array | undefined { + const s = buffer.end(); + return utf8_encoder.encode(symbol_codes.recode(s, true)); } - }; + } }