src/Tools/VSCode/patches/isabelle_encoding.ts
changeset 75261 ed83c58c612a
parent 75253 1b1b60db9dda
equal deleted inserted replaced
75260:5a15a2ceafdf 75261:ed83c58c612a
     4 */
     4 */
     5 
     5 
     6 'use strict';
     6 'use strict';
     7 
     7 
     8 
     8 
     9 /* defined symbols */
     9 /* VSCode interfaces */
    10 
       
    11 export interface Symbol_Code {
       
    12   symbol: string;
       
    13   code: number;
       
    14 }
       
    15 
       
    16 export const static_symbols: Symbol_Code[] = [/*symbols*/];
       
    17 
       
    18 export const symbols_decode: Map<string, string> =
       
    19   new Map(static_symbols.map((s: Symbol_Code) => [s.symbol, String.fromCharCode(s.code)]));
       
    20 
       
    21 export const symbols_encode: Map<string, string> =
       
    22   new Map(static_symbols.map((s: Symbol_Code) => [String.fromCharCode(s.code), s.symbol]));
       
    23 
       
    24 
       
    25 /* encoding */
       
    26 
       
    27 export const ENCODING = 'utf8isabelle';
       
    28 export const LABEL = 'UTF-8-Isabelle';
       
    29 
    10 
    30 export interface Options {
    11 export interface Options {
    31   stripBOM?: boolean;
    12   stripBOM?: boolean;
    32   addBOM?: boolean;
    13   addBOM?: boolean;
    33   defaultEncoding?: string;
    14   defaultEncoding?: string;
    34 }
    15 }
    35 
    16 
       
    17 export interface IDecoderStream {
       
    18   write(input: Uint8Array): string;
       
    19   end(): string | undefined;
       
    20 }
       
    21 
    36 export interface IEncoderStream {
    22 export interface IEncoderStream {
    37   write(input: string): Uint8Array;
    23   write(input: string): Uint8Array;
    38   end(): Uint8Array | undefined;
    24   end(): Uint8Array | undefined;
    39 }
    25 }
    40 
    26 
    41 export interface IDecoderStream {
    27 
    42   write(input: Uint8Array): string;
    28 /* ASCII characters */
    43   end(): string | undefined;
    29 
       
    30 function is_ascii(c: number): boolean {
       
    31   return 0 <= c && c <= 127;
       
    32 }
       
    33 
       
    34 function is_ascii_letter(c: number): boolean {
       
    35   return 65 <= c && c <= 90 || 97 <= c && c <= 122;
       
    36 }
       
    37 
       
    38 function is_ascii_digit(c: number): boolean {
       
    39   return 48 <= c && c <= 57;
       
    40 }
       
    41 
       
    42 function is_ascii_quasi(c: number): boolean {
       
    43   return c === 95 || c === 39;
       
    44 }
       
    45 
       
    46 function is_ascii_letdig(c: number): boolean {
       
    47   return is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c);
       
    48 }
       
    49 
       
    50 
       
    51 /* string buffer */
       
    52 
       
    53 class String_Buffer {
       
    54   state: string[]
       
    55 
       
    56   constructor() {
       
    57     this.state = [];
       
    58   }
       
    59 
       
    60   add(s: string) {
       
    61     this.state.push(s);
       
    62   }
       
    63 
       
    64   end(): string {
       
    65     const s = this.state.join('');
       
    66     this.state = [];
       
    67     return s;
       
    68   }
       
    69 }
       
    70 
       
    71 
       
    72 /* Isabelle symbol codes */
       
    73 
       
    74 function codepoint_string(c: number): string {
       
    75   return String.fromCodePoint(c);
       
    76 }
       
    77 
       
    78 const backslash: number = 92;
       
    79 const caret: number = 94;
       
    80 const bg_symbol: number = 60;
       
    81 const en_symbol: number = 62;
       
    82 
       
    83 interface Symbol_Code {
       
    84   symbol: string;
       
    85   code: number;
       
    86 }
       
    87 
       
    88 export class Symbol_Codes {
       
    89   decode_table: Map<string, string>;
       
    90   encode_table: Map<string, string>;
       
    91 
       
    92   constructor(symbols: Symbol_Code[]) {
       
    93     this.decode_table = new Map(symbols.map(s => [s.symbol, codepoint_string(s.code)]));
       
    94     this.encode_table = new Map(symbols.map(s => [codepoint_string(s.code), s.symbol]));
       
    95   }
       
    96 
       
    97   recode(str: string, do_encode: boolean): string {
       
    98     function ok(i: number): boolean {
       
    99       return 0 <= i && i < str.length;
       
   100     }
       
   101     function char(i: number): number {
       
   102       return ok(i) ? str.charCodeAt(i) : 0;
       
   103     }
       
   104     function maybe_char(c: number, i: number): number {
       
   105       return char(i) === c ? i + 1 : i;
       
   106     }
       
   107 
       
   108     function many_ascii_letdig(i: number): number {
       
   109       let k = i;
       
   110       while (is_ascii_letdig(char(k))) { k += 1 };
       
   111       return k;
       
   112     }
       
   113     function maybe_ascii_id(i: number): number {
       
   114       return is_ascii_letter(char(i)) ? many_ascii_letdig(i + 1) : i
       
   115     }
       
   116 
       
   117     function scan_ascii(start: number): string {
       
   118       let i = start;
       
   119       while (ok(i)) {
       
   120         const a = char(i);
       
   121         const b = char(i + 1);
       
   122         if (!is_ascii(a) || a === backslash && b === bg_symbol) { break; }
       
   123         else { i += 1; }
       
   124       }
       
   125       return str.substring(start, i);
       
   126     }
       
   127 
       
   128     function scan_symbol(i: number): string {
       
   129       const a = char(i);
       
   130       const b = char(i + 1);
       
   131       if (a === backslash && b === bg_symbol) {
       
   132         const j = maybe_char(en_symbol, maybe_ascii_id(maybe_char(caret, i + 2)));
       
   133         return str.substring(i, j);
       
   134       }
       
   135       else { return ''; }
       
   136     }
       
   137 
       
   138     function scan_codepoint(i: number): string {
       
   139       const c = str.codePointAt(i);
       
   140       return c === undefined ? '' : codepoint_string(c);
       
   141     }
       
   142 
       
   143 
       
   144     const table = do_encode ? this.encode_table : this.decode_table;
       
   145     const result = new String_Buffer();
       
   146     let i = 0;
       
   147     while (ok(i)) {
       
   148       const ascii = scan_ascii(i)
       
   149       if (ascii) {
       
   150         result.add(ascii)
       
   151         i += ascii.length
       
   152       }
       
   153       else {
       
   154         const s = scan_symbol(i) || scan_codepoint(i);
       
   155         if (s) {
       
   156           const r = table.get(s);
       
   157           result.add(r === undefined ? s : r);
       
   158           i += s.length;
       
   159         }
       
   160       }
       
   161     }
       
   162     return result.end();
       
   163   }
       
   164 }
       
   165 
       
   166 const symbol_codes: Symbol_Codes = new Symbol_Codes([/*symbols*/]);
       
   167 
       
   168 
       
   169 /* main API */
       
   170 
       
   171 export const ENCODING = 'utf8isabelle';
       
   172 export const LABEL = 'UTF-8-Isabelle';
       
   173 
       
   174 export function getDecoder(): IDecoderStream {
       
   175   const utf8_decoder = new TextDecoder();
       
   176   const buffer = new String_Buffer();
       
   177   return {
       
   178     write(input: Uint8Array): string {
       
   179       buffer.add(utf8_decoder.decode(input, { stream: true }));
       
   180       return '';
       
   181     },
       
   182     end(): string | undefined {
       
   183       buffer.add(utf8_decoder.decode());
       
   184       const s = buffer.end();
       
   185       return symbol_codes.recode(s, false);
       
   186     }
       
   187   };
    44 }
   188 }
    45 
   189 
    46 export function getEncoder(): IEncoderStream {
   190 export function getEncoder(): IEncoderStream {
    47   const utf8_encoder = new TextEncoder();
   191   const utf8_encoder = new TextEncoder();
       
   192   const empty = new Uint8Array(0);
       
   193   const buffer = new String_Buffer();
    48   return {
   194   return {
    49     write(input: string): Uint8Array {
   195     write(input: string): Uint8Array {
    50       return utf8_encoder.encode(input);
   196       buffer.add(input);
       
   197       return empty;
    51     },
   198     },
    52     end(): Uint8Array | undefined {
   199     end(): Uint8Array | undefined {
    53       return utf8_encoder.encode();
   200       const s = buffer.end();
    54     }
   201       return utf8_encoder.encode(symbol_codes.recode(s, true));
    55   };
   202     }
    56 }
   203   }
    57 
   204 }
    58 export function getDecoder(): IDecoderStream {
       
    59   const utf8TextDecoder = new TextDecoder();
       
    60   return {
       
    61     write(input: Uint8Array): string {
       
    62       return utf8TextDecoder.decode(input, { stream: true });
       
    63     },
       
    64     end(): string | undefined {
       
    65       return utf8TextDecoder.decode();
       
    66     }
       
    67   };
       
    68 }