4 */ |
4 */ |
5 |
5 |
6 'use strict'; |
6 'use strict'; |
7 |
7 |
8 |
8 |
9 /* defined symbols */ |
9 /* VSCode interfaces */ |
10 |
|
11 export interface Symbol_Code { |
|
12 symbol: string; |
|
13 code: number; |
|
14 } |
|
15 |
|
16 export const static_symbols: Symbol_Code[] = [/*symbols*/]; |
|
17 |
|
18 export const symbols_decode: Map<string, string> = |
|
19 new Map(static_symbols.map((s: Symbol_Code) => [s.symbol, String.fromCharCode(s.code)])); |
|
20 |
|
21 export const symbols_encode: Map<string, string> = |
|
22 new Map(static_symbols.map((s: Symbol_Code) => [String.fromCharCode(s.code), s.symbol])); |
|
23 |
|
24 |
|
25 /* encoding */ |
|
26 |
|
27 export const ENCODING = 'utf8isabelle'; |
|
28 export const LABEL = 'UTF-8-Isabelle'; |
|
29 |
10 |
30 export interface Options { |
11 export interface Options { |
31 stripBOM?: boolean; |
12 stripBOM?: boolean; |
32 addBOM?: boolean; |
13 addBOM?: boolean; |
33 defaultEncoding?: string; |
14 defaultEncoding?: string; |
34 } |
15 } |
35 |
16 |
|
17 export interface IDecoderStream { |
|
18 write(input: Uint8Array): string; |
|
19 end(): string | undefined; |
|
20 } |
|
21 |
36 export interface IEncoderStream { |
22 export interface IEncoderStream { |
37 write(input: string): Uint8Array; |
23 write(input: string): Uint8Array; |
38 end(): Uint8Array | undefined; |
24 end(): Uint8Array | undefined; |
39 } |
25 } |
40 |
26 |
41 export interface IDecoderStream { |
27 |
42 write(input: Uint8Array): string; |
28 /* ASCII characters */ |
43 end(): string | undefined; |
29 |
|
30 function is_ascii(c: number): boolean { |
|
31 return 0 <= c && c <= 127; |
|
32 } |
|
33 |
|
34 function is_ascii_letter(c: number): boolean { |
|
35 return 65 <= c && c <= 90 || 97 <= c && c <= 122; |
|
36 } |
|
37 |
|
38 function is_ascii_digit(c: number): boolean { |
|
39 return 48 <= c && c <= 57; |
|
40 } |
|
41 |
|
42 function is_ascii_quasi(c: number): boolean { |
|
43 return c === 95 || c === 39; |
|
44 } |
|
45 |
|
46 function is_ascii_letdig(c: number): boolean { |
|
47 return is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c); |
|
48 } |
|
49 |
|
50 |
|
51 /* string buffer */ |
|
52 |
|
53 class String_Buffer { |
|
54 state: string[] |
|
55 |
|
56 constructor() { |
|
57 this.state = []; |
|
58 } |
|
59 |
|
60 add(s: string) { |
|
61 this.state.push(s); |
|
62 } |
|
63 |
|
64 end(): string { |
|
65 const s = this.state.join(''); |
|
66 this.state = []; |
|
67 return s; |
|
68 } |
|
69 } |
|
70 |
|
71 |
|
72 /* Isabelle symbol codes */ |
|
73 |
|
74 function codepoint_string(c: number): string { |
|
75 return String.fromCodePoint(c); |
|
76 } |
|
77 |
|
78 const backslash: number = 92; |
|
79 const caret: number = 94; |
|
80 const bg_symbol: number = 60; |
|
81 const en_symbol: number = 62; |
|
82 |
|
83 interface Symbol_Code { |
|
84 symbol: string; |
|
85 code: number; |
|
86 } |
|
87 |
|
88 export class Symbol_Codes { |
|
89 decode_table: Map<string, string>; |
|
90 encode_table: Map<string, string>; |
|
91 |
|
92 constructor(symbols: Symbol_Code[]) { |
|
93 this.decode_table = new Map(symbols.map(s => [s.symbol, codepoint_string(s.code)])); |
|
94 this.encode_table = new Map(symbols.map(s => [codepoint_string(s.code), s.symbol])); |
|
95 } |
|
96 |
|
97 recode(str: string, do_encode: boolean): string { |
|
98 function ok(i: number): boolean { |
|
99 return 0 <= i && i < str.length; |
|
100 } |
|
101 function char(i: number): number { |
|
102 return ok(i) ? str.charCodeAt(i) : 0; |
|
103 } |
|
104 function maybe_char(c: number, i: number): number { |
|
105 return char(i) === c ? i + 1 : i; |
|
106 } |
|
107 |
|
108 function many_ascii_letdig(i: number): number { |
|
109 let k = i; |
|
110 while (is_ascii_letdig(char(k))) { k += 1 }; |
|
111 return k; |
|
112 } |
|
113 function maybe_ascii_id(i: number): number { |
|
114 return is_ascii_letter(char(i)) ? many_ascii_letdig(i + 1) : i |
|
115 } |
|
116 |
|
117 function scan_ascii(start: number): string { |
|
118 let i = start; |
|
119 while (ok(i)) { |
|
120 const a = char(i); |
|
121 const b = char(i + 1); |
|
122 if (!is_ascii(a) || a === backslash && b === bg_symbol) { break; } |
|
123 else { i += 1; } |
|
124 } |
|
125 return str.substring(start, i); |
|
126 } |
|
127 |
|
128 function scan_symbol(i: number): string { |
|
129 const a = char(i); |
|
130 const b = char(i + 1); |
|
131 if (a === backslash && b === bg_symbol) { |
|
132 const j = maybe_char(en_symbol, maybe_ascii_id(maybe_char(caret, i + 2))); |
|
133 return str.substring(i, j); |
|
134 } |
|
135 else { return ''; } |
|
136 } |
|
137 |
|
138 function scan_codepoint(i: number): string { |
|
139 const c = str.codePointAt(i); |
|
140 return c === undefined ? '' : codepoint_string(c); |
|
141 } |
|
142 |
|
143 |
|
144 const table = do_encode ? this.encode_table : this.decode_table; |
|
145 const result = new String_Buffer(); |
|
146 let i = 0; |
|
147 while (ok(i)) { |
|
148 const ascii = scan_ascii(i) |
|
149 if (ascii) { |
|
150 result.add(ascii) |
|
151 i += ascii.length |
|
152 } |
|
153 else { |
|
154 const s = scan_symbol(i) || scan_codepoint(i); |
|
155 if (s) { |
|
156 const r = table.get(s); |
|
157 result.add(r === undefined ? s : r); |
|
158 i += s.length; |
|
159 } |
|
160 } |
|
161 } |
|
162 return result.end(); |
|
163 } |
|
164 } |
|
165 |
|
166 const symbol_codes: Symbol_Codes = new Symbol_Codes([/*symbols*/]); |
|
167 |
|
168 |
|
169 /* main API */ |
|
170 |
|
171 export const ENCODING = 'utf8isabelle'; |
|
172 export const LABEL = 'UTF-8-Isabelle'; |
|
173 |
|
174 export function getDecoder(): IDecoderStream { |
|
175 const utf8_decoder = new TextDecoder(); |
|
176 const buffer = new String_Buffer(); |
|
177 return { |
|
178 write(input: Uint8Array): string { |
|
179 buffer.add(utf8_decoder.decode(input, { stream: true })); |
|
180 return ''; |
|
181 }, |
|
182 end(): string | undefined { |
|
183 buffer.add(utf8_decoder.decode()); |
|
184 const s = buffer.end(); |
|
185 return symbol_codes.recode(s, false); |
|
186 } |
|
187 }; |
44 } |
188 } |
45 |
189 |
46 export function getEncoder(): IEncoderStream { |
190 export function getEncoder(): IEncoderStream { |
47 const utf8_encoder = new TextEncoder(); |
191 const utf8_encoder = new TextEncoder(); |
|
192 const empty = new Uint8Array(0); |
|
193 const buffer = new String_Buffer(); |
48 return { |
194 return { |
49 write(input: string): Uint8Array { |
195 write(input: string): Uint8Array { |
50 return utf8_encoder.encode(input); |
196 buffer.add(input); |
|
197 return empty; |
51 }, |
198 }, |
52 end(): Uint8Array | undefined { |
199 end(): Uint8Array | undefined { |
53 return utf8_encoder.encode(); |
200 const s = buffer.end(); |
54 } |
201 return utf8_encoder.encode(symbol_codes.recode(s, true)); |
55 }; |
202 } |
56 } |
203 } |
57 |
204 } |
58 export function getDecoder(): IDecoderStream { |
|
59 const utf8TextDecoder = new TextDecoder(); |
|
60 return { |
|
61 write(input: Uint8Array): string { |
|
62 return utf8TextDecoder.decode(input, { stream: true }); |
|
63 }, |
|
64 end(): string | undefined { |
|
65 return utf8TextDecoder.decode(); |
|
66 } |
|
67 }; |
|
68 } |
|