src/Tools/WWW_Find/unicode_symbols.ML
author wenzelm
Mon, 20 Jun 2011 23:21:24 +0200
changeset 43485 33a24212a72d
parent 43469 882108e9536a
child 43602 8c89a1fb30f2
permissions -rw-r--r--
more tolerant Symbol.decode;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33823
24090eae50b6 standardized headers;
wenzelm
parents: 33817
diff changeset
     1
(*  Title:      Tools/WWW_Find/unicode_symbols.ML
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     2
    Author:     Timothy Bourke, NICTA
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     3
33823
24090eae50b6 standardized headers;
wenzelm
parents: 33817
diff changeset
     4
Parse the ISABELLE_HOME/etc/symbols file.
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     5
*)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     6
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     7
signature UNICODE_SYMBOLS =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     8
sig
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
     9
val symbol_to_unicode : string -> int option
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    10
val abbrev_to_unicode : string -> int option
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    11
val unicode_to_symbol : int -> string option
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    12
val unicode_to_abbrev : int -> string option
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    13
val utf8_to_symbols   : string -> string
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    14
val utf8              : int list -> string
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    15
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    16
33823
24090eae50b6 standardized headers;
wenzelm
parents: 33817
diff changeset
    17
structure UnicodeSymbols : UNICODE_SYMBOLS =
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    18
struct
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    19
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    20
local (* Lexer *)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    21
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    22
open Basic_Symbol_Pos
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    23
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    24
val keywords =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    25
  Scan.make_lexicon (map Symbol.explode ["code", "font", "abbrev", ":"]);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    26
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    27
datatype token_kind =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    28
  Symbol | AsciiSymbol | Keyword | Name | Code | Space | Comment | EOF;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    29
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    30
datatype token = Token of token_kind * string * Position.range;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    31
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    32
fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    33
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    34
in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    35
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    36
fun mk_eof pos = Token (EOF, "", (pos, Position.none));
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    37
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    38
fun str_of_token (Token (_, s, _)) = s;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    39
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    40
fun pos_of_token (Token (_, _, (pos, _))) = pos;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    41
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    42
fun int_of_code (Token (Code, s, _)) = #value (Lexicon.read_xnum s)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    43
  | int_of_code _ = error "internal error in UnicodeSymbols.int_of_code"
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    44
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    45
fun is_proper (Token (Space, _, _)) = false
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    46
  | is_proper (Token (Comment, _, _)) = false
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    47
  | is_proper _ = true;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    48
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    49
fun is_keyword kw (Token (Keyword, kw', _)) = (kw = kw')
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    50
  | is_keyword _ _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    51
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    52
fun is_ascii_sym (Token (AsciiSymbol, _, _)) = true
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    53
  | is_ascii_sym _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    54
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    55
fun is_hex_code (Token (Code, _, _)) = true
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    56
  | is_hex_code _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    57
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    58
fun is_symbol (Token (Symbol, _, _)) = true
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    59
  | is_symbol _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    60
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    61
fun is_name (Token (Name, _, _)) = true
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    62
  | is_name _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    63
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    64
fun is_eof (Token (EOF, _, _)) = true
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    65
  | is_eof _ = false;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    66
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    67
fun end_position_of (Token (_, _, (_, epos))) = epos;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    68
40527
e0c000e40c05 qualified Symbol_Pos.symbol;
wenzelm
parents: 36960
diff changeset
    69
val is_space = Symbol_Pos.symbol #> (fn s => Symbol.is_blank s andalso s <> "\n");
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    70
val scan_space =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    71
  (Scan.many1 is_space @@@ Scan.optional ($$$ "\n") []
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    72
   ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    73
   Scan.many is_space @@@ ($$$ "\n")) >> token Space;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    74
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    75
val scan_code = Lexicon.scan_hex #>> token Code;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    76
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    77
val scan_ascii_symbol = Scan.one
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    78
  ((fn x => Symbol.is_ascii x andalso
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    79
      not (Symbol.is_ascii_letter x
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    80
           orelse Symbol.is_ascii_digit x
40527
e0c000e40c05 qualified Symbol_Pos.symbol;
wenzelm
parents: 36960
diff changeset
    81
           orelse Symbol.is_ascii_blank x)) o Symbol_Pos.symbol)
e0c000e40c05 qualified Symbol_Pos.symbol;
wenzelm
parents: 36960
diff changeset
    82
  -- Scan.many (not o Symbol.is_ascii_blank o Symbol_Pos.symbol)
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    83
  >> (token AsciiSymbol o op ::);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    84
40627
becf5d5187cc renamed raw "explode" function to "raw_explode" to emphasize its meaning;
wenzelm
parents: 40527
diff changeset
    85
fun not_contains xs c = not (member (op =) (raw_explode xs) (Symbol_Pos.symbol c));
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    86
val scan_comment =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    87
  $$$ "#" |-- (Scan.many (not_contains "\n") @@@ ($$$ "\n"))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    88
  >> token Comment;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    89
43469
882108e9536a accept control symbols;
wenzelm
parents: 40627
diff changeset
    90
fun is_sym s =
43485
33a24212a72d more tolerant Symbol.decode;
wenzelm
parents: 43469
diff changeset
    91
  (case Symbol.decode s of
33a24212a72d more tolerant Symbol.decode;
wenzelm
parents: 43469
diff changeset
    92
    Symbol.Sym _ => true
33a24212a72d more tolerant Symbol.decode;
wenzelm
parents: 43469
diff changeset
    93
  | Symbol.Ctrl _ => true
33a24212a72d more tolerant Symbol.decode;
wenzelm
parents: 43469
diff changeset
    94
  | _ => false);
43469
882108e9536a accept control symbols;
wenzelm
parents: 40627
diff changeset
    95
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    96
fun tokenize syms =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    97
  let
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
    98
    val scanner =
43469
882108e9536a accept control symbols;
wenzelm
parents: 40627
diff changeset
    99
      Scan.one (is_sym o Symbol_Pos.symbol) >> (token Symbol o single) ||
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   100
      scan_comment ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   101
      scan_space ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   102
      scan_code ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   103
      Scan.literal keywords >> token Keyword ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   104
      scan_ascii_symbol ||
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   105
      Lexicon.scan_id >> token Name;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   106
    val scan_token = Symbol_Pos.!!! "Lexical error" (Scan.bulk scanner);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   107
  in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   108
    (case Scan.error (Scan.finite Symbol_Pos.stopper scan_token) syms of
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   109
      (toks, []) => toks
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   110
    | (_, ss) => error ("Lexical error at: " ^ Symbol_Pos.content ss
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   111
                        ^ Position.str_of (#1 (Symbol_Pos.range ss))))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   112
  end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   113
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   114
val stopper =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   115
  Scan.stopper
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   116
    (fn [] => mk_eof Position.none
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   117
      | toks => mk_eof (end_position_of (List.last toks))) is_eof;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   118
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   119
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   120
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   121
local (* Parser *)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   122
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   123
fun $$$ kw = Scan.one (is_keyword kw) >> str_of_token;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   124
val hex_code = Scan.one is_hex_code >> int_of_code;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   125
val ascii_sym = Scan.one is_ascii_sym >> str_of_token;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   126
val symbol = Scan.one is_symbol >> (fn t => (str_of_token t, pos_of_token t));
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   127
val name = Scan.one is_name >> str_of_token;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   128
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   129
val unicode = $$$ "code" -- $$$ ":" |-- hex_code;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   130
val font = Scan.option ($$$ "font" -- $$$ ":" |-- name);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   131
val abbr = Scan.option ($$$ "abbrev" -- $$$ ":"
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   132
                        |-- (ascii_sym || $$$ ":" || name));
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   133
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   134
in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   135
36960
01594f816e3a prefer structure Keyword, Parse, Parse_Spec, Outer_Syntax;
wenzelm
parents: 36692
diff changeset
   136
val line = (symbol -- unicode --| font -- abbr) >> Parse.triple1;
33817
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   137
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   138
val symbols_path =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   139
  [getenv "ISABELLE_HOME", "etc", "symbols"]
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   140
  |> map Path.explode
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   141
  |> Path.appends;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   142
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   143
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   144
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   145
local (* build tables *)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   146
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   147
fun add_entries ((fromsym, fromabbr, tosym, toabbr), ((sym, pos), uni, oabbr)) =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   148
  (case oabbr of
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   149
     NONE =>
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   150
       (Symtab.update_new (sym, uni) fromsym,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   151
        fromabbr,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   152
        Inttab.update (uni, sym) tosym,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   153
        toabbr)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   154
   | SOME abbr =>
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   155
       (Symtab.update_new (sym, uni) fromsym,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   156
        Symtab.update_new (abbr, uni) fromabbr,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   157
        Inttab.update (uni, sym) tosym,
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   158
        Inttab.update (uni, abbr) toabbr))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   159
  handle Symtab.DUP sym => error ("Duplicate at" ^ Position.str_of pos)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   160
       | Inttab.DUP sym => error ("Duplicate code at" ^ Position.str_of pos);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   161
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   162
in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   163
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   164
fun read_symbols path =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   165
  let
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   166
    val parsed_lines =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   167
      File.read path
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   168
      |> (fn x => Symbol_Pos.explode (x, Position.file (Path.implode path)))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   169
      |> tokenize
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   170
      |> filter is_proper
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   171
      |> Scan.finite stopper (Scan.repeat line)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   172
      |> fst;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   173
  in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   174
    Library.foldl add_entries
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   175
      ((Symtab.empty, Symtab.empty, Inttab.empty, Inttab.empty),
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   176
       parsed_lines)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   177
  end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   178
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   179
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   180
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   181
local
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   182
val (fromsym, fromabbr, tosym, toabbr) = read_symbols symbols_path;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   183
in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   184
val symbol_to_unicode = Symtab.lookup fromsym;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   185
val abbrev_to_unicode = Symtab.lookup fromabbr;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   186
val unicode_to_symbol = Inttab.lookup tosym;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   187
val unicode_to_abbrev = Inttab.lookup toabbr;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   188
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   189
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   190
fun utf8_to_symbols utf8str =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   191
  let
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   192
    val get_next =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   193
      Substring.getc
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   194
      #> Option.map (apfst Byte.charToByte);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   195
    val wstr = String.str o Byte.byteToChar;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   196
    val replacement_char = "\<questiondown>";
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   197
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   198
    fun word_to_symbol w =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   199
      (case (unicode_to_symbol o Word32.toInt) w of
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   200
         NONE => "?"
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   201
       | SOME s => s);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   202
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   203
    fun andb32 (w1, w2) =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   204
      Word8.andb(w1, w2)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   205
      |> Word8.toLarge
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   206
      |> Word32.fromLarge;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   207
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   208
    fun read_next (ss, 0, c) = (word_to_symbol c, ss)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   209
      | read_next (ss, n, c) =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   210
          (case get_next ss of
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   211
             NONE => (replacement_char, ss)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   212
           | SOME (w, ss') =>
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   213
               if Word8.andb (w, 0wxc0) <> 0wx80
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   214
               then (replacement_char, ss')
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   215
               else
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   216
                 let
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   217
                   val w' = (Word8.andb (w, 0wx3f));
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   218
                   val bw = (Word32.fromLarge o Word8.toLarge) w';
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   219
                   val c' = Word32.<< (c, 0wx6);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   220
                 in read_next (ss', n - 1, Word32.orb (c', bw)) end);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   221
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   222
    fun do_char (w, ss) =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   223
      if Word8.andb (w, 0wx80) = 0wx00
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   224
      then (wstr w, ss)
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   225
      else if Word8.andb (w, 0wx60) = 0wx40
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   226
      then read_next (ss, 1, andb32 (w, 0wx1f))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   227
      else if Word8.andb (w, 0wxf0) = 0wxe0
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   228
      then read_next (ss, 2, andb32 (w, 0wx0f))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   229
      else if Word8.andb (w, 0wxf8) = 0wxf0
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   230
      then read_next (ss, 3, andb32 (w, 0wx07))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   231
      else (replacement_char, ss);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   232
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   233
    fun read (rs, ss) =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   234
      (case Option.map do_char (get_next ss) of
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   235
         NONE => (implode o rev) rs
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   236
       | SOME (s, ss') => read (s::rs, ss'));
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   237
  in read ([], Substring.full utf8str) end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   238
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   239
local
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   240
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   241
fun consec n =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   242
  fn w => (
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   243
    Word32.>> (w, Word.fromInt (n * 6))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   244
    |> (curry Word32.andb) 0wx3f
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   245
    |> (curry Word32.orb) 0wx80
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   246
    |> Word8.fromLargeWord o Word32.toLargeWord);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   247
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   248
fun stamp n =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   249
  fn w => (
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   250
    Word32.>> (w, Word.fromInt (n * 6))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   251
    |> (curry Word32.andb) (Word32.>> (0wx000000ff, Word.fromInt (n + 2)))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   252
    |> (curry Word32.orb) (Word32.<< (0wxffffffff, Word.fromInt (7 - n)))
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   253
    |> Word8.fromLargeWord o Word32.toLargeWord);
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   254
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   255
fun to_utf8_bytes i =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   256
  if i <= 0x007f
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   257
  then [Word8.fromInt i]
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   258
  else let
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   259
    val w = Word32.fromInt i;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   260
  in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   261
    if i < 0x07ff
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   262
    then [stamp 1 w, consec 0 w]
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   263
    else if i < 0xffff
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   264
    then [stamp 2 w, consec 1 w, consec 0 w]
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   265
    else if i < 0x10ffff
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   266
    then [stamp 3 w, consec 2 w, consec 1 w, consec 0 w]
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   267
    else []
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   268
  end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   269
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   270
in
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   271
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   272
fun utf8 is =
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   273
  map to_utf8_bytes is
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   274
  |> flat
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   275
  |> Word8Vector.fromList
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   276
  |> Byte.bytesToString;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   277
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   278
end
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   279
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   280
end;
f6a4da31f2f1 WWW_Find component: find_theorems via web browser
kleing
parents:
diff changeset
   281