src/Pure/General/symbol.ML
changeset 40523 1050315f6ee2
parent 40509 0bc83ae22789
child 40627 becf5d5187cc
equal deleted inserted replaced
40522:37b79d789d91 40523:1050315f6ee2
    22   val is_eof: symbol -> bool
    22   val is_eof: symbol -> bool
    23   val not_eof: symbol -> bool
    23   val not_eof: symbol -> bool
    24   val stopper: symbol Scan.stopper
    24   val stopper: symbol Scan.stopper
    25   val sync: symbol
    25   val sync: symbol
    26   val is_sync: symbol -> bool
    26   val is_sync: symbol -> bool
    27   val malformed: symbol
       
    28   val end_malformed: symbol
       
    29   val separate_chars: string -> string
       
    30   val is_regular: symbol -> bool
    27   val is_regular: symbol -> bool
       
    28   val is_malformed: symbol -> bool
       
    29   val malformed_msg: symbol -> string
    31   val is_ascii: symbol -> bool
    30   val is_ascii: symbol -> bool
    32   val is_ascii_letter: symbol -> bool
    31   val is_ascii_letter: symbol -> bool
    33   val is_ascii_digit: symbol -> bool
    32   val is_ascii_digit: symbol -> bool
    34   val is_ascii_hex: symbol -> bool
    33   val is_ascii_hex: symbol -> bool
    35   val is_ascii_quasi: symbol -> bool
    34   val is_ascii_quasi: symbol -> bool
    40   val to_ascii_lower: symbol -> symbol
    39   val to_ascii_lower: symbol -> symbol
    41   val to_ascii_upper: symbol -> symbol
    40   val to_ascii_upper: symbol -> symbol
    42   val is_raw: symbol -> bool
    41   val is_raw: symbol -> bool
    43   val decode_raw: symbol -> string
    42   val decode_raw: symbol -> string
    44   val encode_raw: string -> string
    43   val encode_raw: string -> string
    45   datatype sym = Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string
    44   datatype sym =
       
    45     Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string |
       
    46     Malformed of string
    46   val decode: symbol -> sym
    47   val decode: symbol -> sym
    47   datatype kind = Letter | Digit | Quasi | Blank | Other
    48   datatype kind = Letter | Digit | Quasi | Blank | Other
    48   val kind: symbol -> kind
    49   val kind: symbol -> kind
    49   val is_letter: symbol -> bool
    50   val is_letter: symbol -> bool
    50   val is_digit: symbol -> bool
    51   val is_digit: symbol -> bool
    54   val is_letdig: symbol -> bool
    55   val is_letdig: symbol -> bool
    55   val is_ident: symbol list -> bool
    56   val is_ident: symbol list -> bool
    56   val beginning: int -> symbol list -> string
    57   val beginning: int -> symbol list -> string
    57   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    58   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    58   val scan_id: string list -> string * string list
    59   val scan_id: string list -> string * string list
    59   val source: {do_recover: bool} -> (string, 'a) Source.source ->
    60   val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source
    60     (symbol, (string, 'a) Source.source) Source.source
       
    61   val explode: string -> symbol list
    61   val explode: string -> symbol list
    62   val esc: symbol -> string
    62   val esc: symbol -> string
    63   val escape: string -> string
    63   val escape: string -> string
    64   val strip_blanks: string -> string
    64   val strip_blanks: string -> string
    65   val bump_init: string -> string
    65   val bump_init: string -> string
   109 fun is_char s = size s = 1;
   109 fun is_char s = size s = 1;
   110 
   110 
   111 fun is_utf8 s = size s > 0 andalso forall_string (fn c => ord c >= 128) s;
   111 fun is_utf8 s = size s > 0 andalso forall_string (fn c => ord c >= 128) s;
   112 
   112 
   113 fun is_symbolic s =
   113 fun is_symbolic s =
   114   String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);
   114   String.isPrefix "\\<" s andalso String.isSuffix ">" s andalso not (String.isPrefix "\\<^" s);
   115 
   115 
   116 fun is_printable s =
   116 fun is_printable s =
   117   if is_char s then ord space <= ord s andalso ord s <= ord "~"
   117   if is_char s then ord space <= ord s andalso ord s <= ord "~"
   118   else is_utf8 s orelse not (String.isPrefix "\\<^" s);
   118   else is_utf8 s orelse is_symbolic s;
   119 
   119 
   120 
   120 
   121 (* input source control *)
   121 (* input source control *)
   122 
   122 
   123 val eof = "";
   123 val eof = "";
   126 val stopper = Scan.stopper (K eof) is_eof;
   126 val stopper = Scan.stopper (K eof) is_eof;
   127 
   127 
   128 val sync = "\\<^sync>";
   128 val sync = "\\<^sync>";
   129 fun is_sync s = s = sync;
   129 fun is_sync s = s = sync;
   130 
   130 
   131 val malformed = "[[";
   131 fun is_regular s = not_eof s andalso s <> sync;
   132 val end_malformed = "]]";
   132 
   133 
   133 fun is_malformed s = String.isPrefix "\\<" s andalso not (String.isSuffix ">" s);
   134 val separate_chars = explode #> space_implode space;
   134 fun malformed_msg s = "Malformed symbolic character: " ^ quote s;
   135 fun malformed_msg s = "Malformed symbolic character: " ^ quote (separate_chars s);
       
   136 
       
   137 fun is_regular s =
       
   138   not_eof s andalso s <> sync andalso s <> malformed andalso s <> end_malformed;
       
   139 
   135 
   140 
   136 
   141 (* ascii symbols *)
   137 (* ascii symbols *)
   142 
   138 
   143 fun is_ascii s = is_char s andalso ord s < 128;
   139 fun is_ascii s = is_char s andalso ord s < 128;
   223 
   219 
   224 
   220 
   225 (* symbol variants *)
   221 (* symbol variants *)
   226 
   222 
   227 datatype sym =
   223 datatype sym =
   228   Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string;
   224   Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string |
       
   225   Malformed of string;
   229 
   226 
   230 fun decode s =
   227 fun decode s =
   231   if is_char s then Char s
   228   if is_char s then Char s
   232   else if is_utf8 s then UTF8 s
   229   else if is_utf8 s then UTF8 s
   233   else if is_raw s then Raw (decode_raw s)
   230   else if is_raw s then Raw (decode_raw s)
       
   231   else if is_malformed s then Malformed s
   234   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   232   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   235   else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
   233   else Sym (String.substring (s, 2, size s - 3));
   236   else error (malformed_msg s);
       
   237 
   234 
   238 
   235 
   239 (* standard symbol kinds *)
   236 (* standard symbol kinds *)
   240 
   237 
   241 datatype kind = Letter | Digit | Quasi | Blank | Other;
   238 datatype kind = Letter | Digit | Quasi | Blank | Other;
   443 
   440 
   444 val scan_raw =
   441 val scan_raw =
   445   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
   442   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
   446   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
   443   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
   447 
   444 
   448 val scan =
   445 val scan_total =
   449   Scan.one is_plain ||
   446   Scan.one is_plain ||
   450   Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
   447   Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
   451   scan_encoded_newline ||
   448   scan_encoded_newline ||
   452   ($$ "\\" ^^ $$ "<" ^^
   449   ($$ "\\" ^^ $$ "<" ^^ (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   453     !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
   450   Scan.this_string "\\<^" ||
   454       (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   451   Scan.this_string "\\<" ||
   455   Scan.one not_eof;
   452   Scan.one not_eof;
   456 
   453 
   457 val scan_resync =
       
   458   Scan.one is_ascii_blank || $$ "\"" || $$ "`" || $$ "\\" ||
       
   459   Scan.this_string "(*" || Scan.this_string "*)" ||
       
   460   Scan.this_string "{*" || Scan.this_string "*}";
       
   461 
       
   462 val recover =
       
   463   Scan.this ["\\", "<"] @@@
       
   464     Scan.repeat (Scan.unless scan_resync (Scan.one not_eof))
       
   465   >> (fn ss => malformed :: ss @ [end_malformed]);
       
   466 
       
   467 in
   454 in
   468 
   455 
   469 fun source {do_recover} src =
   456 fun source src = Source.source stopper (Scan.bulk scan_total) NONE src;
   470   Source.source stopper (Scan.bulk scan)
       
   471     (if do_recover then SOME (false, K recover) else NONE) src;
       
   472 
   457 
   473 end;
   458 end;
   474 
   459 
   475 
   460 
   476 (* explode *)
   461 (* explode *)
   485 in
   470 in
   486 
   471 
   487 fun sym_explode str =
   472 fun sym_explode str =
   488   let val chs = explode str in
   473   let val chs = explode str in
   489     if no_explode chs then chs
   474     if no_explode chs then chs
   490     else Source.exhaust (source {do_recover = false} (Source.of_list chs))
   475     else Source.exhaust (source (Source.of_list chs))
   491   end;
   476   end;
   492 
   477 
   493 end;
   478 end;
   494 
   479 
   495 
   480