src/Pure/General/symbol.ML
changeset 23676 ea9b7e9c2301
parent 23618 32ee8cac5c02
child 23682 cf4773532006
equal deleted inserted replaced
23675:2d618c190466 23676:ea9b7e9c2301
    19   val stopper: symbol * (symbol -> bool)
    19   val stopper: symbol * (symbol -> bool)
    20   val sync: symbol
    20   val sync: symbol
    21   val is_sync: symbol -> bool
    21   val is_sync: symbol -> bool
    22   val not_sync: symbol -> bool
    22   val not_sync: symbol -> bool
    23   val malformed: symbol
    23   val malformed: symbol
       
    24   val end_malformed: symbol
    24   val is_ascii: symbol -> bool
    25   val is_ascii: symbol -> bool
    25   val is_ascii_letter: symbol -> bool
    26   val is_ascii_letter: symbol -> bool
    26   val is_hex_letter: symbol -> bool
    27   val is_hex_letter: symbol -> bool
    27   val is_ascii_digit: symbol -> bool
    28   val is_ascii_digit: symbol -> bool
    28   val is_ascii_quasi: symbol -> bool
    29   val is_ascii_quasi: symbol -> bool
    46   val is_letdig: symbol -> bool
    47   val is_letdig: symbol -> bool
    47   val is_ident: symbol list -> bool
    48   val is_ident: symbol list -> bool
    48   val beginning: int -> symbol list -> string
    49   val beginning: int -> symbol list -> string
    49   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    50   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    50   val scan_id: string list -> string * string list
    51   val scan_id: string list -> string * string list
    51   val scan: string list -> symbol * string list
       
    52   val source: bool -> (string, 'a) Source.source ->
    52   val source: bool -> (string, 'a) Source.source ->
    53     (symbol, (string, 'a) Source.source) Source.source
    53     (symbol, (string, 'a) Source.source) Source.source
    54   val explode: string -> symbol list
    54   val explode: string -> symbol list
    55   val chars_only: string -> bool
       
    56   val escape: string -> string
    55   val escape: string -> string
    57   val strip_blanks: string -> string
    56   val strip_blanks: string -> string
    58   val bump_init: string -> string
    57   val bump_init: string -> string
    59   val bump_string: string -> string
    58   val bump_string: string -> string
    60   val length: symbol list -> int
    59   val length: symbol list -> int
   111 
   110 
   112 val sync = "\\<^sync>";
   111 val sync = "\\<^sync>";
   113 fun is_sync s = s = sync;
   112 fun is_sync s = s = sync;
   114 fun not_sync s = s <> sync;
   113 fun not_sync s = s <> sync;
   115 
   114 
   116 val malformed = "\\<^malformed>";
   115 val malformed = "[[";
       
   116 val end_malformed = "]]";
       
   117 
       
   118 fun malformed_msg s =  (*Output.escape avoids looping errors*)
       
   119   "Malformed symbolic character: " ^ quote (Output.escape s);
   117 
   120 
   118 
   121 
   119 (* ascii symbols *)
   122 (* ascii symbols *)
   120 
   123 
   121 fun is_ascii s = is_char s andalso ord s < 128;
   124 fun is_ascii s = is_char s andalso ord s < 128;
   154   ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
   157   ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
   155   orelse ord c >= 128;
   158   orelse ord c >= 128;
   156 
   159 
   157 fun encode_raw str =
   160 fun encode_raw str =
   158   let
   161   let
   159     val raw1 = enclose "\\<^raw:" ">" o implode;
   162     val raw0 = enclose "\\<^raw:" ">";
       
   163     val raw1 = raw0 o implode;
   160     val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
   164     val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
   161 
   165 
   162     fun encode cs = enc (Library.take_prefix raw_chr cs)
   166     fun encode cs = enc (Library.take_prefix raw_chr cs)
   163     and enc ([], []) = []
   167     and enc ([], []) = []
   164       | enc (cs, []) = [raw1 cs]
   168       | enc (cs, []) = [raw1 cs]
   165       | enc ([], d :: ds) = raw2 d :: encode ds
   169       | enc ([], d :: ds) = raw2 d :: encode ds
   166       | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
   170       | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
   167   in
   171   in
   168     if exists_string (not o raw_chr) str then implode (encode (explode str))
   172     if exists_string (not o raw_chr) str then implode (encode (explode str))
   169     else enclose "\\<^raw:" ">" str
   173     else raw0 str
   170   end;
   174   end;
   171 
   175 
   172 
   176 
   173 (* diagnostics *)
   177 (* diagnostics *)
   174 
   178 
   181     (drop_blanks (Library.take (n, all_cs))
   185     (drop_blanks (Library.take (n, all_cs))
   182       |> map (fn c => if is_ascii_blank c then space else c)
   186       |> map (fn c => if is_ascii_blank c then space else c)
   183       |> implode) ^ dots
   187       |> implode) ^ dots
   184   end;
   188   end;
   185 
   189 
   186 (*raw encoding avoids looping errors!*)
       
   187 fun malformed_symbol s =
       
   188   "Malformed symbolic character specification: " ^ quote (Output.escape s);
       
   189 
       
   190 
   190 
   191 (* decode_raw *)
   191 (* decode_raw *)
   192 
   192 
   193 fun is_raw s =
   193 fun is_raw s =
   194   String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s;
   194   String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s;
   195 
   195 
   196 fun decode_raw s =
   196 fun decode_raw s =
   197   if not (is_raw s) then error (malformed_symbol s)
   197   if not (is_raw s) then error (malformed_msg s)
   198   else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
   198   else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
   199   else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
   199   else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
   200 
   200 
   201 
   201 
   202 (* symbol variants *)
   202 (* symbol variants *)
   206 fun decode s =
   206 fun decode s =
   207   if is_char s then Char s
   207   if is_char s then Char s
   208   else if is_raw s then Raw (decode_raw s)
   208   else if is_raw s then Raw (decode_raw s)
   209   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   209   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   210   else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
   210   else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
   211   else error (malformed_symbol s);
   211   else error (malformed_msg s);
   212 
   212 
   213 
   213 
   214 (* standard symbol kinds *)
   214 (* standard symbol kinds *)
   215 
   215 
   216 datatype kind = Letter | Digit | Quasi | Blank | Other;
   216 datatype kind = Letter | Digit | Quasi | Blank | Other;
   393     (case fin_scan chs of
   393     (case fin_scan chs of
   394       (result, []) => result
   394       (result, []) => result
   395     | (_, rest) => error (message (rest, NONE)))
   395     | (_, rest) => error (message (rest, NONE)))
   396   end;
   396   end;
   397 
   397 
   398 
       
   399 (* scan *)
       
   400 
       
   401 val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode);
   398 val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode);
   402 
   399 
       
   400 
       
   401 (* source *)
       
   402 
   403 local
   403 local
       
   404 
       
   405 fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s;
   404 
   406 
   405 val scan_encoded_newline =
   407 val scan_encoded_newline =
   406   $$ "\^M" -- $$ "\n" >> K "\n" ||
   408   $$ "\^M" -- $$ "\n" >> K "\n" ||
   407   $$ "\^M" >> K "\n" ||
   409   $$ "\^M" >> K "\n" ||
   408   $$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n";
   410   $$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n";
   409 
   411 
   410 val scan_raw =
   412 val scan_raw =
   411   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
   413   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
   412   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
   414   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
   413 
   415 
   414 in
       
   415 
       
   416 val scan =
   416 val scan =
       
   417   Scan.one is_plain ||
   417   scan_encoded_newline ||
   418   scan_encoded_newline ||
   418   (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   419   (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   419     !! (fn (cs, _) => malformed_symbol (quote (beginning 10 ("\\" :: "<" :: cs))))
   420     !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
   420       (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   421       (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   421   Scan.one not_eof;
   422   Scan.one not_eof;
   422 
   423 
       
   424 val recover =
       
   425   Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s)
       
   426     >> (fn ss => malformed :: ss @ [end_malformed]);
       
   427 
       
   428 in
       
   429 
       
   430 fun source do_recover src =
       
   431   Source.source stopper (Scan.bulk scan) (if do_recover then SOME (K recover) else NONE) src;
       
   432 
   423 end;
   433 end;
   424 
   434 
   425 
   435 
   426 (* source *)
       
   427 
       
   428 val recover =
       
   429   Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s) >> K [malformed];
       
   430 
       
   431 fun source do_recover src =
       
   432   Source.source stopper (Scan.bulk scan) (if do_recover then SOME recover else NONE) src;
       
   433 
       
   434 
       
   435 (* explode *)
   436 (* explode *)
       
   437 
       
   438 local
   436 
   439 
   437 fun no_explode [] = true
   440 fun no_explode [] = true
   438   | no_explode ("\\" :: "<" :: _) = false
   441   | no_explode ("\\" :: "<" :: _) = false
   439   | no_explode ("\^M" :: _) = false
   442   | no_explode ("\^M" :: _) = false
   440   | no_explode (_ :: cs) = no_explode cs;
   443   | no_explode (_ :: cs) = no_explode cs;
   441 
   444 
       
   445 in
       
   446 
   442 fun sym_explode str =
   447 fun sym_explode str =
   443   let val chs = explode str in
   448   let val chs = explode str in
   444     if no_explode chs then chs
   449     if no_explode chs then chs
   445     else the (Scan.read stopper (Scan.repeat scan) chs)
   450     else Source.exhaust (source false (Source.of_list chs))
   446   end;
   451   end;
   447 
   452 
   448 val chars_only = not o exists_string (fn s => s = "\\");
   453 end;
   449 
   454 
   450 
   455 
   451 (* escape *)
   456 (* escape *)
   452 
   457 
   453 val escape = implode o map (fn s => if is_char s then s else "\\" ^ s) o sym_explode;
   458 val escape = implode o map (fn s => if is_char s then s else "\\" ^ s) o sym_explode;