src/Pure/General/symbol.ML
changeset 50237 e356f86729bc
parent 50236 476a3350589c
child 50238 98d35a7368bd
equal deleted inserted replaced
50236:476a3350589c 50237:e356f86729bc
    52   val is_block_ctrl: symbol -> bool
    52   val is_block_ctrl: symbol -> bool
    53   val is_quasi_letter: symbol -> bool
    53   val is_quasi_letter: symbol -> bool
    54   val is_letdig: symbol -> bool
    54   val is_letdig: symbol -> bool
    55   val is_ident: symbol list -> bool
    55   val is_ident: symbol list -> bool
    56   val beginning: int -> symbol list -> string
    56   val beginning: int -> symbol list -> string
    57   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
       
    58   val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source
    57   val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source
    59   val explode: string -> symbol list
    58   val explode: string -> symbol list
       
    59   val esc: symbol -> string
       
    60   val escape: string -> string
       
    61   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    60   val split_words: symbol list -> string list
    62   val split_words: symbol list -> string list
    61   val explode_words: string -> string list
    63   val explode_words: string -> string list
    62   val esc: symbol -> string
       
    63   val escape: string -> string
       
    64   val strip_blanks: string -> string
    64   val strip_blanks: string -> string
    65   val bump_init: string -> string
    65   val bump_init: string -> string
    66   val bump_string: string -> string
    66   val bump_string: string -> string
    67   val length: symbol list -> int
    67   val length: symbol list -> int
    68   val xsymbolsN: string
    68   val xsymbolsN: string
   397 val is_block_ctrl = member (op =) ["\\<^bsub>", "\\<^esub>", "\\<^bsup>", "\\<^esup>"];
   397 val is_block_ctrl = member (op =) ["\\<^bsub>", "\\<^esub>", "\\<^bsup>", "\\<^esup>"];
   398 
   398 
   399 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;
   399 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;
   400 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
   400 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
   401 
   401 
   402 fun is_ident [] = false
       
   403   | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
       
   404 
       
   405 
   402 
   406 
   403 
   407 (** symbol input **)
   404 (** symbol input **)
   408 
   405 
   409 (* scanning through symbols *)
   406 (* source *)
       
   407 
       
   408 local
       
   409 
       
   410 fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\";
       
   411 
       
   412 fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
       
   413 
       
   414 fun implode_pseudo_utf8 (cs as ["\192", c]) =
       
   415       if ord c < 160 then chr (ord c - 128) else implode cs
       
   416   | implode_pseudo_utf8 cs = implode cs;
       
   417 
       
   418 val scan_encoded_newline =
       
   419   $$ "\^M" -- $$ "\n" >> K "\n" ||
       
   420   $$ "\^M" >> K "\n" ||
       
   421   Scan.this_string "\\<^newline>" >> K "\n";
       
   422 
       
   423 val scan_raw =
       
   424   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
       
   425   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
       
   426 
       
   427 val scan_total =
       
   428   Scan.one is_plain ||
       
   429   Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
       
   430   scan_encoded_newline ||
       
   431   ($$ "\\" ^^ $$ "<" ^^
       
   432     (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^
       
   433       Scan.optional ($$ ">") "")) ||
       
   434   Scan.one not_eof;
       
   435 
       
   436 in
       
   437 
       
   438 fun source src = Source.source stopper (Scan.bulk scan_total) NONE src;
       
   439 
       
   440 end;
       
   441 
       
   442 
       
   443 (* explode *)
       
   444 
       
   445 local
       
   446 
       
   447 fun no_explode [] = true
       
   448   | no_explode ("\\" :: "<" :: _) = false
       
   449   | no_explode ("\^M" :: _) = false
       
   450   | no_explode (c :: cs) = is_ascii c andalso no_explode cs;
       
   451 
       
   452 in
       
   453 
       
   454 fun sym_explode str =
       
   455   let val chs = raw_explode str in
       
   456     if no_explode chs then chs
       
   457     else Source.exhaust (source (Source.of_list chs))
       
   458   end;
       
   459 
       
   460 end;
       
   461 
       
   462 
       
   463 (* escape *)
       
   464 
       
   465 val esc = fn s =>
       
   466   if is_char s then s
       
   467   else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s
       
   468   else "\\" ^ s;
       
   469 
       
   470 val escape = implode o map esc o sym_explode;
       
   471 
       
   472 
       
   473 
       
   474 (** scanning through symbols **)
       
   475 
       
   476 (* scanner *)
   410 
   477 
   411 fun scanner msg scan syms =
   478 fun scanner msg scan syms =
   412   let
   479   let
   413     fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss))
   480     fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss))
   414       | message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss));
   481       | message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss));
   418       (result, []) => result
   485       (result, []) => result
   419     | (_, rest) => error (message (rest, NONE) ()))
   486     | (_, rest) => error (message (rest, NONE) ()))
   420   end;
   487   end;
   421 
   488 
   422 
   489 
   423 (* source *)
       
   424 
       
   425 local
       
   426 
       
   427 fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\";
       
   428 
       
   429 fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
       
   430 
       
   431 fun implode_pseudo_utf8 (cs as ["\192", c]) =
       
   432       if ord c < 160 then chr (ord c - 128) else implode cs
       
   433   | implode_pseudo_utf8 cs = implode cs;
       
   434 
       
   435 val scan_encoded_newline =
       
   436   $$ "\^M" -- $$ "\n" >> K "\n" ||
       
   437   $$ "\^M" >> K "\n" ||
       
   438   Scan.this_string "\\<^newline>" >> K "\n";
       
   439 
       
   440 val scan_raw =
       
   441   Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
       
   442   Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
       
   443 
       
   444 val scan_total =
       
   445   Scan.one is_plain ||
       
   446   Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
       
   447   scan_encoded_newline ||
       
   448   ($$ "\\" ^^ $$ "<" ^^
       
   449     (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^
       
   450       Scan.optional ($$ ">") "")) ||
       
   451   Scan.one not_eof;
       
   452 
       
   453 in
       
   454 
       
   455 fun source src = Source.source stopper (Scan.bulk scan_total) NONE src;
       
   456 
       
   457 end;
       
   458 
       
   459 
       
   460 (* explode *)
       
   461 
       
   462 local
       
   463 
       
   464 fun no_explode [] = true
       
   465   | no_explode ("\\" :: "<" :: _) = false
       
   466   | no_explode ("\^M" :: _) = false
       
   467   | no_explode (c :: cs) = is_ascii c andalso no_explode cs;
       
   468 
       
   469 in
       
   470 
       
   471 fun sym_explode str =
       
   472   let val chs = raw_explode str in
       
   473     if no_explode chs then chs
       
   474     else Source.exhaust (source (Source.of_list chs))
       
   475   end;
       
   476 
       
   477 end;
       
   478 
       
   479 
       
   480 (* space-separated words *)
   490 (* space-separated words *)
   481 
   491 
   482 val scan_word =
   492 val scan_word =
   483   Scan.many1 is_ascii_blank >> K NONE ||
   493   Scan.many1 is_ascii_blank >> K NONE ||
   484   Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode);
   494   Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode);
   485 
   495 
   486 val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I);
   496 val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I);
   487 
   497 
   488 val explode_words = split_words o sym_explode;
   498 val explode_words = split_words o sym_explode;
   489 
       
   490 
       
   491 (* escape *)
       
   492 
       
   493 val esc = fn s =>
       
   494   if is_char s then s
       
   495   else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s
       
   496   else "\\" ^ s;
       
   497 
       
   498 val escape = implode o map esc o sym_explode;
       
   499 
   499 
   500 
   500 
   501 (* blanks *)
   501 (* blanks *)
   502 
   502 
   503 fun strip_blanks s =
   503 fun strip_blanks s =
   504   sym_explode s
   504   sym_explode s
   505   |> take_prefix is_blank |> #2
   505   |> take_prefix is_blank |> #2
   506   |> take_suffix is_blank |> #1
   506   |> take_suffix is_blank |> #1
   507   |> implode;
   507   |> implode;
       
   508 
       
   509 
       
   510 (* identifiers *)
       
   511 
       
   512 fun is_ident [] = false
       
   513   | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
   508 
   514 
   509 
   515 
   510 (* bump string -- treat as base 26 or base 1 numbers *)
   516 (* bump string -- treat as base 26 or base 1 numbers *)
   511 
   517 
   512 fun symbolic_end (_ :: "\\<^isub>" :: _) = true
   518 fun symbolic_end (_ :: "\\<^isub>" :: _) = true