scan: changed treatment of malformed symbols, passed to next stage;
authorwenzelm
Mon Jul 09 23:12:42 2007 +0200 (2007-07-09)
changeset 23676ea9b7e9c2301
parent 23675 2d618c190466
child 23677 1114cc909800
scan: changed treatment of malformed symbols, passed to next stage;
tuned sym_explode;
src/Pure/General/symbol.ML
     1.1 --- a/src/Pure/General/symbol.ML	Mon Jul 09 23:12:40 2007 +0200
     1.2 +++ b/src/Pure/General/symbol.ML	Mon Jul 09 23:12:42 2007 +0200
     1.3 @@ -21,6 +21,7 @@
     1.4    val is_sync: symbol -> bool
     1.5    val not_sync: symbol -> bool
     1.6    val malformed: symbol
     1.7 +  val end_malformed: symbol
     1.8    val is_ascii: symbol -> bool
     1.9    val is_ascii_letter: symbol -> bool
    1.10    val is_hex_letter: symbol -> bool
    1.11 @@ -48,11 +49,9 @@
    1.12    val beginning: int -> symbol list -> string
    1.13    val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    1.14    val scan_id: string list -> string * string list
    1.15 -  val scan: string list -> symbol * string list
    1.16    val source: bool -> (string, 'a) Source.source ->
    1.17      (symbol, (string, 'a) Source.source) Source.source
    1.18    val explode: string -> symbol list
    1.19 -  val chars_only: string -> bool
    1.20    val escape: string -> string
    1.21    val strip_blanks: string -> string
    1.22    val bump_init: string -> string
    1.23 @@ -113,7 +112,11 @@
    1.24  fun is_sync s = s = sync;
    1.25  fun not_sync s = s <> sync;
    1.26  
    1.27 -val malformed = "\\<^malformed>";
    1.28 +val malformed = "[[";
    1.29 +val end_malformed = "]]";
    1.30 +
    1.31 +fun malformed_msg s =  (*Output.escape avoids looping errors*)
    1.32 +  "Malformed symbolic character: " ^ quote (Output.escape s);
    1.33  
    1.34  
    1.35  (* ascii symbols *)
    1.36 @@ -156,7 +159,8 @@
    1.37  
    1.38  fun encode_raw str =
    1.39    let
    1.40 -    val raw1 = enclose "\\<^raw:" ">" o implode;
    1.41 +    val raw0 = enclose "\\<^raw:" ">";
    1.42 +    val raw1 = raw0 o implode;
    1.43      val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
    1.44  
    1.45      fun encode cs = enc (Library.take_prefix raw_chr cs)
    1.46 @@ -166,7 +170,7 @@
    1.47        | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
    1.48    in
    1.49      if exists_string (not o raw_chr) str then implode (encode (explode str))
    1.50 -    else enclose "\\<^raw:" ">" str
    1.51 +    else raw0 str
    1.52    end;
    1.53  
    1.54  
    1.55 @@ -183,10 +187,6 @@
    1.56        |> implode) ^ dots
    1.57    end;
    1.58  
    1.59 -(*raw encoding avoids looping errors!*)
    1.60 -fun malformed_symbol s =
    1.61 -  "Malformed symbolic character specification: " ^ quote (Output.escape s);
    1.62 -
    1.63  
    1.64  (* decode_raw *)
    1.65  
    1.66 @@ -194,7 +194,7 @@
    1.67    String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s;
    1.68  
    1.69  fun decode_raw s =
    1.70 -  if not (is_raw s) then error (malformed_symbol s)
    1.71 +  if not (is_raw s) then error (malformed_msg s)
    1.72    else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
    1.73    else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
    1.74  
    1.75 @@ -208,7 +208,7 @@
    1.76    else if is_raw s then Raw (decode_raw s)
    1.77    else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
    1.78    else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
    1.79 -  else error (malformed_symbol s);
    1.80 +  else error (malformed_msg s);
    1.81  
    1.82  
    1.83  (* standard symbol kinds *)
    1.84 @@ -395,13 +395,15 @@
    1.85      | (_, rest) => error (message (rest, NONE)))
    1.86    end;
    1.87  
    1.88 -
    1.89 -(* scan *)
    1.90 -
    1.91  val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode);
    1.92  
    1.93 +
    1.94 +(* source *)
    1.95 +
    1.96  local
    1.97  
    1.98 +fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s;
    1.99 +
   1.100  val scan_encoded_newline =
   1.101    $$ "\^M" -- $$ "\n" >> K "\n" ||
   1.102    $$ "\^M" >> K "\n" ||
   1.103 @@ -411,41 +413,44 @@
   1.104    Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
   1.105    Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
   1.106  
   1.107 +val scan =
   1.108 +  Scan.one is_plain ||
   1.109 +  scan_encoded_newline ||
   1.110 +  (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   1.111 +    !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
   1.112 +      (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   1.113 +  Scan.one not_eof;
   1.114 +
   1.115 +val recover =
   1.116 +  Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s)
   1.117 +    >> (fn ss => malformed :: ss @ [end_malformed]);
   1.118 +
   1.119  in
   1.120  
   1.121 -val scan =
   1.122 -  scan_encoded_newline ||
   1.123 -  (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   1.124 -    !! (fn (cs, _) => malformed_symbol (quote (beginning 10 ("\\" :: "<" :: cs))))
   1.125 -      (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
   1.126 -  Scan.one not_eof;
   1.127 +fun source do_recover src =
   1.128 +  Source.source stopper (Scan.bulk scan) (if do_recover then SOME (K recover) else NONE) src;
   1.129  
   1.130  end;
   1.131  
   1.132  
   1.133 -(* source *)
   1.134 -
   1.135 -val recover =
   1.136 -  Scan.many (fn s => not (is_blank s) andalso s <> "\"" andalso not_eof s) >> K [malformed];
   1.137 +(* explode *)
   1.138  
   1.139 -fun source do_recover src =
   1.140 -  Source.source stopper (Scan.bulk scan) (if do_recover then SOME recover else NONE) src;
   1.141 -
   1.142 -
   1.143 -(* explode *)
   1.144 +local
   1.145  
   1.146  fun no_explode [] = true
   1.147    | no_explode ("\\" :: "<" :: _) = false
   1.148    | no_explode ("\^M" :: _) = false
   1.149    | no_explode (_ :: cs) = no_explode cs;
   1.150  
   1.151 +in
   1.152 +
   1.153  fun sym_explode str =
   1.154    let val chs = explode str in
   1.155      if no_explode chs then chs
   1.156 -    else the (Scan.read stopper (Scan.repeat scan) chs)
   1.157 +    else Source.exhaust (source false (Source.of_list chs))
   1.158    end;
   1.159  
   1.160 -val chars_only = not o exists_string (fn s => s = "\\");
   1.161 +end;
   1.162  
   1.163  
   1.164  (* escape *)