improved position handling due to SymbolPos.T;
authorwenzelm
Thu Aug 07 13:45:05 2008 +0200 (2008-08-07)
changeset 27769ad50c38ef842
parent 27768 398c64b2acef
child 27770 10d84e124a2f
improved position handling due to SymbolPos.T;
SymbolPos.scan_comment_body;
tuned;
src/Pure/Isar/outer_lex.ML
     1.1 --- a/src/Pure/Isar/outer_lex.ML	Thu Aug 07 13:45:03 2008 +0200
     1.2 +++ b/src/Pure/Isar/outer_lex.ML	Thu Aug 07 13:45:05 2008 +0200
     1.3 @@ -36,22 +36,17 @@
     1.4    val unparse: token -> string
     1.5    val text_of: token -> string * string
     1.6    val is_sid: string -> bool
     1.7 -  val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
     1.8 -  val scan_quoted: Position.T * Symbol.symbol list ->
     1.9 -    Symbol.symbol list * (Position.T * Symbol.symbol list)
    1.10 -  val scan: (Scan.lexicon * Scan.lexicon) ->
    1.11 -    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    1.12 -  val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    1.13 -    Position.T -> (Symbol.symbol, 'a) Source.source ->
    1.14 -    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
    1.15 -  val source_proper: (token, 'a) Source.source ->
    1.16 -    (token, (token, 'a) Source.source) Source.source
    1.17 +  val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a
    1.18 +  val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
    1.19 +  val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source
    1.20 +  val source: bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    1.21 +    Position.T -> (Symbol.symbol, 'a) Source.source -> (token,
    1.22 +      (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
    1.23  end;
    1.24  
    1.25  structure OuterLex: OUTER_LEX =
    1.26  struct
    1.27  
    1.28 -
    1.29  (** tokens **)
    1.30  
    1.31  (* datatype token *)
    1.32 @@ -60,7 +55,7 @@
    1.33    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
    1.34    String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF;
    1.35  
    1.36 -datatype token = Token of (Position.range * string) * (token_kind * string);
    1.37 +datatype token = Token of (string * Position.range) * (token_kind * string);
    1.38  
    1.39  val str_of_kind =
    1.40   fn Command => "command"
    1.41 @@ -85,15 +80,15 @@
    1.42  
    1.43  (* position *)
    1.44  
    1.45 -fun position_of (Token (((pos, _), _), _)) = pos;
    1.46 -fun end_position_of (Token (((_, pos), _), _)) = pos;
    1.47 +fun position_of (Token ((_, (pos, _)), _)) = pos;
    1.48 +fun end_position_of (Token ((_, (_, pos)), _)) = pos;
    1.49  
    1.50  val pos_of = Position.str_of o position_of;
    1.51  
    1.52  
    1.53  (* control tokens *)
    1.54  
    1.55 -fun mk_eof pos = Token (((pos, Position.none), ""), (EOF, ""));
    1.56 +fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""));
    1.57  val eof = mk_eof Position.none;
    1.58  
    1.59  fun is_eof (Token (_, (EOF, _))) = true
    1.60 @@ -111,7 +106,6 @@
    1.61  (* kind of token *)
    1.62  
    1.63  fun kind_of (Token (_, (k, _))) = k;
    1.64 -
    1.65  fun is_kind k (Token (_, (k', _))) = k = k';
    1.66  
    1.67  fun keyword_with pred (Token (_, (Keyword, x))) = pred x
    1.68 @@ -150,9 +144,8 @@
    1.69  
    1.70  fun val_of (Token (_, (_, x))) = x;
    1.71  
    1.72 -fun source_of (Token ((range, src), _)) =
    1.73 -  XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src])
    1.74 -  |> YXML.string_of;
    1.75 +fun source_of (Token ((src, (pos, _)), _)) =
    1.76 +  YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text src]));
    1.77  
    1.78  
    1.79  (* unparse *)
    1.80 @@ -188,13 +181,11 @@
    1.81  
    1.82  (** scanners **)
    1.83  
    1.84 -fun change_prompt scan = Scan.prompt "# " scan;
    1.85 -
    1.86 +open BasicSymbolPos;
    1.87  
    1.88 -(* diagnostics *)
    1.89 +fun !!! msg = SymbolPos.!!! ("Outer lexical error: " ^ msg);
    1.90  
    1.91 -fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
    1.92 -fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
    1.93 +fun change_prompt scan = Scan.prompt "# " scan;
    1.94  
    1.95  
    1.96  (* scan symbolic idents *)
    1.97 @@ -202,8 +193,8 @@
    1.98  val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
    1.99  
   1.100  val scan_symid =
   1.101 -  Scan.many1 is_sym_char ||
   1.102 -  Scan.one Symbol.is_symbolic >> single;
   1.103 +  Scan.many1 (is_sym_char o symbol) ||
   1.104 +  Scan.one (Symbol.is_symbolic o symbol) >> single;
   1.105  
   1.106  fun is_symid str =
   1.107    (case try Symbol.explode str of
   1.108 @@ -222,31 +213,27 @@
   1.109  local
   1.110  
   1.111  val char_code =
   1.112 -  Scan.one Symbol.is_ascii_digit --
   1.113 -  Scan.one Symbol.is_ascii_digit --
   1.114 -  Scan.one Symbol.is_ascii_digit :|--
   1.115 -  (fn ((a, b), c) =>
   1.116 +  Scan.one (Symbol.is_ascii_digit o symbol) --
   1.117 +  Scan.one (Symbol.is_ascii_digit o symbol) --
   1.118 +  Scan.one (Symbol.is_ascii_digit o symbol) :|--
   1.119 +  (fn (((a, pos), (b, _)), (c, _)) =>
   1.120      let val (n, _) = Library.read_int [a, b, c]
   1.121 -    in if n <= 255 then Scan.succeed [chr n, Symbol.DEL, Symbol.DEL] else Scan.fail end);
   1.122 +    in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);
   1.123  
   1.124  fun scan_str q =
   1.125 -  Scan.lift ($$ "\\") |-- !!! "bad escape character in string"
   1.126 -    (Scan.lift (($$ q || $$ "\\") >> single || char_code) >> cons Symbol.DEL) ||
   1.127 -  Scan.lift (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single);
   1.128 +  $$$ "\\" |-- !!! "bad escape character in string" ($$$ q || $$$ "\\" || char_code) ||
   1.129 +  Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single;
   1.130  
   1.131  fun scan_strs q =
   1.132 -  Scan.lift ($$ q) |-- !!! "missing quote at end of string"
   1.133 -    (change_prompt (Scan.repeat (scan_str q) --| Scan.lift ($$ q)))
   1.134 -  >> (fn body => Symbol.DEL :: flat body @ [Symbol.DEL]);
   1.135 +  $$$ q |-- !!! "missing quote at end of string"
   1.136 +    (change_prompt (Scan.repeat (scan_str q) --| $$$ q)) >> flat;
   1.137  
   1.138  in
   1.139  
   1.140  val scan_string = scan_strs "\"";
   1.141  val scan_alt_string = scan_strs "`";
   1.142  
   1.143 -val scan_quoted = Scan.depend (fn pos =>
   1.144 -  Scan.trace (Scan.pass pos (scan_string || scan_alt_string))
   1.145 -    >> (fn (_, syms) => (Position.advance syms pos, syms)));
   1.146 +val scan_quoted = Scan.trace (scan_string || scan_alt_string) >> #2;
   1.147  
   1.148  end;
   1.149  
   1.150 @@ -254,13 +241,12 @@
   1.151  (* scan verbatim text *)
   1.152  
   1.153  val scan_verb =
   1.154 -  $$ "*" --| Scan.ahead (~$$ "}") ||
   1.155 -  Scan.one (fn s => s <> "*" andalso Symbol.is_regular s);
   1.156 +  $$$ "*" --| Scan.ahead (~$$$ "}") ||
   1.157 +  Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single;
   1.158  
   1.159  val scan_verbatim =
   1.160 -  Scan.lift ($$ "{" |-- $$ "*") |-- !!! "missing end of verbatim text"
   1.161 -    (Scan.lift (change_prompt (Scan.repeat scan_verb --| $$ "*" --| $$ "}")))
   1.162 -  >> (fn body => Symbol.DEL :: Symbol.DEL :: body @ [Symbol.DEL, Symbol.DEL]);
   1.163 +  $$$ "{" |-- $$$ "*" |-- !!! "missing end of verbatim text"
   1.164 +    (change_prompt (Scan.repeat scan_verb --| $$$ "*" --| $$$ "}")) >> flat;
   1.165  
   1.166  
   1.167  (* scan space *)
   1.168 @@ -268,90 +254,72 @@
   1.169  fun is_space s = Symbol.is_blank s andalso s <> "\n";
   1.170  
   1.171  val scan_space =
   1.172 -  (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] ||
   1.173 -    Scan.many is_space @@@ ($$ "\n" >> single));
   1.174 +  Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] ||
   1.175 +  Scan.many (is_space o symbol) @@@ $$$ "\n";
   1.176  
   1.177  
   1.178  (* scan nested comments *)
   1.179  
   1.180  val scan_cmt =
   1.181 -  Scan.depend (fn d => $$ "(" -- $$ "*" >> (fn (a, b) => (d + 1, [a, b]))) ||
   1.182 -  Scan.depend (fn 0 => Scan.fail | d => $$ "*" -- $$ ")" >> (fn (a, b) => (d - 1, [a, b]))) ||
   1.183 -  Scan.lift ($$ "*" --| Scan.ahead (~$$ ")") >> single) ||
   1.184 -  Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s) >> single);
   1.185 +  Scan.depend (fn d => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
   1.186 +  Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||
   1.187 +  Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
   1.188 +  Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single);
   1.189  
   1.190  val scan_comment =
   1.191 -  Scan.lift ($$ "(" |-- $$ "*") |--
   1.192 -    !!! "missing end of comment"
   1.193 -      (Scan.lift (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt) --| $$ "*" --| $$ ")")))
   1.194 -  >> (fn body => Symbol.DEL :: Symbol.DEL :: flat body @ [Symbol.DEL, Symbol.DEL]);
   1.195 +  $$$ "(" |-- $$$ "*" |-- !!! "missing end of comment"
   1.196 +    (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat) --| $$$ "*" --| $$$ ")"));
   1.197  
   1.198  
   1.199  (* scan malformed symbols *)
   1.200  
   1.201  val scan_malformed =
   1.202 -  $$ Symbol.malformed |--
   1.203 -    change_prompt (Scan.many Symbol.is_regular)
   1.204 -  --| Scan.option ($$ Symbol.end_malformed);
   1.205 -
   1.206 -
   1.207 -(* scan token *)
   1.208 -
   1.209 -fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
   1.210 -
   1.211 -fun advance_range syms pos =
   1.212 -  let val pos' = Position.advance syms pos
   1.213 -  in (Position.encode_range (pos, pos'), pos') end;
   1.214 -
   1.215 -val clean_value = implode o filter_out (fn s => s = Symbol.DEL);
   1.216 +  $$$ Symbol.malformed |--
   1.217 +    change_prompt (Scan.many (Symbol.is_regular o symbol))
   1.218 +  --| Scan.option ($$$ Symbol.end_malformed);
   1.219  
   1.220  
   1.221 -fun scan (lex1, lex2) =
   1.222 -  let
   1.223 -    val scanner = Scan.depend (fn pos => Scan.pass pos
   1.224 -      (scan_string >> pair String ||
   1.225 -        scan_alt_string >> pair AltString ||
   1.226 -        scan_verbatim >> pair Verbatim ||
   1.227 -        scan_comment >> pair Comment ||
   1.228 -        Scan.lift scan_space >> pair Space ||
   1.229 -        Scan.lift scan_malformed >> pair Malformed ||
   1.230 -        Scan.lift (Scan.one Symbol.is_sync >> K (Sync, [Symbol.sync])) ||
   1.231 -        Scan.lift ((Scan.max token_leq
   1.232 -          (Scan.max token_leq
   1.233 -            (Scan.literal lex2 >> pair Command)
   1.234 -            (Scan.literal lex1 >> pair Keyword))
   1.235 -          (Syntax.scan_longid >> pair LongIdent ||
   1.236 -            Syntax.scan_id >> pair Ident ||
   1.237 -            Syntax.scan_var >> pair Var ||
   1.238 -            Syntax.scan_tid >> pair TypeIdent ||
   1.239 -            Syntax.scan_tvar >> pair TypeVar ||
   1.240 -            Syntax.scan_nat >> pair Nat ||
   1.241 -            scan_symid >> pair SymIdent)))) >>
   1.242 -      (fn (k, syms) => 
   1.243 -        let val (range_pos, pos') = advance_range syms pos
   1.244 -        in (pos', Token (((range_pos, pos'), implode syms), (k, clean_value syms))) end));
   1.245  
   1.246 -  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   1.247 +(** token sources **)
   1.248  
   1.249 -
   1.250 -(* token sources *)
   1.251 +fun source_proper src = src |> Source.filter is_proper;
   1.252  
   1.253  local
   1.254  
   1.255 -val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   1.256 +fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
   1.257 +fun token (k, ss) = Token (SymbolPos.implode ss, (k, implode (map symbol ss)));
   1.258  
   1.259 -fun recover msg = Scan.depend (fn pos => Scan.many is_junk >> (fn syms =>
   1.260 -  let val (range_pos, pos') = advance_range syms pos
   1.261 -  in (pos', [Token (((range_pos, pos'), implode syms), (Error msg, clean_value syms))]) end));
   1.262 +fun scan (lex1, lex2) = !!! "bad input"
   1.263 +  (scan_string >> pair String ||
   1.264 +    scan_alt_string >> pair AltString ||
   1.265 +    scan_verbatim >> pair Verbatim ||
   1.266 +    SymbolPos.scan_comment_body !!! >> pair Comment ||
   1.267 +    scan_space >> pair Space ||
   1.268 +    scan_malformed >> pair Malformed ||
   1.269 +    Scan.one (Symbol.is_sync o symbol) >> (fn s => (Sync, [s])) ||
   1.270 +    ((Scan.max token_leq
   1.271 +      (Scan.max token_leq
   1.272 +        (Scan.literal lex2 >> pair Command)
   1.273 +        (Scan.literal lex1 >> pair Keyword))
   1.274 +      (Syntax.scan_longid >> pair LongIdent ||
   1.275 +        Syntax.scan_id >> pair Ident ||
   1.276 +        Syntax.scan_var >> pair Var ||
   1.277 +        Syntax.scan_tid >> pair TypeIdent ||
   1.278 +        Syntax.scan_tvar >> pair TypeVar ||
   1.279 +        Syntax.scan_nat >> pair Nat ||
   1.280 +        scan_symid >> pair SymIdent)))) >> token;
   1.281 +
   1.282 +fun recover msg =
   1.283 +  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol)
   1.284 +  >> (fn ss => [token (Error msg, ss)]);
   1.285  
   1.286  in
   1.287  
   1.288  fun source do_recover get_lex pos src =
   1.289 -  Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   1.290 -    (Option.map (rpair recover) do_recover) src;
   1.291 +  SymbolPos.source pos src
   1.292 +  |> Source.source SymbolPos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   1.293 +    (Option.map (rpair recover) do_recover);
   1.294  
   1.295  end;
   1.296  
   1.297 -fun source_proper src = src |> Source.filter is_proper;
   1.298 -
   1.299  end;