# HG changeset patch # User wenzelm # Date 1218109505 -7200 # Node ID ad50c38ef842d82e5cfff5fe4433b332e32a7558 # Parent 398c64b2acef2c6a901ae038486eed2effa7e46b improved position handling due to SymbolPos.T; SymbolPos.scan_comment_body; tuned; diff -r 398c64b2acef -r ad50c38ef842 src/Pure/Isar/outer_lex.ML --- a/src/Pure/Isar/outer_lex.ML Thu Aug 07 13:45:03 2008 +0200 +++ b/src/Pure/Isar/outer_lex.ML Thu Aug 07 13:45:05 2008 +0200 @@ -36,22 +36,17 @@ val unparse: token -> string val text_of: token -> string * string val is_sid: string -> bool - val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b - val scan_quoted: Position.T * Symbol.symbol list -> - Symbol.symbol list * (Position.T * Symbol.symbol list) - val scan: (Scan.lexicon * Scan.lexicon) -> - Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) - val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) -> - Position.T -> (Symbol.symbol, 'a) Source.source -> - (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source - val source_proper: (token, 'a) Source.source -> - (token, (token, 'a) Source.source) Source.source + val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a + val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list + val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source + val source: bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) -> + Position.T -> (Symbol.symbol, 'a) Source.source -> (token, + (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source end; structure OuterLex: OUTER_LEX = struct - (** tokens **) (* datatype token *) @@ -60,7 +55,7 @@ Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat | String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF; -datatype token = Token of (Position.range * string) * (token_kind * string); +datatype token = Token of (string * Position.range) * (token_kind * string); val str_of_kind = fn Command => "command" @@ -85,15 +80,15 @@ (* position *) -fun position_of (Token (((pos, _), _), _)) = pos; -fun end_position_of (Token (((_, pos), _), _)) = pos; +fun position_of (Token ((_, (pos, _)), _)) = pos; +fun end_position_of (Token ((_, (_, pos)), _)) = pos; val pos_of = Position.str_of o position_of; (* control tokens *) -fun mk_eof pos = Token (((pos, Position.none), ""), (EOF, "")); +fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, "")); val eof = mk_eof Position.none; fun is_eof (Token (_, (EOF, _))) = true @@ -111,7 +106,6 @@ (* kind of token *) fun kind_of (Token (_, (k, _))) = k; - fun is_kind k (Token (_, (k', _))) = k = k'; fun keyword_with pred (Token (_, (Keyword, x))) = pred x @@ -150,9 +144,8 @@ fun val_of (Token (_, (_, x))) = x; -fun source_of (Token ((range, src), _)) = - XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src]) - |> YXML.string_of; +fun source_of (Token ((src, (pos, _)), _)) = + YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text src])); (* unparse *) @@ -188,13 +181,11 @@ (** scanners **) -fun change_prompt scan = Scan.prompt "# " scan; - +open BasicSymbolPos; -(* diagnostics *) +fun !!! msg = SymbolPos.!!! ("Outer lexical error: " ^ msg); -fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs; -fun !!! msg scan = Scan.!! (lex_err (K msg)) scan; +fun change_prompt scan = Scan.prompt "# " scan; (* scan symbolic idents *) @@ -202,8 +193,8 @@ val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~"); val scan_symid = - Scan.many1 is_sym_char || - Scan.one Symbol.is_symbolic >> single; + Scan.many1 (is_sym_char o symbol) || + Scan.one (Symbol.is_symbolic o symbol) >> single; fun is_symid str = (case try Symbol.explode str of @@ -222,31 +213,27 @@ local val char_code = - Scan.one Symbol.is_ascii_digit -- - Scan.one Symbol.is_ascii_digit -- - Scan.one Symbol.is_ascii_digit :|-- - (fn ((a, b), c) => + Scan.one (Symbol.is_ascii_digit o symbol) -- + Scan.one (Symbol.is_ascii_digit o symbol) -- + Scan.one (Symbol.is_ascii_digit o symbol) :|-- + (fn (((a, pos), (b, _)), (c, _)) => let val (n, _) = Library.read_int [a, b, c] - in if n <= 255 then Scan.succeed [chr n, Symbol.DEL, Symbol.DEL] else Scan.fail end); + in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end); fun scan_str q = - Scan.lift ($$ "\\") |-- !!! "bad escape character in string" - (Scan.lift (($$ q || $$ "\\") >> single || char_code) >> cons Symbol.DEL) || - Scan.lift (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single); + $$$ "\\" |-- !!! "bad escape character in string" ($$$ q || $$$ "\\" || char_code) || + Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single; fun scan_strs q = - Scan.lift ($$ q) |-- !!! "missing quote at end of string" - (change_prompt (Scan.repeat (scan_str q) --| Scan.lift ($$ q))) - >> (fn body => Symbol.DEL :: flat body @ [Symbol.DEL]); + $$$ q |-- !!! "missing quote at end of string" + (change_prompt (Scan.repeat (scan_str q) --| $$$ q)) >> flat; in val scan_string = scan_strs "\""; val scan_alt_string = scan_strs "`"; -val scan_quoted = Scan.depend (fn pos => - Scan.trace (Scan.pass pos (scan_string || scan_alt_string)) - >> (fn (_, syms) => (Position.advance syms pos, syms))); +val scan_quoted = Scan.trace (scan_string || scan_alt_string) >> #2; end; @@ -254,13 +241,12 @@ (* scan verbatim text *) val scan_verb = - $$ "*" --| Scan.ahead (~$$ "}") || - Scan.one (fn s => s <> "*" andalso Symbol.is_regular s); + $$$ "*" --| Scan.ahead (~$$$ "}") || + Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; val scan_verbatim = - Scan.lift ($$ "{" |-- $$ "*") |-- !!! "missing end of verbatim text" - (Scan.lift (change_prompt (Scan.repeat scan_verb --| $$ "*" --| $$ "}"))) - >> (fn body => Symbol.DEL :: Symbol.DEL :: body @ [Symbol.DEL, Symbol.DEL]); + $$$ "{" |-- $$$ "*" |-- !!! "missing end of verbatim text" + (change_prompt (Scan.repeat scan_verb --| $$$ "*" --| $$$ "}")) >> flat; (* scan space *) @@ -268,90 +254,72 @@ fun is_space s = Symbol.is_blank s andalso s <> "\n"; val scan_space = - (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] || - Scan.many is_space @@@ ($$ "\n" >> single)); + Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] || + Scan.many (is_space o symbol) @@@ $$$ "\n"; (* scan nested comments *) val scan_cmt = - Scan.depend (fn d => $$ "(" -- $$ "*" >> (fn (a, b) => (d + 1, [a, b]))) || - Scan.depend (fn 0 => Scan.fail | d => $$ "*" -- $$ ")" >> (fn (a, b) => (d - 1, [a, b]))) || - Scan.lift ($$ "*" --| Scan.ahead (~$$ ")") >> single) || - Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s) >> single); + Scan.depend (fn d => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) || + Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) || + Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) || + Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single); val scan_comment = - Scan.lift ($$ "(" |-- $$ "*") |-- - !!! "missing end of comment" - (Scan.lift (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt) --| $$ "*" --| $$ ")"))) - >> (fn body => Symbol.DEL :: Symbol.DEL :: flat body @ [Symbol.DEL, Symbol.DEL]); + $$$ "(" |-- $$$ "*" |-- !!! "missing end of comment" + (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat) --| $$$ "*" --| $$$ ")")); (* scan malformed symbols *) val scan_malformed = - $$ Symbol.malformed |-- - change_prompt (Scan.many Symbol.is_regular) - --| Scan.option ($$ Symbol.end_malformed); - - -(* scan token *) - -fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; - -fun advance_range syms pos = - let val pos' = Position.advance syms pos - in (Position.encode_range (pos, pos'), pos') end; - -val clean_value = implode o filter_out (fn s => s = Symbol.DEL); + $$$ Symbol.malformed |-- + change_prompt (Scan.many (Symbol.is_regular o symbol)) + --| Scan.option ($$$ Symbol.end_malformed); -fun scan (lex1, lex2) = - let - val scanner = Scan.depend (fn pos => Scan.pass pos - (scan_string >> pair String || - scan_alt_string >> pair AltString || - scan_verbatim >> pair Verbatim || - scan_comment >> pair Comment || - Scan.lift scan_space >> pair Space || - Scan.lift scan_malformed >> pair Malformed || - Scan.lift (Scan.one Symbol.is_sync >> K (Sync, [Symbol.sync])) || - Scan.lift ((Scan.max token_leq - (Scan.max token_leq - (Scan.literal lex2 >> pair Command) - (Scan.literal lex1 >> pair Keyword)) - (Syntax.scan_longid >> pair LongIdent || - Syntax.scan_id >> pair Ident || - Syntax.scan_var >> pair Var || - Syntax.scan_tid >> pair TypeIdent || - Syntax.scan_tvar >> pair TypeVar || - Syntax.scan_nat >> pair Nat || - scan_symid >> pair SymIdent)))) >> - (fn (k, syms) => - let val (range_pos, pos') = advance_range syms pos - in (pos', Token (((range_pos, pos'), implode syms), (k, clean_value syms))) end)); - in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end; +(** token sources **) - -(* token sources *) +fun source_proper src = src |> Source.filter is_proper; local -val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular; +fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; +fun token (k, ss) = Token (SymbolPos.implode ss, (k, implode (map symbol ss))); -fun recover msg = Scan.depend (fn pos => Scan.many is_junk >> (fn syms => - let val (range_pos, pos') = advance_range syms pos - in (pos', [Token (((range_pos, pos'), implode syms), (Error msg, clean_value syms))]) end)); +fun scan (lex1, lex2) = !!! "bad input" + (scan_string >> pair String || + scan_alt_string >> pair AltString || + scan_verbatim >> pair Verbatim || + SymbolPos.scan_comment_body !!! >> pair Comment || + scan_space >> pair Space || + scan_malformed >> pair Malformed || + Scan.one (Symbol.is_sync o symbol) >> (fn s => (Sync, [s])) || + ((Scan.max token_leq + (Scan.max token_leq + (Scan.literal lex2 >> pair Command) + (Scan.literal lex1 >> pair Keyword)) + (Syntax.scan_longid >> pair LongIdent || + Syntax.scan_id >> pair Ident || + Syntax.scan_var >> pair Var || + Syntax.scan_tid >> pair TypeIdent || + Syntax.scan_tvar >> pair TypeVar || + Syntax.scan_nat >> pair Nat || + scan_symid >> pair SymIdent)))) >> token; + +fun recover msg = + Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol) + >> (fn ss => [token (Error msg, ss)]); in fun source do_recover get_lex pos src = - Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) - (Option.map (rpair recover) do_recover) src; + SymbolPos.source pos src + |> Source.source SymbolPos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) + (Option.map (rpair recover) do_recover); end; -fun source_proper src = src |> Source.filter is_proper; - end;