src/Pure/Isar/outer_lex.ML
changeset 27752 ea7d573e565f
parent 27747 d41abb7bc08a
child 27769 ad50c38ef842
equal deleted inserted replaced
27751:22c32eb18c23 27752:ea7d573e565f
    10   datatype token_kind =
    10   datatype token_kind =
    11     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
    11     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
    12     String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF
    12     String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF
    13   eqtype token
    13   eqtype token
    14   val str_of_kind: token_kind -> string
    14   val str_of_kind: token_kind -> string
    15   val range_of: token -> Position.range
       
    16   val position_of: token -> Position.T
    15   val position_of: token -> Position.T
       
    16   val end_position_of: token -> Position.T
    17   val pos_of: token -> string
    17   val pos_of: token -> string
    18   val eof: token
    18   val eof: token
    19   val is_eof: token -> bool
    19   val is_eof: token -> bool
    20   val not_eof: token -> bool
    20   val not_eof: token -> bool
    21   val not_sync: token -> bool
    21   val not_sync: token -> bool
    30   val is_begin_ignore: token -> bool
    30   val is_begin_ignore: token -> bool
    31   val is_end_ignore: token -> bool
    31   val is_end_ignore: token -> bool
    32   val is_blank: token -> bool
    32   val is_blank: token -> bool
    33   val is_newline: token -> bool
    33   val is_newline: token -> bool
    34   val val_of: token -> string
    34   val val_of: token -> string
    35   val clean_value: Symbol.symbol list -> Symbol.symbol list
       
    36   val source_of: token -> string
    35   val source_of: token -> string
    37   val unparse: token -> string
    36   val unparse: token -> string
    38   val text_of: token -> string * string
    37   val text_of: token -> string * string
    39   val is_sid: string -> bool
    38   val is_sid: string -> bool
    40   val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
    39   val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
    41   val scan_string: Position.T * Symbol.symbol list ->
    40   val scan_quoted: Position.T * Symbol.symbol list ->
    42     Symbol.symbol list * (Position.T * Symbol.symbol list)
    41     Symbol.symbol list * (Position.T * Symbol.symbol list)
    43   val scan: (Scan.lexicon * Scan.lexicon) ->
    42   val scan: (Scan.lexicon * Scan.lexicon) ->
    44     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    43     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    45   val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    44   val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    46     Position.T -> (Symbol.symbol, 'a) Source.source ->
    45     Position.T -> (Symbol.symbol, 'a) Source.source ->
    84   | EOF => "end-of-file";
    83   | EOF => "end-of-file";
    85 
    84 
    86 
    85 
    87 (* position *)
    86 (* position *)
    88 
    87 
    89 fun range_of (Token ((range, _), _)) = range;
    88 fun position_of (Token (((pos, _), _), _)) = pos;
    90 
    89 fun end_position_of (Token (((_, pos), _), _)) = pos;
    91 val position_of = #1 o range_of;
    90 
    92 val pos_of = Position.str_of o position_of;
    91 val pos_of = Position.str_of o position_of;
    93 
    92 
    94 
    93 
    95 (* control tokens *)
    94 (* control tokens *)
    96 
    95 
   103 val not_eof = not o is_eof;
   102 val not_eof = not o is_eof;
   104 
   103 
   105 fun not_sync (Token (_, (Sync, _))) = false
   104 fun not_sync (Token (_, (Sync, _))) = false
   106   | not_sync _ = true;
   105   | not_sync _ = true;
   107 
   106 
   108 val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (#2 (range_of (List.last toks)))) is_eof;
   107 val stopper =
       
   108   Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
   109 
   109 
   110 
   110 
   111 (* kind of token *)
   111 (* kind of token *)
   112 
   112 
   113 fun kind_of (Token (_, (k, _))) = k;
   113 fun kind_of (Token (_, (k, _))) = k;
   147 
   147 
   148 
   148 
   149 (* token content *)
   149 (* token content *)
   150 
   150 
   151 fun val_of (Token (_, (_, x))) = x;
   151 fun val_of (Token (_, (_, x))) = x;
   152 
       
   153 val clean_value = filter_out (fn s => s = Symbol.DEL);
       
   154 
   152 
   155 fun source_of (Token ((range, src), _)) =
   153 fun source_of (Token ((range, src), _)) =
   156   XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src])
   154   XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src])
   157   |> YXML.string_of;
   155   |> YXML.string_of;
   158 
   156 
   244 in
   242 in
   245 
   243 
   246 val scan_string = scan_strs "\"";
   244 val scan_string = scan_strs "\"";
   247 val scan_alt_string = scan_strs "`";
   245 val scan_alt_string = scan_strs "`";
   248 
   246 
       
   247 val scan_quoted = Scan.depend (fn pos =>
       
   248   Scan.trace (Scan.pass pos (scan_string || scan_alt_string))
       
   249     >> (fn (_, syms) => (Position.advance syms pos, syms)));
       
   250 
   249 end;
   251 end;
   250 
   252 
   251 
   253 
   252 (* scan verbatim text *)
   254 (* scan verbatim text *)
   253 
   255 
   295 
   297 
   296 (* scan token *)
   298 (* scan token *)
   297 
   299 
   298 fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
   300 fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
   299 
   301 
       
   302 fun advance_range syms pos =
       
   303   let val pos' = Position.advance syms pos
       
   304   in (Position.encode_range (pos, pos'), pos') end;
       
   305 
       
   306 val clean_value = implode o filter_out (fn s => s = Symbol.DEL);
       
   307 
       
   308 
   300 fun scan (lex1, lex2) =
   309 fun scan (lex1, lex2) =
   301   let
   310   let
   302     val scanner =
   311     val scanner = Scan.depend (fn pos => Scan.pass pos
   303       (scan_string >> pair String ||
   312       (scan_string >> pair String ||
   304         scan_alt_string >> pair AltString ||
   313         scan_alt_string >> pair AltString ||
   305         scan_verbatim >> pair Verbatim ||
   314         scan_verbatim >> pair Verbatim ||
   306         scan_comment >> pair Comment ||
   315         scan_comment >> pair Comment ||
   307         Scan.lift scan_space >> pair Space ||
   316         Scan.lift scan_space >> pair Space ||
   315             Syntax.scan_id >> pair Ident ||
   324             Syntax.scan_id >> pair Ident ||
   316             Syntax.scan_var >> pair Var ||
   325             Syntax.scan_var >> pair Var ||
   317             Syntax.scan_tid >> pair TypeIdent ||
   326             Syntax.scan_tid >> pair TypeIdent ||
   318             Syntax.scan_tvar >> pair TypeVar ||
   327             Syntax.scan_tvar >> pair TypeVar ||
   319             Syntax.scan_nat >> pair Nat ||
   328             Syntax.scan_nat >> pair Nat ||
   320             scan_symid >> pair SymIdent)))) :|--
   329             scan_symid >> pair SymIdent)))) >>
   321       (fn (k, syms) => Scan.depend (fn pos =>
   330       (fn (k, syms) => 
   322         let
   331         let val (range_pos, pos') = advance_range syms pos
   323           val pos' = Position.advance syms pos;
   332         in (pos', Token (((range_pos, pos'), implode syms), (k, clean_value syms))) end));
   324           val x = implode (clean_value syms);
       
   325         in Scan.succeed (pos', Token (((pos, pos'), implode syms), (k, x))) end));
       
   326 
   333 
   327   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   334   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   328 
   335 
   329 
   336 
   330 (* token sources *)
   337 (* token sources *)
   331 
   338 
   332 local
   339 local
   333 
   340 
   334 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   341 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   335 
   342 
   336 fun recover msg = Scan.lift (Scan.many is_junk) :|-- (fn syms => Scan.depend (fn pos =>
   343 fun recover msg = Scan.depend (fn pos => Scan.many is_junk >> (fn syms =>
   337   let
   344   let val (range_pos, pos') = advance_range syms pos
   338     val pos' = Position.advance syms pos;
   345   in (pos', [Token (((range_pos, pos'), implode syms), (Error msg, clean_value syms))]) end));
   339     val x = implode (clean_value syms);
       
   340   in Scan.succeed (pos', [Token (((pos, pos'), implode syms), (Error msg, x))]) end));
       
   341 
   346 
   342 in
   347 in
   343 
   348 
   344 fun source do_recover get_lex pos src =
   349 fun source do_recover get_lex pos src =
   345   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   350   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))