src/Pure/Isar/outer_lex.ML
changeset 26004 2abb3005660f
parent 25642 ebdff0dca2a5
child 27358 d6679949a869
equal deleted inserted replaced
26003:7a8aee8353cf 26004:2abb3005660f
    31   val val_of: token -> string
    31   val val_of: token -> string
    32   val unparse: token -> string
    32   val unparse: token -> string
    33   val text_of: token -> string * string
    33   val text_of: token -> string * string
    34   val is_sid: string -> bool
    34   val is_sid: string -> bool
    35   val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
    35   val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
    36   val incr_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c)
    36   val count: (Symbol.symbol list -> Symbol.symbol * Symbol.symbol list) ->
    37   val keep_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c)
    37     Position.T * Symbol.symbol list -> Symbol.symbol * (Position.T * Symbol.symbol list)
    38   val scan_blank: Position.T * Symbol.symbol list
    38   val counted: (Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list) ->
    39     -> Symbol.symbol * (Position.T * Symbol.symbol list)
    39     Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
    40   val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
    40   val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
    41   val scan: (Scan.lexicon * Scan.lexicon) ->
    41   val scan: (Scan.lexicon * Scan.lexicon) ->
    42     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    42     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    43   val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    43   val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
    44     Position.T -> (Symbol.symbol, 'a) Source.source ->
    44     Position.T -> (Symbol.symbol, 'a) Source.source ->
   184 
   184 
   185 fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
   185 fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
   186 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
   186 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
   187 
   187 
   188 
   188 
   189 (* line numbering *)
   189 (* position *)
   190 
   190 
   191 fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos));
   191 local
   192 val keep_line = Scan.lift;
   192 
   193 
   193 fun map_position f (scan: Symbol.symbol list -> 'a * Symbol.symbol list)  =
   194 val scan_blank =
   194   Scan.depend (fn (pos: Position.T) => scan >> (fn x => (f x pos, x)));
   195   incr_line ($$ "\n") ||
   195 
   196   keep_line (Scan.one Symbol.is_blank);
   196 in
       
   197 
       
   198 fun count scan = map_position Position.advance scan;
       
   199 fun counted scan = map_position (fold Position.advance) scan >> implode;
       
   200 
       
   201 end;
   197 
   202 
   198 
   203 
   199 (* scan symbolic idents *)
   204 (* scan symbolic idents *)
   200 
   205 
   201 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
   206 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
   202 
   207 
   203 val scan_symid =
   208 val scan_symid =
   204   Scan.many1 is_sym_char >> implode ||
   209   Scan.many1 is_sym_char ||
   205   Scan.one Symbol.is_symbolic;
   210   Scan.one Symbol.is_symbolic >> single;
   206 
   211 
   207 fun is_symid str =
   212 fun is_symid str =
   208   (case try Symbol.explode str of
   213   (case try Symbol.explode str of
   209     SOME [s] => Symbol.is_symbolic s orelse is_sym_char s
   214     SOME [s] => Symbol.is_symbolic s orelse is_sym_char s
   210   | SOME ss => forall is_sym_char ss
   215   | SOME ss => forall is_sym_char ss
   219 (* scan strings *)
   224 (* scan strings *)
   220 
   225 
   221 local
   226 local
   222 
   227 
   223 val char_code =
   228 val char_code =
   224   Scan.one Symbol.is_ascii_digit --
   229   count (Scan.one Symbol.is_ascii_digit) --
   225   Scan.one Symbol.is_ascii_digit --
   230   count (Scan.one Symbol.is_ascii_digit) --
   226   Scan.one Symbol.is_ascii_digit :|-- (fn ((a, b), c) =>
   231   count (Scan.one Symbol.is_ascii_digit) :|--
       
   232   (fn ((a, b), c) =>
   227     let val (n, _) = Library.read_int [a, b, c]
   233     let val (n, _) = Library.read_int [a, b, c]
   228     in if n <= 255 then Scan.succeed (chr n) else Scan.fail end);
   234     in if n <= 255 then Scan.succeed (chr n) else Scan.fail end);
   229 
   235 
   230 fun scan_str q =
   236 fun scan_str q =
   231   scan_blank ||
   237   count ($$ "\\") |-- !!! "bad escape character in string" (count ($$ q || $$ "\\") || char_code) ||
   232   keep_line ($$ "\\") |-- !!! "bad escape character in string"
   238   count (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s));
   233       (scan_blank || keep_line ($$ q || $$ "\\" || char_code)) ||
       
   234   keep_line (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s));
       
   235 
   239 
   236 fun scan_strs q =
   240 fun scan_strs q =
   237   keep_line ($$ q) |--
   241   count ($$ q) |--
   238     !!! "missing quote at end of string"
   242     !!! "missing quote at end of string"
   239       (change_prompt ((Scan.repeat (scan_str q) >> implode) --| keep_line ($$ q)));
   243       (change_prompt ((Scan.repeat (scan_str q) >> implode) --| count ($$ q)));
   240 
   244 
   241 in
   245 in
   242 
   246 
   243 val scan_string = scan_strs "\"";
   247 val scan_string = scan_strs "\"";
   244 val scan_alt_string = scan_strs "`";
   248 val scan_alt_string = scan_strs "`";
   247 
   251 
   248 
   252 
   249 (* scan verbatim text *)
   253 (* scan verbatim text *)
   250 
   254 
   251 val scan_verb =
   255 val scan_verb =
   252   scan_blank ||
   256   count ($$ "*" --| Scan.ahead (~$$ "}")) ||
   253   keep_line ($$ "*" --| Scan.ahead (~$$ "}")) ||
   257   count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s));
   254   keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s));
       
   255 
   258 
   256 val scan_verbatim =
   259 val scan_verbatim =
   257   keep_line ($$ "{" -- $$ "*") |--
   260   count ($$ "{") |-- count ($$ "*") |--
   258     !!! "missing end of verbatim text"
   261     !!! "missing end of verbatim text"
   259       (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
   262       (change_prompt ((Scan.repeat scan_verb >> implode) --| count ($$ "*") --| count ($$ "}")));
   260 
   263 
   261 
   264 
   262 (* scan space *)
   265 (* scan space *)
   263 
   266 
   264 fun is_space s = Symbol.is_blank s andalso s <> "\n";
   267 fun is_space s = Symbol.is_blank s andalso s <> "\n";
   265 
   268 
   266 val scan_space =
   269 val scan_space =
   267   (keep_line (Scan.many1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" ||
   270   (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] ||
   268     keep_line (Scan.many is_space) -- incr_line ($$ "\n")) >> (fn (cs, c) => implode cs ^ c);
   271     Scan.many is_space @@@ ($$ "\n" >> single));
   269 
   272 
   270 
   273 
   271 (* scan nested comments *)
   274 (* scan nested comments *)
   272 
   275 
   273 val scan_cmt =
   276 val scan_cmt =
   274   Scan.lift scan_blank ||
   277   Scan.depend (fn d => count ($$ "(") ^^ count ($$ "*") >> pair (d + 1)) ||
   275   Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
   278   Scan.depend (fn 0 => Scan.fail | d => count ($$ "*") ^^ count ($$ ")") >> pair (d - 1)) ||
   276   Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
   279   Scan.lift (count ($$ "*" --| Scan.ahead (~$$ ")"))) ||
   277   Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
   280   Scan.lift (count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
   278   Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
       
   279 
   281 
   280 val scan_comment =
   282 val scan_comment =
   281   keep_line ($$ "(" -- $$ "*") |--
   283   count ($$ "(") |-- count ($$ "*") |--
   282     !!! "missing end of comment"
   284     !!! "missing end of comment"
   283       (change_prompt
   285       (change_prompt
   284         (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
   286         (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| count ($$ "*") --| count ($$ ")")));
   285 
   287 
   286 
   288 
   287 (* scan malformed symbols *)
   289 (* scan malformed symbols *)
   288 
   290 
   289 local
       
   290 
       
   291 val scan_mal =
       
   292   scan_blank ||
       
   293   keep_line (Scan.one Symbol.is_regular);
       
   294 
       
   295 in
       
   296 
       
   297 val scan_malformed =
   291 val scan_malformed =
   298   keep_line ($$ Symbol.malformed) |--
   292   $$ Symbol.malformed |--
   299     change_prompt (Scan.repeat scan_mal >> implode)
   293     change_prompt (Scan.many Symbol.is_regular)
   300       --| keep_line (Scan.option ($$ Symbol.end_malformed));
   294   --| Scan.option ($$ Symbol.end_malformed);
   301 
       
   302 end;
       
   303 
   295 
   304 
   296 
   305 (* scan token *)
   297 (* scan token *)
   306 
   298 
   307 fun scan (lex1, lex2) =
   299 fun scan (lex1, lex2) =
   312         fun sync _ = token Sync Symbol.sync;
   304         fun sync _ = token Sync Symbol.sync;
   313       in
   305       in
   314         scan_string >> token String ||
   306         scan_string >> token String ||
   315         scan_alt_string >> token AltString ||
   307         scan_alt_string >> token AltString ||
   316         scan_verbatim >> token Verbatim ||
   308         scan_verbatim >> token Verbatim ||
   317         scan_space >> token Space ||
       
   318         scan_comment >> token Comment ||
   309         scan_comment >> token Comment ||
   319         scan_malformed >> token Malformed ||
   310         counted scan_space >> token Space ||
   320         keep_line (Scan.one Symbol.is_sync >> sync) ||
   311         counted scan_malformed >> token Malformed ||
   321         keep_line (Scan.max token_leq
   312         Scan.lift (Scan.one Symbol.is_sync >> sync) ||
       
   313         (Scan.max token_leq
   322           (Scan.max token_leq
   314           (Scan.max token_leq
   323             (Scan.literal lex1 >> (token Keyword o implode))
   315             (counted (Scan.literal lex1) >> token Keyword)
   324             (Scan.literal lex2 >> (token Command o implode)))
   316             (counted (Scan.literal lex2) >> token Command))
   325           (Syntax.scan_longid >> token LongIdent ||
   317           (counted Syntax.scan_longid >> token LongIdent ||
   326             Syntax.scan_id >> token Ident ||
   318             counted Syntax.scan_id >> token Ident ||
   327             Syntax.scan_var >> token Var ||
   319             counted Syntax.scan_var >> token Var ||
   328             Syntax.scan_tid >> token TypeIdent ||
   320             counted Syntax.scan_tid >> token TypeIdent ||
   329             Syntax.scan_tvar >> token TypeVar ||
   321             counted Syntax.scan_tvar >> token TypeVar ||
   330             Syntax.scan_nat >> token Nat ||
   322             counted Syntax.scan_nat >> token Nat ||
   331             scan_symid >> token SymIdent))
   323             counted scan_symid >> token SymIdent))
   332       end);
   324       end);
   333   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   325   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   334 
   326 
   335 
   327 
   336 (* token sources *)
   328 (* token sources *)
   338 local
   330 local
   339 
   331 
   340 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   332 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   341 
   333 
   342 fun recover msg = Scan.state :|-- (fn pos =>
   334 fun recover msg = Scan.state :|-- (fn pos =>
   343   keep_line (Scan.many is_junk) >> (fn xs => [Token (pos, (Error msg, implode xs))]));
   335   counted (Scan.many is_junk) >> (fn s => [Token (pos, (Error msg, s))]));
   344 
   336 
   345 in
   337 in
   346 
   338 
   347 fun source do_recover get_lex pos src =
   339 fun source do_recover get_lex pos src =
   348   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   340   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))