src/Pure/Isar/outer_lex.ML
changeset 6859 2b3db2b6c129
parent 6743 5d50225637c8
child 7026 69724548fad1
equal deleted inserted replaced
6858:5906a7929b85 6859:2b3db2b6c129
     7 
     7 
     8 signature OUTER_LEX =
     8 signature OUTER_LEX =
     9 sig
     9 sig
    10   datatype token_kind =
    10   datatype token_kind =
    11     Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
    11     Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
    12     String | Verbatim | Ignore | EOF
    12     String | Verbatim | Ignore | Sync | EOF
    13   type token
    13   type token
    14   val str_of_kind: token_kind -> string
    14   val str_of_kind: token_kind -> string
    15   val stopper: token * (token -> bool)
    15   val stopper: token * (token -> bool)
       
    16   val not_sync: token -> bool
    16   val not_eof: token -> bool
    17   val not_eof: token -> bool
    17   val position_of: token -> Position.T
    18   val position_of: token -> Position.T
    18   val pos_of: token -> string
    19   val pos_of: token -> string
    19   val is_kind: token_kind -> token -> bool
    20   val is_kind: token_kind -> token -> bool
    20   val keyword_pred: (string -> bool) -> token -> bool
    21   val keyword_pred: (string -> bool) -> token -> bool
    36 
    37 
    37 (* datatype token *)
    38 (* datatype token *)
    38 
    39 
    39 datatype token_kind =
    40 datatype token_kind =
    40   Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
    41   Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
    41   String | Verbatim | Ignore | EOF;
    42   String | Verbatim | Ignore | Sync | EOF;
    42 
    43 
    43 datatype token = Token of Position.T * (token_kind * string);
    44 datatype token = Token of Position.T * (token_kind * string);
    44 
    45 
    45 val str_of_kind =
    46 val str_of_kind =
    46  fn Keyword => "keyword"
    47  fn Keyword => "keyword"
    53   | TypeVar => "schematic type variable"
    54   | TypeVar => "schematic type variable"
    54   | Nat => "number"
    55   | Nat => "number"
    55   | String => "string"
    56   | String => "string"
    56   | Verbatim => "verbatim text"
    57   | Verbatim => "verbatim text"
    57   | Ignore => "ignored text"
    58   | Ignore => "ignored text"
       
    59   | Sync => "sync marker"
    58   | EOF => "end-of-file";
    60   | EOF => "end-of-file";
       
    61 
       
    62 
       
    63 (* sync token *)
       
    64 
       
    65 fun not_sync (Token (_, (Sync, _))) = false
       
    66   | not_sync _ = true;
    59 
    67 
    60 
    68 
    61 (* eof token *)
    69 (* eof token *)
    62 
    70 
    63 val eof = Token (Position.none, (EOF, ""));
    71 val eof = Token (Position.none, (EOF, ""));
   129 
   137 
   130 (* scan strings *)
   138 (* scan strings *)
   131 
   139 
   132 val scan_str =
   140 val scan_str =
   133   scan_blank >> K Symbol.space ||
   141   scan_blank >> K Symbol.space ||
   134   keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) ||
   142   keep_line ($$ "\\" |-- Scan.one (Symbol.not_sync andf Symbol.not_eof)) ||
   135   keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof));
   143   keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
       
   144     Symbol.not_sync andf Symbol.not_eof));
   136 
   145 
   137 val scan_string =
   146 val scan_string =
   138   keep_line ($$ "\"") |--
   147   keep_line ($$ "\"") |--
   139     !! (lex_err (K "missing quote at end of string"))
   148     !! (lex_err (K "missing quote at end of string"))
   140       (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
   149       (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
   143 (* scan verbatim text *)
   152 (* scan verbatim text *)
   144 
   153 
   145 val scan_verb =
   154 val scan_verb =
   146   scan_blank ||
   155   scan_blank ||
   147   keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) ||
   156   keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) ||
   148   keep_line (Scan.one (not_equal "*" andf Symbol.not_eof));
   157   keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof));
   149 
   158 
   150 val scan_verbatim =
   159 val scan_verbatim =
   151   keep_line ($$ "{" -- $$ "*") |--
   160   keep_line ($$ "{" -- $$ "*") |--
   152     !! (lex_err (K "missing end of verbatim text"))
   161     !! (lex_err (K "missing end of verbatim text"))
   153       (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
   162       (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
   167 val scan_cmt =
   176 val scan_cmt =
   168   Scan.lift scan_blank ||
   177   Scan.lift scan_blank ||
   169   Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
   178   Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
   170   Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
   179   Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
   171   Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
   180   Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
   172   Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)));
   181   Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof)));
   173 
   182 
   174 val scan_comment =
   183 val scan_comment =
   175   keep_line ($$ "(" -- $$ "*") |--
   184   keep_line ($$ "(" -- $$ "*") |--
   176     !! (lex_err (K "missing end of comment"))
   185     !! (lex_err (K "missing end of comment"))
   177       (change_prompt
   186       (change_prompt
   182 
   191 
   183 fun scan lex (pos, cs) =
   192 fun scan lex (pos, cs) =
   184   let
   193   let
   185     fun token k x = Token (pos, (k, x));
   194     fun token k x = Token (pos, (k, x));
   186     fun ignore _ = token Ignore "";
   195     fun ignore _ = token Ignore "";
       
   196     fun sync _ = token Sync Symbol.sync;
   187 
   197 
   188     val scanner =
   198     val scanner =
   189       scan_string >> token String ||
   199       scan_string >> token String ||
   190       scan_verbatim >> token Verbatim ||
   200       scan_verbatim >> token Verbatim ||
   191       scan_space >> ignore ||
   201       scan_space >> ignore ||
   192       scan_comment >> ignore ||
   202       scan_comment >> ignore ||
       
   203       keep_line (Scan.one Symbol.is_sync >> sync) ||
   193       keep_line (Scan.max token_leq
   204       keep_line (Scan.max token_leq
   194         (Scan.literal lex >> (token Keyword o implode))
   205         (Scan.literal lex >> (token Keyword o implode))
   195         (Syntax.scan_longid >> token LongIdent ||
   206         (Syntax.scan_longid >> token LongIdent ||
   196           Syntax.scan_id >> token Ident ||
   207           Syntax.scan_id >> token Ident ||
   197           Syntax.scan_var >> token Var ||
   208           Syntax.scan_var >> token Var ||
   198           $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar ||
   209           $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar ||
   199           Syntax.scan_tid >> token TypeIdent ||
   210           Syntax.scan_tid >> token TypeIdent ||
   200           Syntax.scan_tvar >> token TypeVar ||
   211           Syntax.scan_tvar >> token TypeVar ||
   201           Syntax.scan_nat >> token Nat ||
   212           Syntax.scan_nat >> token Nat ||
   202           scan_symid >> token SymIdent));
   213           scan_symid >> token SymIdent));
   203   in
   214   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs) end;
   204     !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs)
       
   205   end;
       
   206 
   215 
   207 
   216 
   208 (* source of (proper) tokens *)
   217 (* source of (proper) tokens *)
   209 
   218 
   210 fun recover xs = keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) xs;
   219 val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof;
       
   220 fun recover xs = keep_line (Scan.any1 is_junk) xs;
   211 
   221 
   212 fun source do_recover get_lex pos src =
   222 fun source do_recover get_lex pos src =
   213   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   223   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   214     (if do_recover then Some recover else None) src
   224     (if do_recover then Some recover else None) src
   215   |> Source.filter is_proper;
   225   |> Source.filter is_proper;