src/Pure/Isar/outer_lex.ML
changeset 10748 74ed77fa5310
parent 9195 29f1e53f9937
child 14729 0e987111a17e
equal deleted inserted replaced
10747:794cd4d768b5 10748:74ed77fa5310
     8 
     8 
     9 signature OUTER_LEX =
     9 signature OUTER_LEX =
    10 sig
    10 sig
    11   datatype token_kind =
    11   datatype token_kind =
    12     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    12     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    13     Nat | String | Verbatim | Space | Comment | Sync | EOF
    13     Nat | String | Verbatim | Space | Comment | Sync | Malformed | EOF
    14   type token
    14   type token
    15   val str_of_kind: token_kind -> string
    15   val str_of_kind: token_kind -> string
    16   val stopper: token * (token -> bool)
    16   val stopper: token * (token -> bool)
    17   val not_sync: token -> bool
    17   val not_sync: token -> bool
    18   val not_eof: token -> bool
    18   val not_eof: token -> bool
    52 
    52 
    53 (* datatype token *)
    53 (* datatype token *)
    54 
    54 
    55 datatype token_kind =
    55 datatype token_kind =
    56   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    56   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    57   Nat | String | Verbatim | Space | Comment | Sync | EOF;
    57   Nat | String | Verbatim | Space | Comment | Sync | Malformed | EOF;
    58 
    58 
    59 datatype token = Token of Position.T * (token_kind * string);
    59 datatype token = Token of Position.T * (token_kind * string);
    60 
    60 
    61 val str_of_kind =
    61 val str_of_kind =
    62  fn Command => "command"
    62  fn Command => "command"
    71   | String => "string"
    71   | String => "string"
    72   | Verbatim => "verbatim text"
    72   | Verbatim => "verbatim text"
    73   | Space => "white space"
    73   | Space => "white space"
    74   | Comment => "comment text"
    74   | Comment => "comment text"
    75   | Sync => "sync marker"
    75   | Sync => "sync marker"
       
    76   | Malformed => "bad input"
    76   | EOF => "end-of-file";
    77   | EOF => "end-of-file";
    77 
    78 
    78 
    79 
    79 (* sync token *)
    80 (* control tokens *)
    80 
    81 
    81 fun not_sync (Token (_, (Sync, _))) = false
    82 fun not_sync (Token (_, (Sync, _))) = false
    82   | not_sync _ = true;
    83   | not_sync _ = true;
       
    84 
       
    85 val malformed = Token (Position.none, (Malformed, ""));
    83 
    86 
    84 
    87 
    85 (* eof token *)
    88 (* eof token *)
    86 
    89 
    87 val eof = Token (Position.none, (EOF, ""));
    90 val eof = Token (Position.none, (EOF, ""));
   131 
   134 
   132 
   135 
   133 (* name and value of token *)
   136 (* name and value of token *)
   134 
   137 
   135 fun name_of (tok as Token (_, (k, x))) =
   138 fun name_of (tok as Token (_, (k, x))) =
   136   if is_semicolon tok then "terminator" else str_of_kind k ^ " " ^ quote x;
   139   if is_semicolon tok then "terminator"
       
   140   else if x = "" then str_of_kind k
       
   141   else str_of_kind k ^ " " ^ quote x;
   137 
   142 
   138 fun val_of (Token (_, (_, x))) = x;
   143 fun val_of (Token (_, (_, x))) = x;
   139 
   144 
   140 fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
   145 fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
   141 
   146 
   263 
   268 
   264 
   269 
   265 (* token sources *)
   270 (* token sources *)
   266 
   271 
   267 val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof;
   272 val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof;
   268 fun recover xs = keep_line (Scan.any is_junk) xs;
   273 fun recover xs = (keep_line (Scan.any is_junk) >> K [malformed]) xs;
   269 
   274 
   270 fun source do_recover get_lex pos src =
   275 fun source do_recover get_lex pos src =
   271   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   276   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   272     (if do_recover then Some recover else None) src;
   277     (if do_recover then Some recover else None) src;
   273 
   278