src/Pure/Isar/token.ML
changeset 67439 78759a7bd874
parent 66067 cdbcb417db67
child 67440 e5ba0ca1e465
equal deleted inserted replaced
67438:fdb7b995974d 67439:78759a7bd874
     9   datatype kind =
     9   datatype kind =
    10     (*immediate source*)
    10     (*immediate source*)
    11     Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
    11     Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
    12     Float | Space |
    12     Float | Space |
    13     (*delimited content*)
    13     (*delimited content*)
    14     String | Alt_String | Verbatim | Cartouche | Comment |
    14     String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
    15     (*special content*)
    15     (*special content*)
    16     Error of string | EOF
    16     Error of string | EOF
    17   val str_of_kind: kind -> string
    17   val str_of_kind: kind -> string
    18   type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}
    18   type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}
    19   type T
    19   type T
    42   val is_command_modifier: T -> bool
    42   val is_command_modifier: T -> bool
    43   val ident_with: (string -> bool) -> T -> bool
    43   val ident_with: (string -> bool) -> T -> bool
    44   val is_proper: T -> bool
    44   val is_proper: T -> bool
    45   val is_improper: T -> bool
    45   val is_improper: T -> bool
    46   val is_comment: T -> bool
    46   val is_comment: T -> bool
       
    47   val is_informal_comment: T -> bool
       
    48   val is_formal_comment: T -> bool
    47   val is_begin_ignore: T -> bool
    49   val is_begin_ignore: T -> bool
    48   val is_end_ignore: T -> bool
    50   val is_end_ignore: T -> bool
    49   val is_error: T -> bool
    51   val is_error: T -> bool
    50   val is_space: T -> bool
    52   val is_space: T -> bool
    51   val is_blank: T -> bool
    53   val is_blank: T -> bool
   114 datatype kind =
   116 datatype kind =
   115   (*immediate source*)
   117   (*immediate source*)
   116   Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
   118   Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
   117   Float | Space |
   119   Float | Space |
   118   (*delimited content*)
   120   (*delimited content*)
   119   String | Alt_String | Verbatim | Cartouche | Comment |
   121   String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
   120   (*special content*)
   122   (*special content*)
   121   Error of string | EOF;
   123   Error of string | EOF;
   122 
   124 
   123 val str_of_kind =
   125 val str_of_kind =
   124  fn Command => "command"
   126  fn Command => "command"
   134   | Space => "white space"
   136   | Space => "white space"
   135   | String => "quoted string"
   137   | String => "quoted string"
   136   | Alt_String => "back-quoted string"
   138   | Alt_String => "back-quoted string"
   137   | Verbatim => "verbatim text"
   139   | Verbatim => "verbatim text"
   138   | Cartouche => "text cartouche"
   140   | Cartouche => "text cartouche"
   139   | Comment => "comment text"
   141   | Comment NONE => "informal comment"
       
   142   | Comment (SOME _) => "formal comment"
   140   | Error _ => "bad input"
   143   | Error _ => "bad input"
   141   | EOF => "end-of-input";
   144   | EOF => "end-of-input";
   142 
   145 
   143 val immediate_kinds =
   146 val immediate_kinds =
   144   Vector.fromList
   147   Vector.fromList
   145     [Command, Keyword, Ident, Long_Ident, Sym_Ident, Var, Type_Ident, Type_Var, Nat, Float, Space];
   148     [Command, Keyword, Ident, Long_Ident, Sym_Ident, Var, Type_Ident, Type_Var, Nat, Float, Space];
   146 
   149 
   147 val delimited_kind = member (op =) [String, Alt_String, Verbatim, Cartouche, Comment];
   150 val delimited_kind =
       
   151   (fn String => true
       
   152     | Alt_String => true
       
   153     | Verbatim => true
       
   154     | Cartouche => true
       
   155     | Comment _ => true
       
   156     | _ => false);
   148 
   157 
   149 
   158 
   150 (* datatype token *)
   159 (* datatype token *)
   151 
   160 
   152 (*The value slot assigns an (optional) internal value to a token,
   161 (*The value slot assigns an (optional) internal value to a token,
   218 
   227 
   219 fun ident_with pred (Token (_, (Ident, x), _)) = pred x
   228 fun ident_with pred (Token (_, (Ident, x), _)) = pred x
   220   | ident_with _ _ = false;
   229   | ident_with _ _ = false;
   221 
   230 
   222 fun is_proper (Token (_, (Space, _), _)) = false
   231 fun is_proper (Token (_, (Space, _), _)) = false
   223   | is_proper (Token (_, (Comment, _), _)) = false
   232   | is_proper (Token (_, (Comment _, _), _)) = false
   224   | is_proper _ = true;
   233   | is_proper _ = true;
   225 
   234 
   226 val is_improper = not o is_proper;
   235 val is_improper = not o is_proper;
   227 
   236 
   228 fun is_comment (Token (_, (Comment, _), _)) = true
   237 fun is_comment (Token (_, (Comment _, _), _)) = true
   229   | is_comment _ = false;
   238   | is_comment _ = false;
   230 
   239 
   231 fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true
   240 fun is_informal_comment (Token (_, (Comment NONE, _), _)) = true
       
   241   | is_informal_comment _ = false;
       
   242 
       
   243 fun is_formal_comment (Token (_, (Comment (SOME _), _), _)) = true
       
   244   | is_formal_comment _ = false;
       
   245 
       
   246 fun is_begin_ignore (Token (_, (Comment NONE, "<"), _)) = true
   232   | is_begin_ignore _ = false;
   247   | is_begin_ignore _ = false;
   233 
   248 
   234 fun is_end_ignore (Token (_, (Comment, ">"), _)) = true
   249 fun is_end_ignore (Token (_, (Comment NONE, ">"), _)) = true
   235   | is_end_ignore _ = false;
   250   | is_end_ignore _ = false;
   236 
   251 
   237 fun is_error (Token (_, (Error _, _), _)) = true
   252 fun is_error (Token (_, (Error _, _), _)) = true
   238   | is_error _ = false;
   253   | is_error _ = false;
   239 
   254 
   272   | Type_Var => (Markup.tvar, "")
   287   | Type_Var => (Markup.tvar, "")
   273   | String => (Markup.string, "")
   288   | String => (Markup.string, "")
   274   | Alt_String => (Markup.alt_string, "")
   289   | Alt_String => (Markup.alt_string, "")
   275   | Verbatim => (Markup.verbatim, "")
   290   | Verbatim => (Markup.verbatim, "")
   276   | Cartouche => (Markup.cartouche, "")
   291   | Cartouche => (Markup.cartouche, "")
   277   | Comment => (Markup.comment, "")
   292   | Comment NONE => (Markup.comment, "")
   278   | Error msg => (Markup.bad (), msg)
   293   | Error msg => (Markup.bad (), msg)
   279   | _ => (Markup.empty, "");
   294   | _ => (Markup.empty, "");
   280 
   295 
   281 fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), ""));
   296 fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), ""));
   282 
   297 
   319   (case kind of
   334   (case kind of
   320     String => Symbol_Pos.quote_string_qq x
   335     String => Symbol_Pos.quote_string_qq x
   321   | Alt_String => Symbol_Pos.quote_string_bq x
   336   | Alt_String => Symbol_Pos.quote_string_bq x
   322   | Verbatim => enclose "{*" "*}" x
   337   | Verbatim => enclose "{*" "*}" x
   323   | Cartouche => cartouche x
   338   | Cartouche => cartouche x
   324   | Comment => enclose "(*" "*)" x
   339   | Comment NONE => enclose "(*" "*)" x
   325   | EOF => ""
   340   | EOF => ""
   326   | _ => x);
   341   | _ => x);
   327 
   342 
   328 fun print tok = Markup.markups (markups Keyword.empty_keywords tok) (unparse tok);
   343 fun print tok = Markup.markups (markups Keyword.empty_keywords tok) (unparse tok);
   329 
   344 
   606 fun scan_token keywords = !!! "bad input"
   621 fun scan_token keywords = !!! "bad input"
   607   (Symbol_Pos.scan_string_qq err_prefix >> token_range String ||
   622   (Symbol_Pos.scan_string_qq err_prefix >> token_range String ||
   608     Symbol_Pos.scan_string_bq err_prefix >> token_range Alt_String ||
   623     Symbol_Pos.scan_string_bq err_prefix >> token_range Alt_String ||
   609     scan_verbatim >> token_range Verbatim ||
   624     scan_verbatim >> token_range Verbatim ||
   610     scan_cartouche >> token_range Cartouche ||
   625     scan_cartouche >> token_range Cartouche ||
   611     scan_comment >> token_range Comment ||
   626     scan_comment >> token_range (Comment NONE) ||
       
   627     (Comment.scan_cancel || Comment.scan_latex) >> (fn (k, ss) => token (Comment (SOME k)) ss) ||
   612     scan_space >> token Space ||
   628     scan_space >> token Space ||
   613     (Scan.max token_leq
   629     (Scan.max token_leq
   614       (Scan.max token_leq
   630       (Scan.max token_leq
   615         (Scan.literal (Keyword.major_keywords keywords) >> pair Command)
   631         (Scan.literal (Keyword.major_keywords keywords) >> pair Command)
   616         (Scan.literal (Keyword.minor_keywords keywords) >> pair Keyword))
   632         (Scan.literal (Keyword.minor_keywords keywords) >> pair Keyword))