src/HOL/TPTP/TPTP_Parser/tptp.lex
changeset 46844 5d9aab0c609c
child 47357 15e579392a68
equal deleted inserted replaced
46843:8d5d255bf89c 46844:5d9aab0c609c
       
     1 (*  Title:      HOL/TPTP/TPTP_Parser/tptp.lex
       
     2     Author:     Nik Sultana, Cambridge University Computer Laboratory
       
     3 
       
     4  Notes:
       
     5  * Omit %full in definitions to restrict alphabet to ascii.
       
     6  * Could include %posarg to ensure that start counting character positions from
       
     7    0, but it would punish performance.
       
     8  * %s AF F COMMENT; -- could improve by making stateful.
       
     9 
       
    10  Acknowledgements:
       
    11  * Geoff Sutcliffe for help with TPTP.
       
    12  * Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML.
       
    13  * An early version of this was ported from the specification shipped with
       
    14    Leo-II, written by Frank Theiss.
       
    15  * Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price.
       
    16  * Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration.
       
    17 *)
       
    18 
       
    19 structure T = Tokens
       
    20 type pos = int             (* Position in file *)
       
    21 type lineNo = int
       
    22 type svalue = T.svalue
       
    23 type ('a,'b) token = ('a,'b) T.token
       
    24 type lexresult = (svalue,pos) token
       
    25 type lexarg = string
       
    26 type arg = lexarg
       
    27 val col = ref 0;
       
    28 val linep = ref 1;         (* Line pointer *)
       
    29 val eolpos = ref 0;
       
    30 
       
    31 val badCh : string * string * int * int -> unit = fn
       
    32     (file_name, bad, line, col) =>
       
    33     TextIO.output(TextIO.stdOut, file_name ^ "["
       
    34           ^ Int.toString line ^ "." ^ Int.toString col
       
    35           ^ "] Invalid character \"" ^ bad ^ "\"\n");
       
    36 
       
    37 val eof = fn file_name =>
       
    38   let
       
    39     val result = T.EOF (!linep,!col);
       
    40     val _ = linep := 0;
       
    41   in result end
       
    42 (*here could check whether file ended prematurely:
       
    43  see if have open brackets, or if we're in some state other than INITIAL*)
       
    44 
       
    45 val count_commentlines : string -> unit = fn str =>
       
    46   let
       
    47     val str' = String.explode str
       
    48     val newlines = List.filter (fn x => x = #"\n") str'
       
    49   in linep := (!linep) + (List.length newlines) end
       
    50 
       
    51 %%
       
    52 %header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS));
       
    53 %arg (file_name:string);
       
    54 
       
    55 printable_char            = .;
       
    56 viewable_char             = [.\n];
       
    57 numeric                   = [0-9];
       
    58 lower_alpha               = [a-z];
       
    59 upper_alpha               = [A-Z];
       
    60 alpha_numeric             = ({lower_alpha}|{upper_alpha}|{numeric}|_);
       
    61 zero_numeric              = [0];
       
    62 non_zero_numeric          = [1-9];
       
    63 slash                     = [/];
       
    64 exponent                  = [Ee];
       
    65 dot                       = [.];
       
    66 any_char                  = [^\n];
       
    67 dollar                    = \$;
       
    68 ddollar                   = \$\$;
       
    69 unsigned_integer          = {numeric}+;
       
    70 sign                      = [+-];
       
    71 divide                    = [/];
       
    72 
       
    73 signed_integer            = {sign}{unsigned_integer};
       
    74 dot_decimal               = {dot}{numeric}+;
       
    75 exp_suffix                = {exponent}({signed_integer}|{unsigned_integer});
       
    76 real                      = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?;
       
    77 upper_word                = {upper_alpha}{alpha_numeric}*;
       
    78 rational                  = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer};
       
    79 
       
    80 percentage_sign           = "%";
       
    81 
       
    82 sq_char                   = ([\040-\041\043-\126]|[\\]['\\]);
       
    83 
       
    84 ws                        = ([\ ]|[\t]);
       
    85 eol                       = ("\013\010"|"\010"|"\013");
       
    86 
       
    87 single_quote              = ['];
       
    88 single_quoted             = {single_quote}({alpha_numeric}|{sq_char}|{ws})+{single_quote};
       
    89 
       
    90 lower_word                = {lower_alpha}{alpha_numeric}*;
       
    91 atomic_system_word        = {ddollar}{lower_word};
       
    92 atomic_defined_word       = {dollar}{lower_word};
       
    93 
       
    94 system_comment_one        = [%][\ ]*{ddollar}[_]*;
       
    95 system_comment_multi      = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/];
       
    96 system_comment            = (system_comment_one)|(system_comment_multi);
       
    97 comment_one               = {percentage_sign}[^\n]*;
       
    98 comment_multi             = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/];
       
    99 comment                   = ({comment_one}|{comment_multi})+;
       
   100 
       
   101 do_char                   = ([^"]|[\\]["\\]);
       
   102 double_quote              = ["];
       
   103 distinct_object           = {double_quote}{do_char}+{double_quote};
       
   104 
       
   105 %%
       
   106 
       
   107 {ws}*           => (col:=(!col)+size yytext; continue () );
       
   108 
       
   109 {eol}           => (linep:=(!linep)+1;
       
   110                    eolpos:=yypos+size yytext; continue ());
       
   111 
       
   112 "&"             => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col));
       
   113 
       
   114 "@+"            => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col));
       
   115 "@-"            => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col));
       
   116 
       
   117 "!!"            => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col));
       
   118 "??"            => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col));
       
   119 
       
   120 "@"             => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col));
       
   121 "^"             => (col:=yypos-(!eolpos); T.CARET(!linep,!col));
       
   122 
       
   123 ":"             => (col:=yypos-(!eolpos); T.COLON(!linep,!col));
       
   124 ","             => (col:=yypos-(!eolpos); T.COMMA(!linep,!col));
       
   125 "="             => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col));
       
   126 "!"             => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col));
       
   127 ":="            => (col:=yypos-(!eolpos); T.LET(!linep,!col));
       
   128 ">"             => (col:=yypos-(!eolpos); T.ARROW(!linep,!col));
       
   129 
       
   130 "<="            => (col:=yypos-(!eolpos); T.IF(!linep,!col));
       
   131 "<=>"           => (col:=yypos-(!eolpos); T.IFF(!linep,!col));
       
   132 "=>"            => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col));
       
   133 "["             => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col));
       
   134 "("             => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col));
       
   135 "->"            => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col));
       
   136 "--"            => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col));
       
   137 "~&"            => (col:=yypos-(!eolpos); T.NAND(!linep,!col));
       
   138 "!="            => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col));
       
   139 "<~>"           => (col:=yypos-(!eolpos); T.XOR(!linep,!col));
       
   140 "~|"            => (col:=yypos-(!eolpos); T.NOR(!linep,!col));
       
   141 "."             => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col));
       
   142 "++"            => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col));
       
   143 "?"             => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col));
       
   144 "]"             => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col));
       
   145 ")"             => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col));
       
   146 "~"             => (col:=yypos-(!eolpos); T.TILDE(!linep,!col));
       
   147 "|"             => (col:=yypos-(!eolpos); T.VLINE(!linep,!col));
       
   148 
       
   149 {distinct_object}    => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col));
       
   150 {rational}           => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col));
       
   151 {real}               => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col));
       
   152 {signed_integer}     => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col));
       
   153 {unsigned_integer}   => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col));
       
   154 {dot_decimal}        => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col));
       
   155 {single_quoted}      => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col));
       
   156 {upper_word}         => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col));
       
   157 {comment}            => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col));
       
   158 
       
   159 "thf"          => (col:=yypos-(!eolpos); T.THF(!linep,!col));
       
   160 "fof"          => (col:=yypos-(!eolpos); T.FOF(!linep,!col));
       
   161 "cnf"          => (col:=yypos-(!eolpos); T.CNF(!linep,!col));
       
   162 "tff"          => (col:=yypos-(!eolpos); T.TFF(!linep,!col));
       
   163 "include"      => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col));
       
   164 
       
   165 "$thf"          => (col:=yypos-(!eolpos); T.DTHF(!linep,!col));
       
   166 "$fof"          => (col:=yypos-(!eolpos); T.DFOF(!linep,!col));
       
   167 "$cnf"          => (col:=yypos-(!eolpos); T.DCNF(!linep,!col));
       
   168 "$fot"          => (col:=yypos-(!eolpos); T.DFOT(!linep,!col));
       
   169 "$tff"          => (col:=yypos-(!eolpos); T.DTFF(!linep,!col));
       
   170 
       
   171 "$ite_f"        => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col));
       
   172 "$ite_t"        => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col));
       
   173 
       
   174 {lower_word}          => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col));
       
   175 {atomic_system_word}  => (col:=yypos-(!eolpos); T.ATOMIC_SYSTEM_WORD(yytext,!linep,!col));
       
   176 {atomic_defined_word} => (col:=yypos-(!eolpos); T.ATOMIC_DEFINED_WORD(yytext,!linep,!col));
       
   177 
       
   178 "+"           => (col:=yypos-(!eolpos); T.PLUS(!linep,!col));
       
   179 "*"           => (col:=yypos-(!eolpos); T.TIMES(!linep,!col));
       
   180 "-->"         => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col));
       
   181 "<<"          => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col));
       
   182 "!>"          => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col));
       
   183 "?*"          => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col));
       
   184 
       
   185 ":-"          => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col));