| author | blanchet | 
| Fri, 05 Sep 2014 00:41:01 +0200 | |
| changeset 58187 | d2ddd401d74d | 
| parent 57808 | cf72aed038c8 | 
| permissions | -rw-r--r-- | 
| 46844 | 1 | (* Title: HOL/TPTP/TPTP_Parser/tptp.lex | 
| 2 | Author: Nik Sultana, Cambridge University Computer Laboratory | |
| 3 | ||
| 4 | Notes: | |
| 5 | * Omit %full in definitions to restrict alphabet to ascii. | |
| 47358 | 6 | * Could include %posarg to ensure that we'd start counting character positions | 
| 7 | from 0, but it would punish performance. | |
| 46844 | 8 | * %s AF F COMMENT; -- could improve by making stateful. | 
| 9 | ||
| 10 | Acknowledgements: | |
| 11 | * Geoff Sutcliffe for help with TPTP. | |
| 12 | * Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML. | |
| 13 | * An early version of this was ported from the specification shipped with | |
| 14 | Leo-II, written by Frank Theiss. | |
| 15 | * Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price. | |
| 16 | * Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration. | |
| 17 | *) | |
| 18 | ||
| 19 | structure T = Tokens | |
| 20 | type pos = int (* Position in file *) | |
| 21 | type lineNo = int | |
| 22 | type svalue = T.svalue | |
| 23 | type ('a,'b) token = ('a,'b) T.token
 | |
| 24 | type lexresult = (svalue,pos) token | |
| 25 | type lexarg = string | |
| 26 | type arg = lexarg | |
| 27 | val col = ref 0; | |
| 28 | val linep = ref 1; (* Line pointer *) | |
| 29 | val eolpos = ref 0; | |
| 30 | ||
| 31 | val badCh : string * string * int * int -> unit = fn | |
| 32 | (file_name, bad, line, col) => | |
| 33 | TextIO.output(TextIO.stdOut, file_name ^ "[" | |
| 34 | ^ Int.toString line ^ "." ^ Int.toString col | |
| 35 | ^ "] Invalid character \"" ^ bad ^ "\"\n"); | |
| 36 | ||
| 37 | val eof = fn file_name => | |
| 38 | let | |
| 39 | val result = T.EOF (!linep,!col); | |
| 40 | val _ = linep := 0; | |
| 41 | in result end | |
| 42 | (*here could check whether file ended prematurely: | |
| 43 | see if have open brackets, or if we're in some state other than INITIAL*) | |
| 44 | ||
| 45 | val count_commentlines : string -> unit = fn str => | |
| 46 | let | |
| 47 | val str' = String.explode str | |
| 48 | val newlines = List.filter (fn x => x = #"\n") str' | |
| 49 | in linep := (!linep) + (List.length newlines) end | |
| 50 | ||
| 51 | %% | |
| 52 | %header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS)); | |
| 53 | %arg (file_name:string); | |
| 54 | ||
| 47358 | 55 | percentage_sign = "%"; | 
| 56 | double_quote = ["]; | |
| 47689 | 57 | do_char = ([^"\\]|[\\]["\\]); | 
| 47358 | 58 | single_quote = [']; | 
| 47689 | 59 | sq_char = ([^'\\]|[\\]['\\]); | 
| 47358 | 60 | sign = [+-]; | 
| 61 | dot = [.]; | |
| 62 | exponent = [Ee]; | |
| 63 | slash = [/]; | |
| 64 | zero_numeric = [0]; | |
| 65 | non_zero_numeric = [1-9]; | |
| 46844 | 66 | numeric = [0-9]; | 
| 67 | lower_alpha = [a-z]; | |
| 68 | upper_alpha = [A-Z]; | |
| 69 | alpha_numeric             = ({lower_alpha}|{upper_alpha}|{numeric}|_);
 | |
| 70 | dollar = \$; | |
| 47358 | 71 | printable_char = .; | 
| 72 | viewable_char = [.\n]; | |
| 73 | ||
| 74 | dot_decimal               = {dot}{numeric}+;
 | |
| 75 | ||
| 46844 | 76 | ddollar = \$\$; | 
| 77 | unsigned_integer          = {numeric}+;
 | |
| 78 | divide = [/]; | |
| 79 | ||
| 80 | signed_integer            = {sign}{unsigned_integer};
 | |
| 81 | exp_suffix                = {exponent}({signed_integer}|{unsigned_integer});
 | |
| 82 | real                      = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?;
 | |
| 83 | rational                  = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer};
 | |
| 84 | ||
| 47358 | 85 | lower_word                = {lower_alpha}{alpha_numeric}*;
 | 
| 86 | upper_word                = {upper_alpha}{alpha_numeric}*;
 | |
| 87 | dollar_dollar_word        = {ddollar}{lower_word};
 | |
| 88 | dollar_word               = {dollar}{lower_word};
 | |
| 57808 | 89 | dollar_underscore         = {dollar}_;
 | 
| 46844 | 90 | |
| 47358 | 91 | distinct_object           = {double_quote}{do_char}+{double_quote};
 | 
| 46844 | 92 | |
| 93 | ws = ([\ ]|[\t]); | |
| 47689 | 94 | single_quoted             = {single_quote}{sq_char}+{single_quote};
 | 
| 46844 | 95 | |
| 96 | system_comment_one        = [%][\ ]*{ddollar}[_]*;
 | |
| 97 | system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/]; | |
| 98 | system_comment = (system_comment_one)|(system_comment_multi); | |
| 99 | ||
| 47358 | 100 | comment_line              = {percentage_sign}[^\n]*;
 | 
| 101 | comment_block = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/]; | |
| 102 | comment                   = ({comment_line}|{comment_block})+;
 | |
| 103 | ||
| 104 | eol                       = ("\013\010"|"\010"|"\013");
 | |
| 46844 | 105 | |
| 106 | %% | |
| 107 | ||
| 108 | {ws}*           => (col:=(!col)+size yytext; continue () );
 | |
| 109 | ||
| 110 | {eol}           => (linep:=(!linep)+1;
 | |
| 111 | eolpos:=yypos+size yytext; continue ()); | |
| 112 | ||
| 113 | "&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col)); | |
| 114 | ||
| 115 | "@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col)); | |
| 116 | "@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col)); | |
| 117 | ||
| 118 | "!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col)); | |
| 119 | "??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col)); | |
| 120 | ||
| 121 | "@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col)); | |
| 122 | "^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col)); | |
| 123 | ||
| 124 | ":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col)); | |
| 125 | "," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col)); | |
| 126 | "=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col)); | |
| 127 | "!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col)); | |
| 128 | ":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col)); | |
| 129 | ">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col)); | |
| 130 | ||
| 47357 
15e579392a68
refactored tptp yacc to bring close to official spec;
 sultana parents: 
46844diff
changeset | 131 | "<=" => (col:=yypos-(!eolpos); T.FI(!linep,!col)); | 
| 46844 | 132 | "<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col)); | 
| 133 | "=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col)); | |
| 134 | "[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col)); | |
| 135 | "("             => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col));
 | |
| 136 | "->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col)); | |
| 137 | "--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col)); | |
| 138 | "~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col)); | |
| 139 | "!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col)); | |
| 140 | "<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col)); | |
| 141 | "~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col)); | |
| 142 | "." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col)); | |
| 143 | "++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col)); | |
| 144 | "?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col)); | |
| 145 | "]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col)); | |
| 146 | ")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col)); | |
| 147 | "~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col)); | |
| 148 | "|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col)); | |
| 149 | ||
| 150 | {distinct_object}    => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col));
 | |
| 151 | {rational}           => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col));
 | |
| 152 | {real}               => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col));
 | |
| 153 | {signed_integer}     => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col));
 | |
| 154 | {unsigned_integer}   => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col));
 | |
| 155 | {dot_decimal}        => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col));
 | |
| 156 | {single_quoted}      => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col));
 | |
| 157 | {upper_word}         => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col));
 | |
| 158 | {comment}            => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col));
 | |
| 159 | ||
| 160 | "thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col)); | |
| 161 | "fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col)); | |
| 162 | "cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col)); | |
| 163 | "tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col)); | |
| 164 | "include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col)); | |
| 165 | ||
| 166 | "$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col)); | |
| 167 | "$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col)); | |
| 168 | "$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col)); | |
| 169 | "$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col)); | |
| 170 | "$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col)); | |
| 171 | ||
| 172 | "$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col)); | |
| 173 | "$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col)); | |
| 47357 
15e579392a68
refactored tptp yacc to bring close to official spec;
 sultana parents: 
46844diff
changeset | 174 | "$let_tf" => (col:=yypos-(!eolpos); T.LET_TF(!linep,!col)); | 
| 
15e579392a68
refactored tptp yacc to bring close to official spec;
 sultana parents: 
46844diff
changeset | 175 | "$let_ff" => (col:=yypos-(!eolpos); T.LET_FF(!linep,!col)); | 
| 
15e579392a68
refactored tptp yacc to bring close to official spec;
 sultana parents: 
46844diff
changeset | 176 | "$let_ft" => (col:=yypos-(!eolpos); T.LET_FT(!linep,!col)); | 
| 
15e579392a68
refactored tptp yacc to bring close to official spec;
 sultana parents: 
46844diff
changeset | 177 | "$let_tt" => (col:=yypos-(!eolpos); T.LET_TT(!linep,!col)); | 
| 46844 | 178 | |
| 47358 | 179 | {lower_word}   => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col));
 | 
| 180 | {dollar_word}  => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col));
 | |
| 57808 | 181 | {dollar_underscore}  => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col));
 | 
| 47358 | 182 | {dollar_dollar_word}  => (col:=yypos-(!eolpos); T.DOLLAR_DOLLAR_WORD(yytext,!linep,!col));
 | 
| 46844 | 183 | |
| 184 | "+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col)); | |
| 185 | "*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col)); | |
| 186 | "-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col)); | |
| 187 | "<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col)); | |
| 188 | "!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col)); | |
| 189 | "?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col)); | |
| 190 | ||
| 191 | ":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col)); |