| author | blanchet | 
| Thu, 07 Aug 2014 12:17:41 +0200 | |
| changeset 57808 | cf72aed038c8 | 
| parent 47689 | f5c05e51668f | 
| permissions | -rw-r--r-- | 
| 46844 | 1  | 
(* Title: HOL/TPTP/TPTP_Parser/tptp.lex  | 
2  | 
Author: Nik Sultana, Cambridge University Computer Laboratory  | 
|
3  | 
||
4  | 
Notes:  | 
|
5  | 
* Omit %full in definitions to restrict alphabet to ascii.  | 
|
| 47358 | 6  | 
* Could include %posarg to ensure that we'd start counting character positions  | 
7  | 
from 0, but it would punish performance.  | 
|
| 46844 | 8  | 
* %s AF F COMMENT; -- could improve by making stateful.  | 
9  | 
||
10  | 
Acknowledgements:  | 
|
11  | 
* Geoff Sutcliffe for help with TPTP.  | 
|
12  | 
* Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML.  | 
|
13  | 
* An early version of this was ported from the specification shipped with  | 
|
14  | 
Leo-II, written by Frank Theiss.  | 
|
15  | 
* Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price.  | 
|
16  | 
* Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration.  | 
|
17  | 
*)  | 
|
18  | 
||
19  | 
structure T = Tokens  | 
|
20  | 
type pos = int (* Position in file *)  | 
|
21  | 
type lineNo = int  | 
|
22  | 
type svalue = T.svalue  | 
|
23  | 
type ('a,'b) token = ('a,'b) T.token
 | 
|
24  | 
type lexresult = (svalue,pos) token  | 
|
25  | 
type lexarg = string  | 
|
26  | 
type arg = lexarg  | 
|
27  | 
val col = ref 0;  | 
|
28  | 
val linep = ref 1; (* Line pointer *)  | 
|
29  | 
val eolpos = ref 0;  | 
|
30  | 
||
31  | 
val badCh : string * string * int * int -> unit = fn  | 
|
32  | 
(file_name, bad, line, col) =>  | 
|
33  | 
TextIO.output(TextIO.stdOut, file_name ^ "["  | 
|
34  | 
^ Int.toString line ^ "." ^ Int.toString col  | 
|
35  | 
^ "] Invalid character \"" ^ bad ^ "\"\n");  | 
|
36  | 
||
37  | 
val eof = fn file_name =>  | 
|
38  | 
let  | 
|
39  | 
val result = T.EOF (!linep,!col);  | 
|
40  | 
val _ = linep := 0;  | 
|
41  | 
in result end  | 
|
42  | 
(*here could check whether file ended prematurely:  | 
|
43  | 
see if have open brackets, or if we're in some state other than INITIAL*)  | 
|
44  | 
||
45  | 
val count_commentlines : string -> unit = fn str =>  | 
|
46  | 
let  | 
|
47  | 
val str' = String.explode str  | 
|
48  | 
val newlines = List.filter (fn x => x = #"\n") str'  | 
|
49  | 
in linep := (!linep) + (List.length newlines) end  | 
|
50  | 
||
51  | 
%%  | 
|
52  | 
%header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS));  | 
|
53  | 
%arg (file_name:string);  | 
|
54  | 
||
| 47358 | 55  | 
percentage_sign = "%";  | 
56  | 
double_quote = ["];  | 
|
| 47689 | 57  | 
do_char = ([^"\\]|[\\]["\\]);  | 
| 47358 | 58  | 
single_quote = ['];  | 
| 47689 | 59  | 
sq_char = ([^'\\]|[\\]['\\]);  | 
| 47358 | 60  | 
sign = [+-];  | 
61  | 
dot = [.];  | 
|
62  | 
exponent = [Ee];  | 
|
63  | 
slash = [/];  | 
|
64  | 
zero_numeric = [0];  | 
|
65  | 
non_zero_numeric = [1-9];  | 
|
| 46844 | 66  | 
numeric = [0-9];  | 
67  | 
lower_alpha = [a-z];  | 
|
68  | 
upper_alpha = [A-Z];  | 
|
69  | 
alpha_numeric             = ({lower_alpha}|{upper_alpha}|{numeric}|_);
 | 
|
70  | 
dollar = \$;  | 
|
| 47358 | 71  | 
printable_char = .;  | 
72  | 
viewable_char = [.\n];  | 
|
73  | 
||
74  | 
dot_decimal               = {dot}{numeric}+;
 | 
|
75  | 
||
| 46844 | 76  | 
ddollar = \$\$;  | 
77  | 
unsigned_integer          = {numeric}+;
 | 
|
78  | 
divide = [/];  | 
|
79  | 
||
80  | 
signed_integer            = {sign}{unsigned_integer};
 | 
|
81  | 
exp_suffix                = {exponent}({signed_integer}|{unsigned_integer});
 | 
|
82  | 
real                      = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?;
 | 
|
83  | 
rational                  = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer};
 | 
|
84  | 
||
| 47358 | 85  | 
lower_word                = {lower_alpha}{alpha_numeric}*;
 | 
86  | 
upper_word                = {upper_alpha}{alpha_numeric}*;
 | 
|
87  | 
dollar_dollar_word        = {ddollar}{lower_word};
 | 
|
88  | 
dollar_word               = {dollar}{lower_word};
 | 
|
| 57808 | 89  | 
dollar_underscore         = {dollar}_;
 | 
| 46844 | 90  | 
|
| 47358 | 91  | 
distinct_object           = {double_quote}{do_char}+{double_quote};
 | 
| 46844 | 92  | 
|
93  | 
ws = ([\ ]|[\t]);  | 
|
| 47689 | 94  | 
single_quoted             = {single_quote}{sq_char}+{single_quote};
 | 
| 46844 | 95  | 
|
96  | 
system_comment_one        = [%][\ ]*{ddollar}[_]*;
 | 
|
97  | 
system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/];  | 
|
98  | 
system_comment = (system_comment_one)|(system_comment_multi);  | 
|
99  | 
||
| 47358 | 100  | 
comment_line              = {percentage_sign}[^\n]*;
 | 
101  | 
comment_block = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/];  | 
|
102  | 
comment                   = ({comment_line}|{comment_block})+;
 | 
|
103  | 
||
104  | 
eol                       = ("\013\010"|"\010"|"\013");
 | 
|
| 46844 | 105  | 
|
106  | 
%%  | 
|
107  | 
||
108  | 
{ws}*           => (col:=(!col)+size yytext; continue () );
 | 
|
109  | 
||
110  | 
{eol}           => (linep:=(!linep)+1;
 | 
|
111  | 
eolpos:=yypos+size yytext; continue ());  | 
|
112  | 
||
113  | 
"&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col));  | 
|
114  | 
||
115  | 
"@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col));  | 
|
116  | 
"@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col));  | 
|
117  | 
||
118  | 
"!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col));  | 
|
119  | 
"??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col));  | 
|
120  | 
||
121  | 
"@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col));  | 
|
122  | 
"^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col));  | 
|
123  | 
||
124  | 
":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col));  | 
|
125  | 
"," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col));  | 
|
126  | 
"=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col));  | 
|
127  | 
"!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col));  | 
|
128  | 
":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col));  | 
|
129  | 
">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col));  | 
|
130  | 
||
| 
47357
 
15e579392a68
refactored tptp yacc to bring close to official spec;
 
sultana 
parents: 
46844 
diff
changeset
 | 
131  | 
"<=" => (col:=yypos-(!eolpos); T.FI(!linep,!col));  | 
| 46844 | 132  | 
"<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col));  | 
133  | 
"=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col));  | 
|
134  | 
"[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col));  | 
|
135  | 
"("             => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col));
 | 
|
136  | 
"->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col));  | 
|
137  | 
"--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col));  | 
|
138  | 
"~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col));  | 
|
139  | 
"!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col));  | 
|
140  | 
"<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col));  | 
|
141  | 
"~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col));  | 
|
142  | 
"." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col));  | 
|
143  | 
"++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col));  | 
|
144  | 
"?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col));  | 
|
145  | 
"]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col));  | 
|
146  | 
")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col));  | 
|
147  | 
"~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col));  | 
|
148  | 
"|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col));  | 
|
149  | 
||
150  | 
{distinct_object}    => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col));
 | 
|
151  | 
{rational}           => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col));
 | 
|
152  | 
{real}               => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col));
 | 
|
153  | 
{signed_integer}     => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col));
 | 
|
154  | 
{unsigned_integer}   => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col));
 | 
|
155  | 
{dot_decimal}        => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col));
 | 
|
156  | 
{single_quoted}      => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col));
 | 
|
157  | 
{upper_word}         => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col));
 | 
|
158  | 
{comment}            => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col));
 | 
|
159  | 
||
160  | 
"thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col));  | 
|
161  | 
"fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col));  | 
|
162  | 
"cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col));  | 
|
163  | 
"tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col));  | 
|
164  | 
"include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col));  | 
|
165  | 
||
166  | 
"$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col));  | 
|
167  | 
"$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col));  | 
|
168  | 
"$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col));  | 
|
169  | 
"$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col));  | 
|
170  | 
"$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col));  | 
|
171  | 
||
172  | 
"$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col));  | 
|
173  | 
"$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col));  | 
|
| 
47357
 
15e579392a68
refactored tptp yacc to bring close to official spec;
 
sultana 
parents: 
46844 
diff
changeset
 | 
174  | 
"$let_tf" => (col:=yypos-(!eolpos); T.LET_TF(!linep,!col));  | 
| 
 
15e579392a68
refactored tptp yacc to bring close to official spec;
 
sultana 
parents: 
46844 
diff
changeset
 | 
175  | 
"$let_ff" => (col:=yypos-(!eolpos); T.LET_FF(!linep,!col));  | 
| 
 
15e579392a68
refactored tptp yacc to bring close to official spec;
 
sultana 
parents: 
46844 
diff
changeset
 | 
176  | 
"$let_ft" => (col:=yypos-(!eolpos); T.LET_FT(!linep,!col));  | 
| 
 
15e579392a68
refactored tptp yacc to bring close to official spec;
 
sultana 
parents: 
46844 
diff
changeset
 | 
177  | 
"$let_tt" => (col:=yypos-(!eolpos); T.LET_TT(!linep,!col));  | 
| 46844 | 178  | 
|
| 47358 | 179  | 
{lower_word}   => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col));
 | 
180  | 
{dollar_word}  => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col));
 | 
|
| 57808 | 181  | 
{dollar_underscore}  => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col));
 | 
| 47358 | 182  | 
{dollar_dollar_word}  => (col:=yypos-(!eolpos); T.DOLLAR_DOLLAR_WORD(yytext,!linep,!col));
 | 
| 46844 | 183  | 
|
184  | 
"+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col));  | 
|
185  | 
"*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col));  | 
|
186  | 
"-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col));  | 
|
187  | 
"<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col));  | 
|
188  | 
"!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col));  | 
|
189  | 
"?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col));  | 
|
190  | 
||
191  | 
":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col));  |