|
46844
|
1 |
(* Title: HOL/TPTP/TPTP_Parser/tptp.lex
|
|
|
2 |
Author: Nik Sultana, Cambridge University Computer Laboratory
|
|
|
3 |
|
|
|
4 |
Notes:
|
|
|
5 |
* Omit %full in definitions to restrict alphabet to ascii.
|
|
|
6 |
* Could include %posarg to ensure that start counting character positions from
|
|
|
7 |
0, but it would punish performance.
|
|
|
8 |
* %s AF F COMMENT; -- could improve by making stateful.
|
|
|
9 |
|
|
|
10 |
Acknowledgements:
|
|
|
11 |
* Geoff Sutcliffe for help with TPTP.
|
|
|
12 |
* Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML.
|
|
|
13 |
* An early version of this was ported from the specification shipped with
|
|
|
14 |
Leo-II, written by Frank Theiss.
|
|
|
15 |
* Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price.
|
|
|
16 |
* Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration.
|
|
|
17 |
*)
|
|
|
18 |
|
|
|
19 |
structure T = Tokens
|
|
|
20 |
type pos = int (* Position in file *)
|
|
|
21 |
type lineNo = int
|
|
|
22 |
type svalue = T.svalue
|
|
|
23 |
type ('a,'b) token = ('a,'b) T.token
|
|
|
24 |
type lexresult = (svalue,pos) token
|
|
|
25 |
type lexarg = string
|
|
|
26 |
type arg = lexarg
|
|
|
27 |
val col = ref 0;
|
|
|
28 |
val linep = ref 1; (* Line pointer *)
|
|
|
29 |
val eolpos = ref 0;
|
|
|
30 |
|
|
|
31 |
val badCh : string * string * int * int -> unit = fn
|
|
|
32 |
(file_name, bad, line, col) =>
|
|
|
33 |
TextIO.output(TextIO.stdOut, file_name ^ "["
|
|
|
34 |
^ Int.toString line ^ "." ^ Int.toString col
|
|
|
35 |
^ "] Invalid character \"" ^ bad ^ "\"\n");
|
|
|
36 |
|
|
|
37 |
val eof = fn file_name =>
|
|
|
38 |
let
|
|
|
39 |
val result = T.EOF (!linep,!col);
|
|
|
40 |
val _ = linep := 0;
|
|
|
41 |
in result end
|
|
|
42 |
(*here could check whether file ended prematurely:
|
|
|
43 |
see if have open brackets, or if we're in some state other than INITIAL*)
|
|
|
44 |
|
|
|
45 |
val count_commentlines : string -> unit = fn str =>
|
|
|
46 |
let
|
|
|
47 |
val str' = String.explode str
|
|
|
48 |
val newlines = List.filter (fn x => x = #"\n") str'
|
|
|
49 |
in linep := (!linep) + (List.length newlines) end
|
|
|
50 |
|
|
|
51 |
%%
|
|
|
52 |
%header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS));
|
|
|
53 |
%arg (file_name:string);
|
|
|
54 |
|
|
|
55 |
printable_char = .;
|
|
|
56 |
viewable_char = [.\n];
|
|
|
57 |
numeric = [0-9];
|
|
|
58 |
lower_alpha = [a-z];
|
|
|
59 |
upper_alpha = [A-Z];
|
|
|
60 |
alpha_numeric = ({lower_alpha}|{upper_alpha}|{numeric}|_);
|
|
|
61 |
zero_numeric = [0];
|
|
|
62 |
non_zero_numeric = [1-9];
|
|
|
63 |
slash = [/];
|
|
|
64 |
exponent = [Ee];
|
|
|
65 |
dot = [.];
|
|
|
66 |
any_char = [^\n];
|
|
|
67 |
dollar = \$;
|
|
|
68 |
ddollar = \$\$;
|
|
|
69 |
unsigned_integer = {numeric}+;
|
|
|
70 |
sign = [+-];
|
|
|
71 |
divide = [/];
|
|
|
72 |
|
|
|
73 |
signed_integer = {sign}{unsigned_integer};
|
|
|
74 |
dot_decimal = {dot}{numeric}+;
|
|
|
75 |
exp_suffix = {exponent}({signed_integer}|{unsigned_integer});
|
|
|
76 |
real = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?;
|
|
|
77 |
upper_word = {upper_alpha}{alpha_numeric}*;
|
|
|
78 |
rational = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer};
|
|
|
79 |
|
|
|
80 |
percentage_sign = "%";
|
|
|
81 |
|
|
|
82 |
sq_char = ([\040-\041\043-\126]|[\\]['\\]);
|
|
|
83 |
|
|
|
84 |
ws = ([\ ]|[\t]);
|
|
|
85 |
eol = ("\013\010"|"\010"|"\013");
|
|
|
86 |
|
|
|
87 |
single_quote = ['];
|
|
|
88 |
single_quoted = {single_quote}({alpha_numeric}|{sq_char}|{ws})+{single_quote};
|
|
|
89 |
|
|
|
90 |
lower_word = {lower_alpha}{alpha_numeric}*;
|
|
|
91 |
atomic_system_word = {ddollar}{lower_word};
|
|
|
92 |
atomic_defined_word = {dollar}{lower_word};
|
|
|
93 |
|
|
|
94 |
system_comment_one = [%][\ ]*{ddollar}[_]*;
|
|
|
95 |
system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/];
|
|
|
96 |
system_comment = (system_comment_one)|(system_comment_multi);
|
|
|
97 |
comment_one = {percentage_sign}[^\n]*;
|
|
|
98 |
comment_multi = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/];
|
|
|
99 |
comment = ({comment_one}|{comment_multi})+;
|
|
|
100 |
|
|
|
101 |
do_char = ([^"]|[\\]["\\]);
|
|
|
102 |
double_quote = ["];
|
|
|
103 |
distinct_object = {double_quote}{do_char}+{double_quote};
|
|
|
104 |
|
|
|
105 |
%%
|
|
|
106 |
|
|
|
107 |
{ws}* => (col:=(!col)+size yytext; continue () );
|
|
|
108 |
|
|
|
109 |
{eol} => (linep:=(!linep)+1;
|
|
|
110 |
eolpos:=yypos+size yytext; continue ());
|
|
|
111 |
|
|
|
112 |
"&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col));
|
|
|
113 |
|
|
|
114 |
"@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col));
|
|
|
115 |
"@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col));
|
|
|
116 |
|
|
|
117 |
"!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col));
|
|
|
118 |
"??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col));
|
|
|
119 |
|
|
|
120 |
"@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col));
|
|
|
121 |
"^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col));
|
|
|
122 |
|
|
|
123 |
":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col));
|
|
|
124 |
"," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col));
|
|
|
125 |
"=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col));
|
|
|
126 |
"!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col));
|
|
|
127 |
":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col));
|
|
|
128 |
">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col));
|
|
|
129 |
|
|
|
130 |
"<=" => (col:=yypos-(!eolpos); T.IF(!linep,!col));
|
|
|
131 |
"<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col));
|
|
|
132 |
"=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col));
|
|
|
133 |
"[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col));
|
|
|
134 |
"(" => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col));
|
|
|
135 |
"->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col));
|
|
|
136 |
"--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col));
|
|
|
137 |
"~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col));
|
|
|
138 |
"!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col));
|
|
|
139 |
"<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col));
|
|
|
140 |
"~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col));
|
|
|
141 |
"." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col));
|
|
|
142 |
"++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col));
|
|
|
143 |
"?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col));
|
|
|
144 |
"]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col));
|
|
|
145 |
")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col));
|
|
|
146 |
"~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col));
|
|
|
147 |
"|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col));
|
|
|
148 |
|
|
|
149 |
{distinct_object} => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col));
|
|
|
150 |
{rational} => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col));
|
|
|
151 |
{real} => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col));
|
|
|
152 |
{signed_integer} => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col));
|
|
|
153 |
{unsigned_integer} => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col));
|
|
|
154 |
{dot_decimal} => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col));
|
|
|
155 |
{single_quoted} => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col));
|
|
|
156 |
{upper_word} => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col));
|
|
|
157 |
{comment} => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col));
|
|
|
158 |
|
|
|
159 |
"thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col));
|
|
|
160 |
"fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col));
|
|
|
161 |
"cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col));
|
|
|
162 |
"tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col));
|
|
|
163 |
"include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col));
|
|
|
164 |
|
|
|
165 |
"$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col));
|
|
|
166 |
"$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col));
|
|
|
167 |
"$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col));
|
|
|
168 |
"$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col));
|
|
|
169 |
"$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col));
|
|
|
170 |
|
|
|
171 |
"$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col));
|
|
|
172 |
"$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col));
|
|
|
173 |
|
|
|
174 |
{lower_word} => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col));
|
|
|
175 |
{atomic_system_word} => (col:=yypos-(!eolpos); T.ATOMIC_SYSTEM_WORD(yytext,!linep,!col));
|
|
|
176 |
{atomic_defined_word} => (col:=yypos-(!eolpos); T.ATOMIC_DEFINED_WORD(yytext,!linep,!col));
|
|
|
177 |
|
|
|
178 |
"+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col));
|
|
|
179 |
"*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col));
|
|
|
180 |
"-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col));
|
|
|
181 |
"<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col));
|
|
|
182 |
"!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col));
|
|
|
183 |
"?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col));
|
|
|
184 |
|
|
|
185 |
":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col));
|