author | haftmann |
Mon, 22 Apr 2019 06:28:17 +0000 | |
changeset 70190 | ff9efdc84289 |
parent 57808 | cf72aed038c8 |
permissions | -rw-r--r-- |
46844 | 1 |
(* Title: HOL/TPTP/TPTP_Parser/tptp.lex |
2 |
Author: Nik Sultana, Cambridge University Computer Laboratory |
|
3 |
||
4 |
Notes: |
|
5 |
* Omit %full in definitions to restrict alphabet to ascii. |
|
47358 | 6 |
* Could include %posarg to ensure that we'd start counting character positions |
7 |
from 0, but it would punish performance. |
|
46844 | 8 |
* %s AF F COMMENT; -- could improve by making stateful. |
9 |
||
10 |
Acknowledgements: |
|
11 |
* Geoff Sutcliffe for help with TPTP. |
|
12 |
* Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML. |
|
13 |
* An early version of this was ported from the specification shipped with |
|
14 |
Leo-II, written by Frank Theiss. |
|
15 |
* Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price. |
|
16 |
* Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration. |
|
17 |
*) |
|
18 |
||
19 |
structure T = Tokens |
|
20 |
type pos = int (* Position in file *) |
|
21 |
type lineNo = int |
|
22 |
type svalue = T.svalue |
|
23 |
type ('a,'b) token = ('a,'b) T.token |
|
24 |
type lexresult = (svalue,pos) token |
|
25 |
type lexarg = string |
|
26 |
type arg = lexarg |
|
27 |
val col = ref 0; |
|
28 |
val linep = ref 1; (* Line pointer *) |
|
29 |
val eolpos = ref 0; |
|
30 |
||
31 |
val badCh : string * string * int * int -> unit = fn |
|
32 |
(file_name, bad, line, col) => |
|
33 |
TextIO.output(TextIO.stdOut, file_name ^ "[" |
|
34 |
^ Int.toString line ^ "." ^ Int.toString col |
|
35 |
^ "] Invalid character \"" ^ bad ^ "\"\n"); |
|
36 |
||
37 |
val eof = fn file_name => |
|
38 |
let |
|
39 |
val result = T.EOF (!linep,!col); |
|
40 |
val _ = linep := 0; |
|
41 |
in result end |
|
42 |
(*here could check whether file ended prematurely: |
|
43 |
see if have open brackets, or if we're in some state other than INITIAL*) |
|
44 |
||
45 |
val count_commentlines : string -> unit = fn str => |
|
46 |
let |
|
47 |
val str' = String.explode str |
|
48 |
val newlines = List.filter (fn x => x = #"\n") str' |
|
49 |
in linep := (!linep) + (List.length newlines) end |
|
50 |
||
51 |
%% |
|
52 |
%header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS)); |
|
53 |
%arg (file_name:string); |
|
54 |
||
47358 | 55 |
percentage_sign = "%"; |
56 |
double_quote = ["]; |
|
47689 | 57 |
do_char = ([^"\\]|[\\]["\\]); |
47358 | 58 |
single_quote = [']; |
47689 | 59 |
sq_char = ([^'\\]|[\\]['\\]); |
47358 | 60 |
sign = [+-]; |
61 |
dot = [.]; |
|
62 |
exponent = [Ee]; |
|
63 |
slash = [/]; |
|
64 |
zero_numeric = [0]; |
|
65 |
non_zero_numeric = [1-9]; |
|
46844 | 66 |
numeric = [0-9]; |
67 |
lower_alpha = [a-z]; |
|
68 |
upper_alpha = [A-Z]; |
|
69 |
alpha_numeric = ({lower_alpha}|{upper_alpha}|{numeric}|_); |
|
70 |
dollar = \$; |
|
47358 | 71 |
printable_char = .; |
72 |
viewable_char = [.\n]; |
|
73 |
||
74 |
dot_decimal = {dot}{numeric}+; |
|
75 |
||
46844 | 76 |
ddollar = \$\$; |
77 |
unsigned_integer = {numeric}+; |
|
78 |
divide = [/]; |
|
79 |
||
80 |
signed_integer = {sign}{unsigned_integer}; |
|
81 |
exp_suffix = {exponent}({signed_integer}|{unsigned_integer}); |
|
82 |
real = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?; |
|
83 |
rational = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer}; |
|
84 |
||
47358 | 85 |
lower_word = {lower_alpha}{alpha_numeric}*; |
86 |
upper_word = {upper_alpha}{alpha_numeric}*; |
|
87 |
dollar_dollar_word = {ddollar}{lower_word}; |
|
88 |
dollar_word = {dollar}{lower_word}; |
|
57808 | 89 |
dollar_underscore = {dollar}_; |
46844 | 90 |
|
47358 | 91 |
distinct_object = {double_quote}{do_char}+{double_quote}; |
46844 | 92 |
|
93 |
ws = ([\ ]|[\t]); |
|
47689 | 94 |
single_quoted = {single_quote}{sq_char}+{single_quote}; |
46844 | 95 |
|
96 |
system_comment_one = [%][\ ]*{ddollar}[_]*; |
|
97 |
system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/]; |
|
98 |
system_comment = (system_comment_one)|(system_comment_multi); |
|
99 |
||
47358 | 100 |
comment_line = {percentage_sign}[^\n]*; |
101 |
comment_block = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/]; |
|
102 |
comment = ({comment_line}|{comment_block})+; |
|
103 |
||
104 |
eol = ("\013\010"|"\010"|"\013"); |
|
46844 | 105 |
|
106 |
%% |
|
107 |
||
108 |
{ws}* => (col:=(!col)+size yytext; continue () ); |
|
109 |
||
110 |
{eol} => (linep:=(!linep)+1; |
|
111 |
eolpos:=yypos+size yytext; continue ()); |
|
112 |
||
113 |
"&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col)); |
|
114 |
||
115 |
"@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col)); |
|
116 |
"@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col)); |
|
117 |
||
118 |
"!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col)); |
|
119 |
"??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col)); |
|
120 |
||
121 |
"@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col)); |
|
122 |
"^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col)); |
|
123 |
||
124 |
":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col)); |
|
125 |
"," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col)); |
|
126 |
"=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col)); |
|
127 |
"!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col)); |
|
128 |
":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col)); |
|
129 |
">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col)); |
|
130 |
||
47357
15e579392a68
refactored tptp yacc to bring close to official spec;
sultana
parents:
46844
diff
changeset
|
131 |
"<=" => (col:=yypos-(!eolpos); T.FI(!linep,!col)); |
46844 | 132 |
"<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col)); |
133 |
"=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col)); |
|
134 |
"[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col)); |
|
135 |
"(" => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col)); |
|
136 |
"->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col)); |
|
137 |
"--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col)); |
|
138 |
"~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col)); |
|
139 |
"!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col)); |
|
140 |
"<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col)); |
|
141 |
"~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col)); |
|
142 |
"." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col)); |
|
143 |
"++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col)); |
|
144 |
"?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col)); |
|
145 |
"]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col)); |
|
146 |
")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col)); |
|
147 |
"~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col)); |
|
148 |
"|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col)); |
|
149 |
||
150 |
{distinct_object} => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col)); |
|
151 |
{rational} => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col)); |
|
152 |
{real} => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col)); |
|
153 |
{signed_integer} => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col)); |
|
154 |
{unsigned_integer} => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col)); |
|
155 |
{dot_decimal} => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col)); |
|
156 |
{single_quoted} => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col)); |
|
157 |
{upper_word} => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col)); |
|
158 |
{comment} => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col)); |
|
159 |
||
160 |
"thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col)); |
|
161 |
"fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col)); |
|
162 |
"cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col)); |
|
163 |
"tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col)); |
|
164 |
"include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col)); |
|
165 |
||
166 |
"$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col)); |
|
167 |
"$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col)); |
|
168 |
"$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col)); |
|
169 |
"$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col)); |
|
170 |
"$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col)); |
|
171 |
||
172 |
"$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col)); |
|
173 |
"$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col)); |
|
47357
15e579392a68
refactored tptp yacc to bring close to official spec;
sultana
parents:
46844
diff
changeset
|
174 |
"$let_tf" => (col:=yypos-(!eolpos); T.LET_TF(!linep,!col)); |
15e579392a68
refactored tptp yacc to bring close to official spec;
sultana
parents:
46844
diff
changeset
|
175 |
"$let_ff" => (col:=yypos-(!eolpos); T.LET_FF(!linep,!col)); |
15e579392a68
refactored tptp yacc to bring close to official spec;
sultana
parents:
46844
diff
changeset
|
176 |
"$let_ft" => (col:=yypos-(!eolpos); T.LET_FT(!linep,!col)); |
15e579392a68
refactored tptp yacc to bring close to official spec;
sultana
parents:
46844
diff
changeset
|
177 |
"$let_tt" => (col:=yypos-(!eolpos); T.LET_TT(!linep,!col)); |
46844 | 178 |
|
47358 | 179 |
{lower_word} => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col)); |
180 |
{dollar_word} => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col)); |
|
57808 | 181 |
{dollar_underscore} => (col:=yypos-(!eolpos); T.DOLLAR_WORD(yytext,!linep,!col)); |
47358 | 182 |
{dollar_dollar_word} => (col:=yypos-(!eolpos); T.DOLLAR_DOLLAR_WORD(yytext,!linep,!col)); |
46844 | 183 |
|
184 |
"+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col)); |
|
185 |
"*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col)); |
|
186 |
"-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col)); |
|
187 |
"<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col)); |
|
188 |
"!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col)); |
|
189 |
"?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col)); |
|
190 |
||
191 |
":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col)); |