|
1 (* Title: HOL/TPTP/TPTP_Parser/tptp.lex |
|
2 Author: Nik Sultana, Cambridge University Computer Laboratory |
|
3 |
|
4 Notes: |
|
5 * Omit %full in definitions to restrict alphabet to ascii. |
|
6 * Could include %posarg to ensure that start counting character positions from |
|
7 0, but it would punish performance. |
|
8 * %s AF F COMMENT; -- could improve by making stateful. |
|
9 |
|
10 Acknowledgements: |
|
11 * Geoff Sutcliffe for help with TPTP. |
|
12 * Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML. |
|
13 * An early version of this was ported from the specification shipped with |
|
14 Leo-II, written by Frank Theiss. |
|
15 * Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price. |
|
16 * Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration. |
|
17 *) |
|
18 |
|
19 structure T = Tokens |
|
20 type pos = int (* Position in file *) |
|
21 type lineNo = int |
|
22 type svalue = T.svalue |
|
23 type ('a,'b) token = ('a,'b) T.token |
|
24 type lexresult = (svalue,pos) token |
|
25 type lexarg = string |
|
26 type arg = lexarg |
|
27 val col = ref 0; |
|
28 val linep = ref 1; (* Line pointer *) |
|
29 val eolpos = ref 0; |
|
30 |
|
31 val badCh : string * string * int * int -> unit = fn |
|
32 (file_name, bad, line, col) => |
|
33 TextIO.output(TextIO.stdOut, file_name ^ "[" |
|
34 ^ Int.toString line ^ "." ^ Int.toString col |
|
35 ^ "] Invalid character \"" ^ bad ^ "\"\n"); |
|
36 |
|
37 val eof = fn file_name => |
|
38 let |
|
39 val result = T.EOF (!linep,!col); |
|
40 val _ = linep := 0; |
|
41 in result end |
|
42 (*here could check whether file ended prematurely: |
|
43 see if have open brackets, or if we're in some state other than INITIAL*) |
|
44 |
|
45 val count_commentlines : string -> unit = fn str => |
|
46 let |
|
47 val str' = String.explode str |
|
48 val newlines = List.filter (fn x => x = #"\n") str' |
|
49 in linep := (!linep) + (List.length newlines) end |
|
50 |
|
51 %% |
|
52 %header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS)); |
|
53 %arg (file_name:string); |
|
54 |
|
55 printable_char = .; |
|
56 viewable_char = [.\n]; |
|
57 numeric = [0-9]; |
|
58 lower_alpha = [a-z]; |
|
59 upper_alpha = [A-Z]; |
|
60 alpha_numeric = ({lower_alpha}|{upper_alpha}|{numeric}|_); |
|
61 zero_numeric = [0]; |
|
62 non_zero_numeric = [1-9]; |
|
63 slash = [/]; |
|
64 exponent = [Ee]; |
|
65 dot = [.]; |
|
66 any_char = [^\n]; |
|
67 dollar = \$; |
|
68 ddollar = \$\$; |
|
69 unsigned_integer = {numeric}+; |
|
70 sign = [+-]; |
|
71 divide = [/]; |
|
72 |
|
73 signed_integer = {sign}{unsigned_integer}; |
|
74 dot_decimal = {dot}{numeric}+; |
|
75 exp_suffix = {exponent}({signed_integer}|{unsigned_integer}); |
|
76 real = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?; |
|
77 upper_word = {upper_alpha}{alpha_numeric}*; |
|
78 rational = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer}; |
|
79 |
|
80 percentage_sign = "%"; |
|
81 |
|
82 sq_char = ([\040-\041\043-\126]|[\\]['\\]); |
|
83 |
|
84 ws = ([\ ]|[\t]); |
|
85 eol = ("\013\010"|"\010"|"\013"); |
|
86 |
|
87 single_quote = [']; |
|
88 single_quoted = {single_quote}({alpha_numeric}|{sq_char}|{ws})+{single_quote}; |
|
89 |
|
90 lower_word = {lower_alpha}{alpha_numeric}*; |
|
91 atomic_system_word = {ddollar}{lower_word}; |
|
92 atomic_defined_word = {dollar}{lower_word}; |
|
93 |
|
94 system_comment_one = [%][\ ]*{ddollar}[_]*; |
|
95 system_comment_multi = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/]; |
|
96 system_comment = (system_comment_one)|(system_comment_multi); |
|
97 comment_one = {percentage_sign}[^\n]*; |
|
98 comment_multi = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/]; |
|
99 comment = ({comment_one}|{comment_multi})+; |
|
100 |
|
101 do_char = ([^"]|[\\]["\\]); |
|
102 double_quote = ["]; |
|
103 distinct_object = {double_quote}{do_char}+{double_quote}; |
|
104 |
|
105 %% |
|
106 |
|
107 {ws}* => (col:=(!col)+size yytext; continue () ); |
|
108 |
|
109 {eol} => (linep:=(!linep)+1; |
|
110 eolpos:=yypos+size yytext; continue ()); |
|
111 |
|
112 "&" => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col)); |
|
113 |
|
114 "@+" => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col)); |
|
115 "@-" => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col)); |
|
116 |
|
117 "!!" => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col)); |
|
118 "??" => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col)); |
|
119 |
|
120 "@" => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col)); |
|
121 "^" => (col:=yypos-(!eolpos); T.CARET(!linep,!col)); |
|
122 |
|
123 ":" => (col:=yypos-(!eolpos); T.COLON(!linep,!col)); |
|
124 "," => (col:=yypos-(!eolpos); T.COMMA(!linep,!col)); |
|
125 "=" => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col)); |
|
126 "!" => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col)); |
|
127 ":=" => (col:=yypos-(!eolpos); T.LET(!linep,!col)); |
|
128 ">" => (col:=yypos-(!eolpos); T.ARROW(!linep,!col)); |
|
129 |
|
130 "<=" => (col:=yypos-(!eolpos); T.IF(!linep,!col)); |
|
131 "<=>" => (col:=yypos-(!eolpos); T.IFF(!linep,!col)); |
|
132 "=>" => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col)); |
|
133 "[" => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col)); |
|
134 "(" => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col)); |
|
135 "->" => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col)); |
|
136 "--" => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col)); |
|
137 "~&" => (col:=yypos-(!eolpos); T.NAND(!linep,!col)); |
|
138 "!=" => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col)); |
|
139 "<~>" => (col:=yypos-(!eolpos); T.XOR(!linep,!col)); |
|
140 "~|" => (col:=yypos-(!eolpos); T.NOR(!linep,!col)); |
|
141 "." => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col)); |
|
142 "++" => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col)); |
|
143 "?" => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col)); |
|
144 "]" => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col)); |
|
145 ")" => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col)); |
|
146 "~" => (col:=yypos-(!eolpos); T.TILDE(!linep,!col)); |
|
147 "|" => (col:=yypos-(!eolpos); T.VLINE(!linep,!col)); |
|
148 |
|
149 {distinct_object} => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col)); |
|
150 {rational} => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col)); |
|
151 {real} => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col)); |
|
152 {signed_integer} => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col)); |
|
153 {unsigned_integer} => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col)); |
|
154 {dot_decimal} => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col)); |
|
155 {single_quoted} => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col)); |
|
156 {upper_word} => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col)); |
|
157 {comment} => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col)); |
|
158 |
|
159 "thf" => (col:=yypos-(!eolpos); T.THF(!linep,!col)); |
|
160 "fof" => (col:=yypos-(!eolpos); T.FOF(!linep,!col)); |
|
161 "cnf" => (col:=yypos-(!eolpos); T.CNF(!linep,!col)); |
|
162 "tff" => (col:=yypos-(!eolpos); T.TFF(!linep,!col)); |
|
163 "include" => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col)); |
|
164 |
|
165 "$thf" => (col:=yypos-(!eolpos); T.DTHF(!linep,!col)); |
|
166 "$fof" => (col:=yypos-(!eolpos); T.DFOF(!linep,!col)); |
|
167 "$cnf" => (col:=yypos-(!eolpos); T.DCNF(!linep,!col)); |
|
168 "$fot" => (col:=yypos-(!eolpos); T.DFOT(!linep,!col)); |
|
169 "$tff" => (col:=yypos-(!eolpos); T.DTFF(!linep,!col)); |
|
170 |
|
171 "$ite_f" => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col)); |
|
172 "$ite_t" => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col)); |
|
173 |
|
174 {lower_word} => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col)); |
|
175 {atomic_system_word} => (col:=yypos-(!eolpos); T.ATOMIC_SYSTEM_WORD(yytext,!linep,!col)); |
|
176 {atomic_defined_word} => (col:=yypos-(!eolpos); T.ATOMIC_DEFINED_WORD(yytext,!linep,!col)); |
|
177 |
|
178 "+" => (col:=yypos-(!eolpos); T.PLUS(!linep,!col)); |
|
179 "*" => (col:=yypos-(!eolpos); T.TIMES(!linep,!col)); |
|
180 "-->" => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col)); |
|
181 "<<" => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col)); |
|
182 "!>" => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col)); |
|
183 "?*" => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col)); |
|
184 |
|
185 ":-" => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col)); |