src/HOL/TPTP/TPTP_Parser/tptp.lex
changeset 46844 5d9aab0c609c
child 47357 15e579392a68
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/TPTP/TPTP_Parser/tptp.lex	Fri Mar 09 15:38:55 2012 +0000
@@ -0,0 +1,185 @@
+(*  Title:      HOL/TPTP/TPTP_Parser/tptp.lex
+    Author:     Nik Sultana, Cambridge University Computer Laboratory
+
+ Notes:
+ * Omit %full in definitions to restrict alphabet to ascii.
+ * Could include %posarg to ensure that start counting character positions from
+   0, but it would punish performance.
+ * %s AF F COMMENT; -- could improve by making stateful.
+
+ Acknowledgements:
+ * Geoff Sutcliffe for help with TPTP.
+ * Timothy Bourke for his tips on getting ML-Yacc working with Poly/ML.
+ * An early version of this was ported from the specification shipped with
+   Leo-II, written by Frank Theiss.
+ * Some boilerplate bits were taken from the ml-yacc/ml-lex manual by Roger Price.
+ * Jasmin Blanchette and Makarius Wenzel for help with Isabelle integration.
+*)
+
+structure T = Tokens
+type pos = int             (* Position in file *)
+type lineNo = int
+type svalue = T.svalue
+type ('a,'b) token = ('a,'b) T.token
+type lexresult = (svalue,pos) token
+type lexarg = string
+type arg = lexarg
+val col = ref 0;
+val linep = ref 1;         (* Line pointer *)
+val eolpos = ref 0;
+
+val badCh : string * string * int * int -> unit = fn
+    (file_name, bad, line, col) =>
+    TextIO.output(TextIO.stdOut, file_name ^ "["
+          ^ Int.toString line ^ "." ^ Int.toString col
+          ^ "] Invalid character \"" ^ bad ^ "\"\n");
+
+val eof = fn file_name =>
+  let
+    val result = T.EOF (!linep,!col);
+    val _ = linep := 0;
+  in result end
+(*here could check whether file ended prematurely:
+ see if have open brackets, or if we're in some state other than INITIAL*)
+
+val count_commentlines : string -> unit = fn str =>
+  let
+    val str' = String.explode str
+    val newlines = List.filter (fn x => x = #"\n") str'
+  in linep := (!linep) + (List.length newlines) end
+
+%%
+%header (functor TPTPLexFun(structure Tokens: TPTP_TOKENS));
+%arg (file_name:string);
+
+printable_char            = .;
+viewable_char             = [.\n];
+numeric                   = [0-9];
+lower_alpha               = [a-z];
+upper_alpha               = [A-Z];
+alpha_numeric             = ({lower_alpha}|{upper_alpha}|{numeric}|_);
+zero_numeric              = [0];
+non_zero_numeric          = [1-9];
+slash                     = [/];
+exponent                  = [Ee];
+dot                       = [.];
+any_char                  = [^\n];
+dollar                    = \$;
+ddollar                   = \$\$;
+unsigned_integer          = {numeric}+;
+sign                      = [+-];
+divide                    = [/];
+
+signed_integer            = {sign}{unsigned_integer};
+dot_decimal               = {dot}{numeric}+;
+exp_suffix                = {exponent}({signed_integer}|{unsigned_integer});
+real                      = ({signed_integer}|{unsigned_integer}){dot_decimal}{exp_suffix}?;
+upper_word                = {upper_alpha}{alpha_numeric}*;
+rational                  = ({signed_integer}|{unsigned_integer}){divide}{unsigned_integer};
+
+percentage_sign           = "%";
+
+sq_char                   = ([\040-\041\043-\126]|[\\]['\\]);
+
+ws                        = ([\ ]|[\t]);
+eol                       = ("\013\010"|"\010"|"\013");
+
+single_quote              = ['];
+single_quoted             = {single_quote}({alpha_numeric}|{sq_char}|{ws})+{single_quote};
+
+lower_word                = {lower_alpha}{alpha_numeric}*;
+atomic_system_word        = {ddollar}{lower_word};
+atomic_defined_word       = {dollar}{lower_word};
+
+system_comment_one        = [%][\ ]*{ddollar}[_]*;
+system_comment_multi      = [/][\*][\ ]*(ddollar)([^\*]*[\*][\*]*[^/\*])*[^\*]*[\*][\*]*[/];
+system_comment            = (system_comment_one)|(system_comment_multi);
+comment_one               = {percentage_sign}[^\n]*;
+comment_multi             = [/][\*]([^\*]*[\*]+[^/\*])*[^\*]*[\*]+[/];
+comment                   = ({comment_one}|{comment_multi})+;
+
+do_char                   = ([^"]|[\\]["\\]);
+double_quote              = ["];
+distinct_object           = {double_quote}{do_char}+{double_quote};
+
+%%
+
+{ws}*           => (col:=(!col)+size yytext; continue () );
+
+{eol}           => (linep:=(!linep)+1;
+                   eolpos:=yypos+size yytext; continue ());
+
+"&"             => (col:=yypos-(!eolpos); T.AMPERSAND(!linep,!col));
+
+"@+"            => (col:=yypos-(!eolpos); T.INDEF_CHOICE(!linep,!col));
+"@-"            => (col:=yypos-(!eolpos); T.DEFIN_CHOICE(!linep,!col));
+
+"!!"            => (col:=yypos-(!eolpos); T.OPERATOR_FORALL(!linep,!col));
+"??"            => (col:=yypos-(!eolpos); T.OPERATOR_EXISTS(!linep,!col));
+
+"@"             => (col:=yypos-(!eolpos); T.AT_SIGN(!linep,!col));
+"^"             => (col:=yypos-(!eolpos); T.CARET(!linep,!col));
+
+":"             => (col:=yypos-(!eolpos); T.COLON(!linep,!col));
+","             => (col:=yypos-(!eolpos); T.COMMA(!linep,!col));
+"="             => (col:=yypos-(!eolpos); T.EQUALS(!linep,!col));
+"!"             => (col:=yypos-(!eolpos); T.EXCLAMATION(!linep,!col));
+":="            => (col:=yypos-(!eolpos); T.LET(!linep,!col));
+">"             => (col:=yypos-(!eolpos); T.ARROW(!linep,!col));
+
+"<="            => (col:=yypos-(!eolpos); T.IF(!linep,!col));
+"<=>"           => (col:=yypos-(!eolpos); T.IFF(!linep,!col));
+"=>"            => (col:=yypos-(!eolpos); T.IMPLIES(!linep,!col));
+"["             => (col:=yypos-(!eolpos); T.LBRKT(!linep,!col));
+"("             => (col:=yypos-(!eolpos); T.LPAREN(!linep,!col));
+"->"            => (col:=yypos-(!eolpos); T.MAP_TO(!linep,!col));
+"--"            => (col:=yypos-(!eolpos); T.MMINUS(!linep,!col));
+"~&"            => (col:=yypos-(!eolpos); T.NAND(!linep,!col));
+"!="            => (col:=yypos-(!eolpos); T.NEQUALS(!linep,!col));
+"<~>"           => (col:=yypos-(!eolpos); T.XOR(!linep,!col));
+"~|"            => (col:=yypos-(!eolpos); T.NOR(!linep,!col));
+"."             => (col:=yypos-(!eolpos); T.PERIOD(!linep,!col));
+"++"            => (col:=yypos-(!eolpos); T.PPLUS(!linep,!col));
+"?"             => (col:=yypos-(!eolpos); T.QUESTION(!linep,!col));
+"]"             => (col:=yypos-(!eolpos); T.RBRKT(!linep,!col));
+")"             => (col:=yypos-(!eolpos); T.RPAREN(!linep,!col));
+"~"             => (col:=yypos-(!eolpos); T.TILDE(!linep,!col));
+"|"             => (col:=yypos-(!eolpos); T.VLINE(!linep,!col));
+
+{distinct_object}    => (col:=yypos-(!eolpos); T.DISTINCT_OBJECT(yytext,!linep,!col));
+{rational}           => (col:=yypos-(!eolpos); T.RATIONAL(yytext,!linep,!col));
+{real}               => (col:=yypos-(!eolpos); T.REAL(yytext,!linep,!col));
+{signed_integer}     => (col:=yypos-(!eolpos); T.SIGNED_INTEGER(yytext,!linep,!col));
+{unsigned_integer}   => (col:=yypos-(!eolpos); T.UNSIGNED_INTEGER(yytext,!linep,!col));
+{dot_decimal}        => (col:=yypos-(!eolpos); T.DOT_DECIMAL(yytext,!linep,!col));
+{single_quoted}      => (col:=yypos-(!eolpos); T.SINGLE_QUOTED(yytext,!linep,!col));
+{upper_word}         => (col:=yypos-(!eolpos); T.UPPER_WORD(yytext,!linep,!col));
+{comment}            => (col:=yypos-(!eolpos); count_commentlines yytext;T.COMMENT(yytext,!linep,!col));
+
+"thf"          => (col:=yypos-(!eolpos); T.THF(!linep,!col));
+"fof"          => (col:=yypos-(!eolpos); T.FOF(!linep,!col));
+"cnf"          => (col:=yypos-(!eolpos); T.CNF(!linep,!col));
+"tff"          => (col:=yypos-(!eolpos); T.TFF(!linep,!col));
+"include"      => (col:=yypos-(!eolpos); T.INCLUDE(!linep,!col));
+
+"$thf"          => (col:=yypos-(!eolpos); T.DTHF(!linep,!col));
+"$fof"          => (col:=yypos-(!eolpos); T.DFOF(!linep,!col));
+"$cnf"          => (col:=yypos-(!eolpos); T.DCNF(!linep,!col));
+"$fot"          => (col:=yypos-(!eolpos); T.DFOT(!linep,!col));
+"$tff"          => (col:=yypos-(!eolpos); T.DTFF(!linep,!col));
+
+"$ite_f"        => (col:=yypos-(!eolpos); T.ITE_F(!linep,!col));
+"$ite_t"        => (col:=yypos-(!eolpos); T.ITE_T(!linep,!col));
+
+{lower_word}          => (col:=yypos-(!eolpos); T.LOWER_WORD(yytext,!linep,!col));
+{atomic_system_word}  => (col:=yypos-(!eolpos); T.ATOMIC_SYSTEM_WORD(yytext,!linep,!col));
+{atomic_defined_word} => (col:=yypos-(!eolpos); T.ATOMIC_DEFINED_WORD(yytext,!linep,!col));
+
+"+"           => (col:=yypos-(!eolpos); T.PLUS(!linep,!col));
+"*"           => (col:=yypos-(!eolpos); T.TIMES(!linep,!col));
+"-->"         => (col:=yypos-(!eolpos); T.GENTZEN_ARROW(!linep,!col));
+"<<"          => (col:=yypos-(!eolpos); T.SUBTYPE(!linep,!col));
+"!>"          => (col:=yypos-(!eolpos); T.DEP_PROD(!linep,!col));
+"?*"          => (col:=yypos-(!eolpos); T.DEP_SUM(!linep,!col));
+
+":-"          => (col:=yypos-(!eolpos); T.LET_TERM(!linep,!col));