| author | wenzelm | 
| Fri, 03 Mar 2023 13:50:54 +0100 | |
| changeset 77495 | c546e3e1f7f6 | 
| parent 53384 | 63034189f9ec | 
| permissions | -rw-r--r-- | 
| 46844 | 1 | (* Title: HOL/TPTP/TPTP_Parser/tptp_problem_name.ML | 
| 2 | Author: Nik Sultana, Cambridge University Computer Laboratory | |
| 3 | ||
| 4 | Scans a TPTP problem name. Naming convention is described | |
| 5 | http://www.cs.miami.edu/~tptp/TPTP/TR/TPTPTR.shtml#Problem and Axiomatization Naming | |
| 6 | *) | |
| 7 | ||
| 8 | signature TPTP_PROBLEM_NAME = | |
| 9 | sig | |
| 10 | datatype suffix = | |
| 11 | Problem of | |
| 12 | ((*version*)int * | |
| 13 | (*size parameter*)int option) * | |
| 14 | (*extension*)string | |
| 15 | | Axiom of | |
| 16 | (*specialisation*)int * | |
| 17 | (*extension*)string | |
| 18 | ||
| 19 | type tptp_problem_name = | |
| 20 |     {prob_domain : string,
 | |
| 21 | prob_number : int, | |
| 22 | prob_form : TPTP_Syntax.language, | |
| 23 | suffix : suffix} | |
| 24 | ||
| 25 | datatype problem_name = | |
| 26 | Standard of tptp_problem_name | |
| 27 | | Nonstandard of string | |
| 28 | ||
| 29 | exception TPTP_PROBLEM_NAME of string | |
| 30 | ||
| 31 | val parse_problem_name : string -> problem_name | |
| 32 | val mangle_problem_name : problem_name -> string | |
| 33 | end | |
| 34 | ||
| 35 | structure TPTP_Problem_Name: TPTP_PROBLEM_NAME = | |
| 36 | struct | |
| 37 | ||
| 38 | (*some basic tokens*) | |
| 39 | val numerics = map Char.chr (48 upto 57) (*0..9*) | |
| 40 | val alphabetics = | |
| 41 | map Char.chr (65 upto 90) @ (*A..Z*) | |
| 42 | map Char.chr (97 upto 122) (*a..z*) | |
| 43 | (*TPTP formula forms*) | |
| 44 | val forms = [#"^", #"_", #"=", #"+", #"-"] | |
| 45 | ||
| 46 | (*lift a list of characters into a scanner combinator matching any one of the | |
| 47 | characters in that list.*) | |
| 48 | fun lift l = | |
| 49 | (map (Char.toString #> ($$)) l, Scan.fail) | |
| 50 | |-> fold (fn x => fn y => x || y) | |
| 51 | ||
| 52 | (*combinators for parsing letters and numbers*) | |
| 53 | val alpha = lift alphabetics | |
| 54 | val numer = lift numerics | |
| 55 | ||
| 56 | datatype suffix = | |
| 57 | Problem of | |
| 58 | ((*version*)int * | |
| 47520 | 59 | (*size parameter*)int option) * | 
| 46844 | 60 | (*extension*)string | 
| 61 | | Axiom of | |
| 62 | (*specialisation*)int * | |
| 63 | (*extension*)string | |
| 64 | ||
| 65 | val to_int = Int.fromString #> the | |
| 66 | val rm_ending = Scan.this_string "rm" | |
| 67 | val ax_ending = | |
| 68 | ((numer >> to_int) --| | |
| 69 | $$ "." -- (Scan.this_string "eq" || Scan.this_string "ax" || rm_ending)) | |
| 70 | >> Axiom | |
| 71 | val prob_ending = $$ "p" || $$ "g" || rm_ending | |
| 72 | val prob_suffix = | |
| 73 | ((numer >> to_int) -- | |
| 74 | Scan.option ($$ "." |-- numer ^^ numer ^^ numer >> to_int) --| $$ "." | |
| 75 | -- prob_ending) | |
| 76 | >> Problem | |
| 77 | ||
| 78 | type tptp_problem_name = | |
| 79 |   {prob_domain : string,
 | |
| 80 | prob_number : int, | |
| 81 | prob_form : TPTP_Syntax.language, | |
| 82 | suffix : suffix} | |
| 83 | ||
| 84 | datatype problem_name = | |
| 85 | Standard of tptp_problem_name | |
| 86 | | Nonstandard of string | |
| 87 | ||
| 88 | exception TPTP_PROBLEM_NAME of string | |
| 89 | ||
| 90 | fun parse_problem_name str' : problem_name = | |
| 91 | let | |
| 92 | val str = Symbol.explode str' | |
| 93 | (*NOTE there's an ambiguity in the spec: there's no way of knowing if a | |
| 94 | file ending in "rm" used to be "ax" or "p". Here we default to "p".*) | |
| 47527 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 95 | val (parsed_name, rest) = | 
| 47520 | 96 | Scan.finite Symbol.stopper | 
| 47527 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 97 | (((alpha ^^ alpha ^^ alpha) -- | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 98 | (numer ^^ numer ^^ numer >> to_int) -- | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 99 | lift forms -- (prob_suffix || ax_ending)) >> SOME | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 100 | || Scan.succeed NONE) str | 
| 46844 | 101 | |
| 102 | fun parse_form str = | |
| 103 | case str of | |
| 104 | "^" => TPTP_Syntax.THF | |
| 105 | | "_" => TPTP_Syntax.TFF | |
| 106 | | "=" => TPTP_Syntax.TFF_with_arithmetic | |
| 107 | | "+" => TPTP_Syntax.FOF | |
| 108 | | "-" => TPTP_Syntax.CNF | |
| 109 |       | _ => raise TPTP_PROBLEM_NAME ("Unknown TPTP form: " ^ str)
 | |
| 110 | in | |
| 47527 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 111 | if not (null rest) orelse is_none parsed_name then Nonstandard str' | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 112 | else | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 113 | let | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 114 | val (((prob_domain, prob_number), prob_form), suffix) = | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 115 | the parsed_name | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 116 | in | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 117 | Standard | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 118 |           {prob_domain = prob_domain,
 | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 119 | prob_number = prob_number, | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 120 | prob_form = parse_form prob_form, | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 121 | suffix = suffix} | 
| 
b0a7d235b23b
tidied exception-handling relating to tptp problem names;
 sultana parents: 
47520diff
changeset | 122 | end | 
| 46844 | 123 | end | 
| 124 | ||
| 53384 | 125 | (*Restricts the character set used in a string*) | 
| 126 | fun restricted_ascii only_exclude = | |
| 127 | let | |
| 128 | fun restrict x = "_" ^ Int.toString (Char.ord x) | |
| 129 | fun restrict_uniform x = | |
| 130 | if member (op =) (numerics @ alphabetics) x then Char.toString x else restrict x | |
| 131 | fun restrict_specific x = | |
| 132 | if member (op =) only_exclude x then restrict x else Char.toString x | |
| 133 | val restrict_ascii = | |
| 134 | if List.null only_exclude | |
| 135 | then map restrict_uniform | |
| 136 | else map restrict_specific | |
| 137 | in String.explode #> restrict_ascii #> implode end; | |
| 138 | ||
| 46844 | 139 | (*Produces an ASCII encoding of a TPTP problem-file name.*) | 
| 140 | fun mangle_problem_name (prob : problem_name) : string = | |
| 141 | case prob of | |
| 47520 | 142 | Standard tptp_prob => | 
| 143 | let | |
| 144 | val prob_form = | |
| 145 | case #prob_form tptp_prob of | |
| 146 | TPTP_Syntax.THF => "_thf_" | |
| 147 | | TPTP_Syntax.TFF => "_tff_" | |
| 148 | | TPTP_Syntax.TFF_with_arithmetic => "_thfwa_" | |
| 149 | | TPTP_Syntax.FOF => "_fof_" | |
| 150 | | TPTP_Syntax.CNF => "_cnf_" | |
| 151 | val suffix = | |
| 152 | case #suffix tptp_prob of | |
| 153 | Problem ((version, size), extension) => | |
| 154 | Int.toString version ^ "_" ^ | |
| 155 | (if is_some size then Int.toString (the size) ^ "_" else "") ^ | |
| 156 | extension | |
| 157 | | Axiom (specialisation, extension) => | |
| 158 | Int.toString specialisation ^ "_" ^ extension | |
| 159 | in | |
| 160 | #prob_domain tptp_prob ^ | |
| 161 | Int.toString (#prob_number tptp_prob) ^ | |
| 162 | prob_form ^ | |
| 163 | suffix | |
| 164 | end | |
| 53384 | 165 | | Nonstandard str => restricted_ascii [] str | 
| 46844 | 166 | |
| 167 | end |