author | blanchet |
Wed, 12 Dec 2012 00:14:58 +0100 | |
changeset 50482 | d7be7ccf428b |
parent 50441 | 1e71f9d3cd57 |
child 50619 | b958a94cf811 |
permissions | -rwxr-xr-x |
50220 | 1 |
#!/usr/bin/python |
50222 | 2 |
# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py |
3 |
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
|
4 |
# Copyright 2012 |
|
5 |
# |
|
6 |
# Entry point for MaSh (Machine Learning for Sledgehammer). |
|
7 |
||
50220 | 8 |
'''
MaSh - Machine Learning for Sledgehammer

MaSh allows the use of different machine learning algorithms to predict relevant facts for Sledgehammer.

Created on July 12, 2012

@author: Daniel Kuehlwein
'''
|
17 |
||
18 |
import logging,datetime,string,os,sys |
|
19 |
from argparse import ArgumentParser,RawDescriptionHelpFormatter |
|
20 |
from time import time |
|
21 |
from stats import Statistics |
|
22 |
from dictionaries import Dictionaries |
|
50399 | 23 |
#from fullNaiveBayes import NBClassifier |
50482 | 24 |
from sparseNaiveBayes import sparseNBClassifier |
25 |
#from naiveBayes import sparseNBClassifier |
|
50220 | 26 |
from snow import SNoW |
27 |
from predefined import Predefined |
|
28 |
||
29 |
# Set up command-line parser.
#
# The description is shown verbatim (RawDescriptionHelpFormatter), so it is
# assembled from adjacent string literals instead of backslash continuations
# inside one literal; that way source indentation can never leak into it.
parser = ArgumentParser(
    description=(
        'MaSh - Machine Learning for Sledgehammer. \n\n'
        'MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n'
        '--------------- Example Usage ---------------\n'
        'First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n'
        'Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n'
        '\n\n'
        'Author: Daniel Kuehlwein, July 2012'),
    formatter_class=RawDescriptionHelpFormatter)

# Input / output locations.
parser.add_argument('-i', '--inputFile', help='File containing all problems to be solved.')
parser.add_argument('-o', '--outputDir', default='../tmp/',
                    help='Directory where all created files are stored. Default=../tmp/.')
# The default file name embeds the time at which this module was imported.
parser.add_argument('-p', '--predictions', default='../tmp/%s.predictions' % datetime.datetime.now(),
                    help='File where the predictions are stored. Default=../tmp/dateTime.predictions.')
parser.add_argument('--numberOfPredictions', default=200, type=int,
                    help="Number of premises to write in the output. Default=200.")

# Initialization run.
parser.add_argument('--init', default=False, action='store_true',
                    help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
parser.add_argument('--inputDir', default='../data/Jinja/',
                    help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
parser.add_argument('--depFile', default='mash_dependencies',
                    help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
parser.add_argument('--saveModel', default=False, action='store_true',
                    help="Stores the learned Model at the end of a prediction run. Default=False.")

# Learning algorithm. main() checks these flags in the order nb, snow, predef
# and falls back to Naive Bayes when none is given.
parser.add_argument('--nb', default=False, action='store_true',
                    help="Use Naive Bayes for learning. This is the default learning method.")
parser.add_argument('--snow', default=False, action='store_true',
                    help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
parser.add_argument('--predef', default=False, action='store_true',
                    help="Use predefined predictions. Used only for comparison with the actual learning. Expects mash_mepo_suggestions in inputDir.")

# Statistics and logging.
parser.add_argument('--statistics', default=False, action='store_true',
                    help="Create and show statistics for the top CUTOFF predictions. "
                         "WARNING: This will make the program a lot slower! Default=False.")
parser.add_argument('--saveStats', default=None,
                    help="If defined, stores the statistics in the filename provided.")
parser.add_argument('--cutOff', default=500, type=int,
                    help="Option for statistics. Only consider the first cutOff predictions. Default=500.")
# The default file name embeds the time at which this module was imported.
parser.add_argument('-l', '--log', default='../tmp/%s.log' % datetime.datetime.now(),
                    help='Log file name. Default=../tmp/dateTime.log')
parser.add_argument('-q', '--quiet', default=False, action='store_true',
                    help="If enabled, only print warnings. Default=False.")
|
60 |
||
50388 | 61 |
def _setupLogging(args):
    '''Log DEBUG and above to args.log and INFO and above (WARNING if args.quiet) to stdout; return the main logger.'''
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%d-%m %H:%M:%S',
                        filename=args.log,
                        filemode='w')
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('# %(message)s'))
    logging.getLogger('').addHandler(console)
    logger = logging.getLogger('main.py')
    if args.quiet:
        logger.setLevel(logging.WARNING)
        console.setLevel(logging.WARNING)
    return logger


def _pickModel(args, logger):
    '''Return (model, modelFile) for the algorithm selected on the command line.

    The flags are checked in the order --nb, --snow, --predef; sparse Naive
    Bayes is used when none of them is given.
    '''
    if args.nb:
        logger.info('Using sparse Naive Bayes for learning.')
        return sparseNBClassifier(), os.path.join(args.outputDir, 'NB.pickle')
    if args.snow:
        logger.info('Using naive bayes (SNoW) for learning.')
        return SNoW(), os.path.join(args.outputDir, 'SNoW.pickle')
    if args.predef:
        logger.info('Using predefined predictions.')
        predictionFile = os.path.join(args.inputDir, 'mash_mepo_suggestions')
        return Predefined(predictionFile), os.path.join(args.outputDir, 'mepo.pickle')
    logger.info('No algorithm specified. Using sparse Naive Bayes.')
    return sparseNBClassifier(), os.path.join(args.outputDir, 'NB.pickle')


def main(argv=None):
    '''Entry point of MaSh.

    With --init, loads all data from args.inputDir, initializes the selected
    model and stores model and dictionaries in args.outputDir.  Otherwise
    reads args.inputFile line by line and dispatches on the first character:

      '!'  a new fact: update the model (and, with --statistics, evaluate
           the model on it if it is the first fact after a '?' line),
      'p'  overwrite the dependencies of a known fact,
      '?'  a query: write the top predictions to args.predictions.

    Any other line is logged and terminates the program with sys.exit(-1).

    argv -- list of command-line arguments; defaults to sys.argv[1:],
            looked up at call time.
    Returns 0 on success.
    '''
    if argv is None:
        argv = sys.argv[1:]

    # Initializing command-line arguments
    args = parser.parse_args(argv)

    # Set up logging
    logger = _setupLogging(args)
    if not os.path.exists(args.outputDir):
        os.makedirs(args.outputDir)

    logger.info('Using the following settings: %s', args)
    # Pick algorithm
    model, modelFile = _pickModel(args, logger)
    dictsFile = os.path.join(args.outputDir, 'dicts.pickle')

    # Initializing model
    if args.init:
        logger.info('Initializing Model.')
        startTime = time()

        # Load all data
        dicts = Dictionaries()
        dicts.init_all(args.inputDir, depFileName=args.depFile)

        # Create Model
        trainData = dicts.featureDict.keys()
        model.initializeModel(trainData, dicts)

        model.save(modelFile)
        dicts.save(dictsFile)

        logger.info('All Done. %s seconds needed.', round(time()-startTime, 2))
        return 0

    # Create predictions and/or update model
    if args.inputFile is None:
        # Fail before the predictions file is truncated, with a usable message
        # instead of the TypeError raised by open(None).
        parser.error('--inputFile is required unless --init is given.')

    statementCounter = 1
    computeStats = False
    dicts = Dictionaries()
    # Load Files
    if os.path.isfile(dictsFile):
        dicts.load(dictsFile)
    if os.path.isfile(modelFile):
        model.load(modelFile)

    # Statistics (None when --statistics was not requested)
    stats = Statistics(args.cutOff) if args.statistics else None

    # IO Streams; the with-block closes both files even on sys.exit or errors.
    with open(args.predictions, 'w') as OS, open(args.inputFile, 'r') as IS:
        # Reading Input File; lineCounter is the 1-based line number.
        for lineCounter, line in enumerate(IS, 1):
            if line.startswith('!'):
                problemId = dicts.parse_fact(line)
                # Statistics: only the first '!' line after a '?' line is
                # evaluated (computeStats is set by '?' and cleared here).
                if args.statistics and computeStats:
                    computeStats = False
                    acc = dicts.accessibleDict[problemId]
                    if args.predef:
                        predictions = model.predict(problemId)
                    elif args.snow:
                        predictions, _predictionsValues = model.predict(dicts.featureDict[problemId], dicts.expand_accessibles(acc), dicts)
                    else:
                        predictions, _predictionsValues = model.predict(dicts.featureDict[problemId], dicts.expand_accessibles(acc))
                    stats.update(predictions, dicts.dependenciesDict[problemId], statementCounter)
                    if not stats.badPreds == []:
                        bp = ','.join([str(dicts.idNameDict[x]) for x in stats.badPreds])
                        logger.debug('Bad predictions: %s', bp)
                statementCounter += 1
                # Update Dependencies, p proves p
                dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
                if args.snow:
                    model.update(problemId, dicts.featureDict[problemId], dicts.dependenciesDict[problemId], dicts)
                else:
                    model.update(problemId, dicts.featureDict[problemId], dicts.dependenciesDict[problemId])
            elif line.startswith('p'):
                # Overwrite old proof.
                problemId, newDependencies = dicts.parse_overwrite(line)
                newDependencies = [problemId]+newDependencies
                model.overwrite(problemId, newDependencies, dicts)
                dicts.dependenciesDict[problemId] = newDependencies
            elif line.startswith('?'):
                startTime = time()
                computeStats = True
                if args.predef:
                    # Predefined predictions are only evaluated on '!' lines.
                    continue
                name, features, accessibles = dicts.parse_problem(line)
                # Create predictions
                logger.info('Starting computation for problem on line %s', lineCounter)
                if args.snow:
                    predictions, predictionValues = model.predict(features, accessibles, dicts)
                else:
                    predictions, predictionValues = model.predict(features, accessibles)
                assert len(predictions) == len(predictionValues)
                logger.info('Done. %s seconds needed.', round(time()-startTime, 2))

                # Output: "<name>: <fact>=<value> <fact>=<value> ..."
                topPredictions = zip(predictions[:args.numberOfPredictions],
                                     predictionValues[:args.numberOfPredictions])
                predictionsString = ' '.join(['%s=%s' % (str(dicts.idNameDict[p]), str(v))
                                              for p, v in topPredictions])
                OS.write('%s: %s\n' % (name, predictionsString))
            else:
                logger.warning('Unspecified input format: \n%s', line)
                sys.exit(-1)

    # Statistics
    if args.statistics:
        stats.printAvg()

    # Save
    if args.saveModel:
        model.save(modelFile)
        # NOTE(review): indentation was lost in the source this block was
        # reconstructed from; dicts are assumed to be saved only together
        # with the model -- confirm against the repository.
        dicts.save(dictsFile)
    if args.saveStats is not None:
        if stats is None:
            # Without --statistics nothing was collected.
            logger.warning('Ignoring --saveStats: no statistics were collected (use --statistics).')
        else:
            statsFile = os.path.join(args.outputDir, args.saveStats)
            stats.save(statsFile)
    return 0
|
227 |
||
228 |
if __name__ == '__main__':
    # Example argument lists for manual experiments.  To use one, uncomment
    # it together with the 'sys.exit(main(args))' line near the end.
    # Jinja
    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testIsabelle.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/natATPMP.stats']
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/natATPNB.stats','--cutOff','500']
    # List
    # NOTE(review): '--isabelle' is not among the options defined by the
    # parser visible in this file; these two examples look stale.
    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/List/','--isabelle']
    #args = ['-i', '../data/List/mash_commands','-p','../tmp/testIsabelle.pred','-l','testIsabelle.log','--isabelle','-o','../tmp/','--statistics']
    # Huffman
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Huffman/','--depFile','mash_atp_dependencies']
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Huffman/']
    #args = ['-i', '../data/Huffman/mash_commands','-p','../tmp/testNB.pred','-l','testNB.log','--nb','-o','../tmp/','--statistics']
    # Jinja
    # ISAR
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']

    # ATP
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies']
    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']
    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']

    #startTime = time()
    #sys.exit(main(args))
    #print 'New ' + str(round(time()-startTime,2))
    # Normal operation: take the arguments from the command line and use
    # main()'s return value as the process exit status.
    sys.exit(main())