--- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 14:36:22 2013 +0200
@@ -179,6 +179,13 @@
assert line.startswith('? ')
line = line[2:]
name = None
+ numberOfPredictions = None
+
+ # Check whether there is a problem name:
+ tmp = line.split('#')
+ if len(tmp) == 2:
+ numberOfPredictions = int(tmp[0].strip())
+ line = tmp[1]
# Check whether there is a problem name:
tmp = line.split(':')
@@ -206,7 +213,7 @@
else:
hints = []
- return name,features,accessibles,hints
+ return name,features,accessibles,hints,numberOfPredictions
def save(self,fileName):
if self.changed:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,203 @@
+'''
+Created on Jul 11, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+from cPickle import dump,load
+from numpy import array,exp
+from math import log
+
+class NBClassifier(object):
+ '''
+ An updateable naive Bayes classifier.
+ '''
+
+ def __init__(self):
+ '''
+ Constructor
+ '''
+ self.counts = {}
+ self.negCounts = {}
+
+ def initializeModel(self,trainData,dicts):
+ """
+ Build basic model from training data.
+ """
+ for d in trainData:
+ self.counts[d] = [0,{}]
+ self.negCounts[d] = [0,{}]
+ dAccUnExp = dicts.accessibleDict[d]
+ if dicts.expandedAccessibles.has_key(d):
+ dAcc = dicts.expandedAccessibles(d)
+ else:
+ if len(dicts.expandedAccessibles.keys()) >= 100:
+ dicts.expandedAccessibles = {}
+ dAcc = dicts.expand_accessibles(dAccUnExp)
+ dicts.expandedAccessibles[d] = dAcc
+ dDeps = set(dicts.dependenciesDict[d])
+ dFeatures = dicts.featureDict[d]
+ # d proves d
+ self.counts[d][0] += 1
+ for f in dFeatures:
+ if self.counts[d][1].has_key(f):
+ self.counts[d][1][f] += 1
+ else:
+ self.counts[d][1][f] = 1
+ for acc in dAcc:
+ if not self.counts.has_key(acc):
+ self.counts[acc] = [0,{}]
+ if not self.negCounts.has_key(acc):
+ self.negCounts[acc] = [0,{}]
+ if acc in dDeps:
+ self.counts[acc][0] += 1
+ for f in dFeatures:
+ if self.counts[acc][1].has_key(f):
+ self.counts[acc][1][f] += 1
+ else:
+ self.counts[acc][1][f] = 1
+ else:
+ self.negCounts[acc][0] += 1
+ for f in dFeatures:
+ if self.negCounts[acc][1].has_key(f):
+ self.negCounts[acc][1][f] += 1
+ else:
+ self.negCounts[acc][1][f] = 1
+
+ def update(self,dataPoint,features,dependencies,dicts):
+ """
+ Updates the Model.
+ """
+ if not self.counts.has_key(dataPoint):
+ self.counts[dataPoint] = [0,{}]
+ if not self.negCounts.has_key(dataPoint):
+ self.negCounts[dataPoint] = [0,{}]
+ if dicts.expandedAccessibles.has_key(dataPoint):
+ dAcc = dicts.expandedAccessibles(dataPoint)
+ else:
+ if len(dicts.expandedAccessibles.keys()) >= 100:
+ dicts.expandedAccessibles = {}
+ dAccUnExp = dicts.accessibleDict[dataPoint]
+ dAcc = dicts.expand_accessibles(dAccUnExp)
+ dicts.expandedAccessibles[dataPoint] = dAcc
+ dDeps = set(dicts.dependenciesDict[dataPoint])
+ dFeatures = dicts.featureDict[dataPoint]
+ # d proves d
+ self.counts[dataPoint][0] += 1
+ for f in dFeatures:
+ if self.counts[dataPoint][1].has_key(f):
+ self.counts[dataPoint][1][f] += 1
+ else:
+ self.counts[dataPoint][1][f] = 1
+
+ for acc in dAcc:
+ if acc in dDeps:
+ self.counts[acc][0] += 1
+ for f in dFeatures:
+ if self.counts[acc][1].has_key(f):
+ self.counts[acc][1][f] += 1
+ else:
+ self.counts[acc][1][f] = 1
+ else:
+ self.negCounts[acc][0] += 1
+ for f in dFeatures:
+ if self.negCounts[acc][1].has_key(f):
+ self.negCounts[acc][1][f] += 1
+ else:
+ self.negCounts[acc][1][f] = 1
+
+ def delete(self,dataPoint,features,dependencies):
+ """
+ Deletes a single datapoint from the model.
+ """
+ for dep in dependencies:
+ self.counts[dep][0] -= 1
+ for f in features:
+ self.counts[dep][1][f] -= 1
+
+
+ def overwrite(self,problemId,newDependencies,dicts):
+ """
+ Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly.
+ """
+ assert self.counts.has_key(problemId)
+ oldDeps = dicts.dependenciesDict[problemId]
+ features = dicts.featureDict[problemId]
+ self.delete(problemId,features,oldDeps)
+ self.update(problemId,features,newDependencies)
+
+ def predict(self,features,accessibles):
+ """
+ For each accessible, predicts the probability of it being useful given the features.
+ Returns a ranking of the accessibles.
+ """
+ predictions = []
+ for a in accessibles:
+ posA = self.counts[a][0]
+ negA = self.negCounts[a][0]
+ fPosA = set(self.counts[a][1].keys())
+ fNegA = set(self.negCounts[a][1].keys())
+ fPosWeightsA = self.counts[a][1]
+ fNegWeightsA = self.negCounts[a][1]
+ if negA == 0:
+ resultA = 0
+ elif posA == 0:
+ print a
+ print 'xx'
+ import sys
+ sys.exit(-1)
+ else:
+ resultA = log(posA) - log(negA)
+ for f in features:
+ if f in fPosA:
+ # P(f | a)
+ if fPosWeightsA[f] == 0:
+ resultA -= 15
+ else:
+ assert fPosWeightsA[f] <= posA
+ resultA += log(float(fPosWeightsA[f])/posA)
+ else:
+ resultA -= 15
+ # P(f | not a)
+ if f in fNegA:
+ if fNegWeightsA[f] == 0:
+ resultA += 15
+ else:
+ assert fNegWeightsA[f] <= negA
+ resultA -= log(float(fNegWeightsA[f])/negA)
+ else:
+ resultA += 15
+
+ predictions.append(resultA)
+ #expPredictions = array([exp(x) for x in predictions])
+ predictions = array(predictions)
+ perm = (-predictions).argsort()
+ #return array(accessibles)[perm],expPredictions[perm]
+ return array(accessibles)[perm],predictions[perm]
+
+ def save(self,fileName):
+ OStream = open(fileName, 'wb')
+ dump((self.counts,self.negCounts),OStream)
+ OStream.close()
+
+ def load(self,fileName):
+ OStream = open(fileName, 'rb')
+ self.counts,self.negCounts = load(OStream)
+ OStream.close()
+
+
+if __name__ == '__main__':
+ featureDict = {0:[0,1,2],1:[3,2,1]}
+ dependenciesDict = {0:[0],1:[0,1]}
+ libDicts = (featureDict,dependenciesDict,{})
+ c = NBClassifier()
+ c.initializeModel([0,1],libDicts)
+ c.update(2,[14,1,3],[0,2])
+ print c.counts
+ print c.predict([0,14],[0,1,2])
+ c.storeModel('x')
+ d = NBClassifier()
+ d.loadModel('x')
+ print c.counts
+ print d.counts
+ print 'Done'
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 14:36:22 2013 +0200
@@ -1,7 +1,7 @@
-#!/usr/bin/python
-# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py
+#!/usr/bin/env python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/mash
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
-# Copyright 2012
+# Copyright 2012 - 2013
#
# Entry point for MaSh (Machine Learning for Sledgehammer).
@@ -15,69 +15,38 @@
@author: Daniel Kuehlwein
'''
-import logging,datetime,string,os,sys
-from argparse import ArgumentParser,RawDescriptionHelpFormatter
-from time import time
-from stats import Statistics
+import socket,sys,time,logging
+
+from spawnDaemon import spawnDaemon
+
+
+import logging,string,os,sys
+
+
from theoryStats import TheoryStatistics
from theoryModels import TheoryModels
from dictionaries import Dictionaries
-#from fullNaiveBayes import NBClassifier
-from sparseNaiveBayes import sparseNBClassifier
-from snow import SNoW
from predefined import Predefined
-# Set up command-line parser
-parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
-MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
---------------- Example Usage ---------------\n\
-First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
-Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
-\n\n\
-Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
-parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
-parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
-parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
- help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
-parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
-
-parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
-parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
- help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
-parser.add_argument('--depFile', default='mash_dependencies',
- help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
-parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+from parameters import init_parser
-parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
-# Theory Parameters
-parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
-parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
-parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
-
-parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
-# NB Parameters
-parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
-parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
-parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
-# TODO: Rename to sineFeatures
-parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
-parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
-
-parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
-parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
-parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
- WARNING: This will make the program a lot slower! Default=False.")
-parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
-parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
-parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
-parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
-parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
-parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
-parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
+def communicate(data,host,port):
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ sock.connect((host,port))
+ sock.sendall(data)
+ received = sock.recv(262144)
+ except:
+ logger = logging.getLogger('communicate')
+ logger.warning('Communication with server failed.')
+ received = -1
+ finally:
+ sock.close()
+ return received
def mash(argv = sys.argv[1:]):
# Initializing command-line arguments
- args = parser.parse_args(argv)
+ args = init_parser(argv)
# Set up logging
logging.basicConfig(level=logging.DEBUG,
@@ -85,18 +54,8 @@
datefmt='%d-%m %H:%M:%S',
filename=args.log,
filemode='w')
- logger = logging.getLogger('main.py')
-
- #"""
- # remove old handler for tester
- #logger.root.handlers[0].stream.close()
- logger.root.removeHandler(logger.root.handlers[0])
- file_handler = logging.FileHandler(args.log)
- file_handler.setLevel(logging.DEBUG)
- formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
- file_handler.setFormatter(formatter)
- logger.root.addHandler(file_handler)
- #"""
+ logger = logging.getLogger('mash')
+
if args.quiet:
logger.setLevel(logging.WARNING)
#console.setLevel(logging.WARNING)
@@ -110,257 +69,44 @@
if not os.path.exists(args.outputDir):
os.makedirs(args.outputDir)
- logger.info('Using the following settings: %s',args)
- # Pick algorithm
- if args.nb:
- logger.info('Using sparse Naive Bayes for learning.')
- model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
- elif args.snow:
- logger.info('Using naive bayes (SNoW) for learning.')
- model = SNoW()
- elif args.predef:
- logger.info('Using predefined predictions.')
- model = Predefined(args.predef)
- else:
- logger.info('No algorithm specified. Using sparse Naive Bayes.')
- model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
-
- # Initializing model
- if args.init:
- logger.info('Initializing Model.')
- startTime = time()
-
- # Load all data
- dicts = Dictionaries()
- dicts.init_all(args)
-
- # Create Model
- trainData = dicts.featureDict.keys()
- model.initializeModel(trainData,dicts)
-
- if args.learnTheories:
- depFile = os.path.join(args.inputDir,args.depFile)
- theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
- theoryModels.init(depFile,dicts)
- theoryModels.save(args.theoryFile)
-
- model.save(args.modelFile)
- dicts.save(args.dictsFile)
-
- logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
- return 0
- # Create predictions and/or update model
- else:
- lineCounter = 1
- statementCounter = 1
- computeStats = False
- dicts = Dictionaries()
- theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
- # Load Files
- if os.path.isfile(args.dictsFile):
- #logger.info('Loading Dictionaries')
- #startTime = time()
- dicts.load(args.dictsFile)
- #logger.info('Done %s',time()-startTime)
- if os.path.isfile(args.modelFile):
- #logger.info('Loading Model')
- #startTime = time()
- model.load(args.modelFile)
- #logger.info('Done %s',time()-startTime)
- if os.path.isfile(args.theoryFile) and args.learnTheories:
- #logger.info('Loading Theory Models')
- #startTime = time()
- theoryModels.load(args.theoryFile)
- #logger.info('Done %s',time()-startTime)
- logger.info('All loading completed')
-
- # IO Streams
- OS = open(args.predictions,'w')
- IS = open(args.inputFile,'r')
-
- # Statistics
- if args.statistics:
- stats = Statistics(args.cutOff)
- if args.learnTheories:
- theoryStats = TheoryStatistics()
-
- predictions = None
- predictedTheories = None
- #Reading Input File
- for line in IS:
-# try:
- if True:
- if line.startswith('!'):
- problemId = dicts.parse_fact(line)
- # Statistics
- if args.statistics and computeStats:
- computeStats = False
- # Assume '!' comes after '?'
- if args.predef:
- predictions = model.predict(problemId)
- if args.learnTheories:
- tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
- usedTheories = set([x.split('.')[0] for x in tmp])
- theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))
- stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
- if not stats.badPreds == []:
- bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
- logger.debug('Bad predictions: %s',bp)
+ # If server is not running, start it.
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((args.host,args.port))
+ sock.close()
+ except:
+ logger.info('Starting Server.')
+ spawnDaemon('server.py')
+ # TODO: Make this fault tolerant
+ time.sleep(0.5)
+ # Init server
+ data = "i "+";".join(argv)
+ received = communicate(data,args.host,args.port)
+ logger.info(received)
+
+ if args.inputFile == None:
+ return
+ logger.debug('Using the following settings: %s',args)
+ # IO Streams
+ OS = open(args.predictions,'w')
+ IS = open(args.inputFile,'r')
+ count = 0
+ for line in IS:
+ count += 1
+ #if count == 127:
+ # break as
+ received = communicate(line,args.host,args.port)
+ if not received == '':
+ OS.write('%s\n' % received)
+ #logger.info(received)
+ OS.close()
+ IS.close()
- statementCounter += 1
- # Update Dependencies, p proves p
- dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
- if args.learnTheories:
- theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
- if args.snow:
- model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
- else:
- model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
- elif line.startswith('p'):
- # Overwrite old proof.
- problemId,newDependencies = dicts.parse_overwrite(line)
- newDependencies = [problemId]+newDependencies
- model.overwrite(problemId,newDependencies,dicts)
- if args.learnTheories:
- theoryModels.overwrite(problemId,newDependencies,dicts)
- dicts.dependenciesDict[problemId] = newDependencies
- elif line.startswith('?'):
- startTime = time()
- computeStats = True
- if args.predef:
- continue
- name,features,accessibles,hints = dicts.parse_problem(line)
-
- # Create predictions
- logger.info('Starting computation for problem on line %s',lineCounter)
- # Update Models with hints
- if not hints == []:
- if args.learnTheories:
- accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
- theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
- if args.snow:
- pass
- else:
- model.update('hints',features,hints)
-
- # Predict premises
- if args.learnTheories:
- predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
-
- # Add additional features on premise lvl if sine is enabled
- if args.sineFeatures:
- origFeatures = [f for f,_w in features]
- secondaryFeatures = []
- for f in origFeatures:
- if dicts.featureCountDict[f] == 1:
- continue
- triggeredFormulas = dicts.featureTriggeredFormulasDict[f]
- for formula in triggeredFormulas:
- tFeatures = dicts.triggerFeaturesDict[formula]
- #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
- newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
- for fNew in newFeatures:
- secondaryFeatures.append((fNew,args.sineWeight))
- predictionsFeatures = features+secondaryFeatures
- else:
- predictionsFeatures = features
- predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
- assert len(predictions) == len(predictionValues)
-
- # Delete hints
- if not hints == []:
- if args.learnTheories:
- theoryModels.delete('hints',features,hints,dicts)
- if args.snow:
- pass
- else:
- model.delete('hints',features,hints)
+ # Statistics
+ if args.statistics:
+ received = communicate('avgStats',args.host,args.port)
+ logger.info(received)
- logger.info('Done. %s seconds needed.',round(time()-startTime,2))
- # Output
- predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
- predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
- predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
- predictionsString = string.join(predictionsStringList,' ')
- outString = '%s: %s' % (name,predictionsString)
- OS.write('%s\n' % outString)
- else:
- logger.warning('Unspecified input format: \n%s',line)
- sys.exit(-1)
- lineCounter += 1
- """
- except:
- logger.warning('An error occurred on line %s .',line)
- lineCounter += 1
- continue
- """
- OS.close()
- IS.close()
- # Statistics
- if args.statistics:
- if args.learnTheories:
- theoryStats.printAvg()
- stats.printAvg()
-
- # Save
- if args.saveModel:
- model.save(args.modelFile)
- if args.learnTheories:
- theoryModels.save(args.theoryFile)
- dicts.save(args.dictsFile)
- if not args.saveStats == None:
- if args.learnTheories:
- theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
- theoryStats.save(theoryStatsFile)
- statsFile = os.path.join(args.outputDir,args.saveStats)
- stats.save(statsFile)
- return 0
-
-if __name__ == '__main__':
- # Example:
- #List
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories']
- #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions']
- #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-
-
- # Auth
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures']
- #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions']
- #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-
-
- # Jinja
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories']
- # ISAR Theories SinePrior
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures']
- # ISAR NB
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
- # ISAR NB ATP
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies']
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
- # ISAR Snow
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--snow']
- #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
-
-
- #sys.exit(mash(args))
+if __name__ == "__main__":
sys.exit(mash())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py
+# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+# Copyright 2012
+#
+# Entry point for MaSh (Machine Learning for Sledgehammer).
+
+'''
+MaSh - Machine Learning for Sledgehammer
+
+MaSh allows to use different machine learning algorithms to predict relevant fact for Sledgehammer.
+
+Created on July 12, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+import logging,datetime,string,os,sys
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+from time import time
+from stats import Statistics
+from theoryStats import TheoryStatistics
+from theoryModels import TheoryModels
+from dictionaries import Dictionaries
+#from fullNaiveBayes import NBClassifier
+from sparseNaiveBayes import sparseNBClassifier
+from snow import SNoW
+from predefined import Predefined
+
+# Set up command-line parser
+parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
+MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+--------------- Example Usage ---------------\n\
+First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+\n\n\
+Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+ help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
+parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
+
+parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
+parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
+ help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+parser.add_argument('--depFile', default='mash_dependencies',
+ help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+
+parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
+# Theory Parameters
+parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
+parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
+parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
+
+parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
+# NB Parameters
+parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+# TODO: Rename to sineFeatures
+parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
+parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
+parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
+parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+ WARNING: This will make the program a lot slower! Default=False.")
+parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
+
+def mash(argv = sys.argv[1:]):
+ # Initializing command-line arguments
+ args = parser.parse_args(argv)
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+ datefmt='%d-%m %H:%M:%S',
+ filename=args.log,
+ filemode='w')
+ logger = logging.getLogger('main.py')
+
+ """
+ # remove old handler for tester
+ # TODO: Comment out for Jasmins version. This crashes python 2.6.1
+ logger.root.handlers[0].stream.close()
+ logger.root.removeHandler(logger.root.handlers[0])
+ file_handler = logging.FileHandler(args.log)
+ file_handler.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
+ file_handler.setFormatter(formatter)
+ logger.root.addHandler(file_handler)
+ #"""
+ if args.quiet:
+ logger.setLevel(logging.WARNING)
+ #console.setLevel(logging.WARNING)
+ else:
+ console = logging.StreamHandler(sys.stdout)
+ console.setLevel(logging.INFO)
+ formatter = logging.Formatter('# %(message)s')
+ console.setFormatter(formatter)
+ logging.getLogger('').addHandler(console)
+
+ if not os.path.exists(args.outputDir):
+ os.makedirs(args.outputDir)
+
+ logger.info('Using the following settings: %s',args)
+ # Pick algorithm
+ if args.nb:
+ logger.info('Using sparse Naive Bayes for learning.')
+ model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+ elif args.snow:
+ logger.info('Using naive bayes (SNoW) for learning.')
+ model = SNoW()
+ elif args.predef:
+ logger.info('Using predefined predictions.')
+ model = Predefined(args.predef)
+ else:
+ logger.info('No algorithm specified. Using sparse Naive Bayes.')
+ model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+
+ # Initializing model
+ if args.init:
+ logger.info('Initializing Model.')
+ startTime = time()
+
+ # Load all data
+ dicts = Dictionaries()
+ dicts.init_all(args)
+
+ # Create Model
+ trainData = dicts.featureDict.keys()
+ model.initializeModel(trainData,dicts)
+
+ if args.learnTheories:
+ depFile = os.path.join(args.inputDir,args.depFile)
+ theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+ theoryModels.init(depFile,dicts)
+ theoryModels.save(args.theoryFile)
+
+ model.save(args.modelFile)
+ dicts.save(args.dictsFile)
+
+ logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
+ return 0
+ # Create predictions and/or update model
+ else:
+ lineCounter = 1
+ statementCounter = 1
+ computeStats = False
+ dicts = Dictionaries()
+ theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+ # Load Files
+ if os.path.isfile(args.dictsFile):
+ #logger.info('Loading Dictionaries')
+ #startTime = time()
+ dicts.load(args.dictsFile)
+ #logger.info('Done %s',time()-startTime)
+ if os.path.isfile(args.modelFile):
+ #logger.info('Loading Model')
+ #startTime = time()
+ model.load(args.modelFile)
+ #logger.info('Done %s',time()-startTime)
+ if os.path.isfile(args.theoryFile) and args.learnTheories:
+ #logger.info('Loading Theory Models')
+ #startTime = time()
+ theoryModels.load(args.theoryFile)
+ #logger.info('Done %s',time()-startTime)
+ logger.info('All loading completed')
+
+ # IO Streams
+ OS = open(args.predictions,'w')
+ IS = open(args.inputFile,'r')
+
+ # Statistics
+ if args.statistics:
+ stats = Statistics(args.cutOff)
+ if args.learnTheories:
+ theoryStats = TheoryStatistics()
+
+ predictions = None
+ predictedTheories = None
+ #Reading Input File
+ for line in IS:
+# try:
+ if True:
+ if line.startswith('!'):
+ problemId = dicts.parse_fact(line)
+ # Statistics
+ if args.statistics and computeStats:
+ computeStats = False
+ # Assume '!' comes after '?'
+ if args.predef:
+ predictions = model.predict(problemId)
+ if args.learnTheories:
+ tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
+ usedTheories = set([x.split('.')[0] for x in tmp])
+ theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))
+ stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
+ if not stats.badPreds == []:
+ bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
+ logger.debug('Bad predictions: %s',bp)
+
+ statementCounter += 1
+ # Update Dependencies, p proves p
+ dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
+ if args.learnTheories:
+ theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+ if args.snow:
+ model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+ else:
+ model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
+ elif line.startswith('p'):
+ # Overwrite old proof.
+ problemId,newDependencies = dicts.parse_overwrite(line)
+ newDependencies = [problemId]+newDependencies
+ model.overwrite(problemId,newDependencies,dicts)
+ if args.learnTheories:
+ theoryModels.overwrite(problemId,newDependencies,dicts)
+ dicts.dependenciesDict[problemId] = newDependencies
+ elif line.startswith('?'):
+ startTime = time()
+ computeStats = True
+ if args.predef:
+ continue
+ name,features,accessibles,hints = dicts.parse_problem(line)
+
+ # Create predictions
+ logger.info('Starting computation for problem on line %s',lineCounter)
+ # Update Models with hints
+ if not hints == []:
+ if args.learnTheories:
+ accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
+ theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
+ if args.snow:
+ pass
+ else:
+ model.update('hints',features,hints)
+
+ # Predict premises
+ if args.learnTheories:
+ predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
+
+ # Add additional features on premise lvl if sine is enabled
+ if args.sineFeatures:
+ origFeatures = [f for f,_w in features]
+ secondaryFeatures = []
+ for f in origFeatures:
+ if dicts.featureCountDict[f] == 1:
+ continue
+ triggeredFormulas = dicts.featureTriggeredFormulasDict[f]
+ for formula in triggeredFormulas:
+ tFeatures = dicts.triggerFeaturesDict[formula]
+ #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
+ newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
+ for fNew in newFeatures:
+ secondaryFeatures.append((fNew,args.sineWeight))
+ predictionsFeatures = features+secondaryFeatures
+ else:
+ predictionsFeatures = features
+ predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
+ assert len(predictions) == len(predictionValues)
+
+ # Delete hints
+ if not hints == []:
+ if args.learnTheories:
+ theoryModels.delete('hints',features,hints,dicts)
+ if args.snow:
+ pass
+ else:
+ model.delete('hints',features,hints)
+
+ logger.info('Done. %s seconds needed.',round(time()-startTime,2))
+ # Output
+ predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
+ predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
+ predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+ predictionsString = string.join(predictionsStringList,' ')
+ outString = '%s: %s' % (name,predictionsString)
+ OS.write('%s\n' % outString)
+ else:
+ logger.warning('Unspecified input format: \n%s',line)
+ sys.exit(-1)
+ lineCounter += 1
+ """
+ except:
+ logger.warning('An error occurred on line %s .',line)
+ lineCounter += 1
+ continue
+ """
+ OS.close()
+ IS.close()
+
+ # Statistics
+ if args.statistics:
+ if args.learnTheories:
+ theoryStats.printAvg()
+ stats.printAvg()
+
+ # Save
+ if args.saveModel:
+ model.save(args.modelFile)
+ if args.learnTheories:
+ theoryModels.save(args.theoryFile)
+ dicts.save(args.dictsFile)
+ if not args.saveStats == None:
+ if args.learnTheories:
+ theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
+ theoryStats.save(theoryStatsFile)
+ statsFile = os.path.join(args.outputDir,args.saveStats)
+ stats.save(statsFile)
+ return 0
+
+if __name__ == '__main__':
+ # Cezary Auth
+ args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--theoryFile', '../tmp/t0', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+ mash(args)
+ args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+ mash(args)
+
+ #sys.exit(mash(args))
+ sys.exit(mash())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,12 @@
+'''
+Created on Aug 20, 2013
+
+@author: daniel
+'''
+from mash import mash
+
+if __name__ == "__main__":
+ args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+ mash(args)
+ args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+ mash(args)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,46 @@
+import datetime
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+
+def init_parser(argv):
+ # Set up command-line parser
+ parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
+ MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+ --------------- Example Usage ---------------\n\
+ First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+ Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+ \n\n\
+ Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+ parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+ parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+ parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+ help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
+ parser.add_argument('--numberOfPredictions',default=500,help="Default number of premises to write in the output. Default=500.",type=int)
+
+ parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
+ parser.add_argument('--inputDir',\
+ help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+ parser.add_argument('--depFile', default='mash_dependencies',
+ help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+
+ parser.add_argument('--algorithm',default='nb',action='store_true',help="Which learning algorithm is used. nb = Naive Bayes,predef=predefined. Default=nb.")
+ # NB Parameters
+ parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+ parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+ parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+ # TODO: Rename to sineFeatures
+ parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
+ parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+ parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+ WARNING: This will make the program a lot slower! Default=False.")
+ parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+ parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+ parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+ parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+ parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+ parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+
+ parser.add_argument('--port', default='9255', help='Port of the Mash server. Default=9255',type=int)
+ parser.add_argument('--host', default='localhost', help='Host of the Mash server. Default=localhost')
+ args = parser.parse_args(argv)
+ return args
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 14:36:22 2013 +0200
@@ -35,7 +35,11 @@
name = line[0].strip()
predId = dicts.get_name_id(name)
line = line[1].split()
- preds = [dicts.get_name_id(x.strip())for x in line]
+ predsTmp = []
+ for x in line:
+ x = x.split('=')
+ predsTmp.append(x[0])
+ preds = [dicts.get_name_id(x.strip())for x in predsTmp]
self.predictions[predId] = preds
IS.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/server.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/server.py
+# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+# Copyright 2013
+#
+# The MaSh Server.
+
+import SocketServer,os,string,logging
+from multiprocessing import Manager
+from time import time
+from dictionaries import Dictionaries
+from parameters import init_parser
+from sparseNaiveBayes import sparseNBClassifier
+from stats import Statistics
+
+
+class ThreadingTCPServer(SocketServer.ThreadingTCPServer):
+ def __init__(self, *args, **kwargs):
+ SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs)
+ self.manager = Manager()
+ self.lock = Manager().Lock()
+
+class MaShHandler(SocketServer.BaseRequestHandler):
+
+ def init(self,argv):
+ if argv == '':
+ self.server.args = init_parser([])
+ else:
+ argv = argv.split(';')
+ self.server.args = init_parser(argv)
+ # Pick model
+ if self.server.args.algorithm == 'nb':
+ self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+ else: # Default case
+ self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+ # Load all data
+ # TODO: rewrite dicts for concurrency and without sine
+ self.server.dicts = Dictionaries()
+ if os.path.isfile(self.server.args.dictsFile):
+ self.server.dicts.load(self.server.args.dictsFile)
+ elif self.server.args.init:
+ self.server.dicts.init_all(self.server.args)
+ # Create Model
+ if os.path.isfile(self.server.args.modelFile):
+ self.server.model.load(self.server.args.modelFile)
+ elif self.server.args.init:
+ trainData = self.server.dicts.featureDict.keys()
+ self.server.model.initializeModel(trainData,self.server.dicts)
+
+ if self.server.args.statistics:
+ self.server.stats = Statistics(self.server.args.cutOff)
+ self.server.statementCounter = 1
+ self.server.computeStats = False
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+ datefmt='%d-%m %H:%M:%S',
+ filename=self.server.args.log+'server',
+ filemode='w')
+ self.server.logger = logging.getLogger('server')
+ self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+ self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+ self.server.callCounter = 1
+
+ def update(self):
+ problemId = self.server.dicts.parse_fact(self.data)
+ # Statistics
+ if self.server.args.statistics and self.server.computeStats:
+ self.server.computeStats = False
+ # Assume '!' comes after '?'
+ if self.server.args.algorithm == 'predef':
+ self.server.predictions = self.server.model.predict(problemId)
+ self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter)
+ if not self.server.stats.badPreds == []:
+ bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',')
+ self.server.logger.debug('Poor predictions: %s',bp)
+ self.server.statementCounter += 1
+
+ # Update Dependencies, p proves p
+ self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId]
+ self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
+
+ def overwrite(self):
+ # Overwrite old proof.
+ problemId,newDependencies = self.server.dicts.parse_overwrite(self.data)
+ newDependencies = [problemId]+newDependencies
+ self.server.model.overwrite(problemId,newDependencies,self.server.dicts)
+ self.server.dicts.dependenciesDict[problemId] = newDependencies
+
+ def predict(self):
+ self.server.computeStats = True
+ if self.server.args.algorithm == 'predef':
+ return
+ name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data)
+ if numberOfPredictions == None:
+ numberOfPredictions = self.server.args.numberOfPredictions
+ if not hints == []:
+ self.server.model.update('hints',features,hints)
+
+ # Create predictions
+ self.server.logger.debug('Starting computation for line %s',self.server.callCounter)
+ predictionsFeatures = features
+ self.server.predictions,predictionValues = self.server.model.predict(predictionsFeatures,accessibles,self.server.dicts)
+ assert len(self.server.predictions) == len(predictionValues)
+ self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2)))
+
+ # Output
+ predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]]
+ predictionValues = [str(x) for x in predictionValues[:self.server.args.numberOfPredictions]]
+ predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+ predictionsString = string.join(predictionsStringList,' ')
+ outString = '%s: %s' % (name,predictionsString)
+ self.request.sendall(outString)
+
+ def shutdown(self):
+ self.request.sendall('Shutting down server.')
+ # Save Models
+ self.server.model.save(self.server.args.modelFile)
+ self.server.dicts.save(self.server.args.dictsFile)
+ if not self.server.args.saveStats == None:
+ statsFile = os.path.join(self.server.args.outputDir,self.server.args.saveStats)
+ self.server.stats.save(statsFile)
+ self.server.shutdown()
+
+ def handle(self):
+ # self.request is the TCP socket connected to the client
+ self.data = self.request.recv(262144).strip()
+ #print "{} wrote:".format(self.client_address[0])
+ #print self.data
+ self.startTime = time()
+ if self.data == 's':
+ self.shutdown()
+ elif self.data.startswith('i'):
+ self.init(self.data[2:])
+ elif self.data.startswith('!'):
+ self.update()
+ elif self.data.startswith('p'):
+ self.overwrite()
+ elif self.data.startswith('?'):
+ self.predict()
+ elif self.data == '':
+ # Empty Socket
+ return
+ elif self.data == 'avgStats':
+ self.request.sendall(self.server.stats.printAvg())
+ else:
+ self.request.sendall('Unspecified input format: \n%s',self.data)
+ self.server.callCounter += 1
+
+ #returnString = 'Time needed: '+str(round(time()-self.startTime,2))
+ #print returnString
+
+if __name__ == "__main__":
+ HOST, PORT = "localhost", 9255
+ #print 'Started Server'
+ # Create the server, binding to localhost on port 9999
+ SocketServer.TCPServer.allow_reuse_address = True
+ server = ThreadingTCPServer((HOST, PORT), MaShHandler)
+ #server = SocketServer.TCPServer((HOST, PORT), MaShHandler)
+
+ # Activate the server; this will keep running until you
+ # interrupt the program with Ctrl-C
+ server.serve_forever()
+
+
+
+
+
+
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 14:36:22 2013 +0200
@@ -27,7 +27,7 @@
self.SNoWTrainFile = '../tmp/snow.train'
self.SNoWTestFile = '../snow.test'
self.SNoWNetFile = '../tmp/snow.net'
- self.defMaxNameId = 20000
+ self.defMaxNameId = 100000
def initializeModel(self,trainData,dicts):
"""
@@ -51,6 +51,7 @@
# Build Model
self.logger.debug('Building Model START.')
snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1)
+ #print snowTrainCommand
#snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1)
args = shlex.split(snowTrainCommand)
p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200
@@ -95,6 +95,7 @@
For each accessible, predicts the probability of it being useful given the features.
Returns a ranking of the accessibles.
"""
+ tau = 0.01 # Jasmin, change value here
predictions = []
for a in accessibles:
posA = self.counts[a][0]
@@ -112,6 +113,11 @@
resultA += w*log(float(self.posWeight*fWeightsA[f])/posA)
else:
resultA += w*self.defVal
+ if not tau == 0.0:
+ observedFeatures = [f for f,_w in features]
+ missingFeatures = list(fA.difference(observedFeatures))
+ sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures])
+ resultA -= tau * sumOfWeights
predictions.append(resultA)
predictions = array(predictions)
perm = (-predictions).argsort()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py Tue Aug 20 14:36:22 2013 +0200
@@ -0,0 +1,35 @@
+# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383
+import os
+
+def spawnDaemon(path_to_executable, *args):
+ """Spawn a completely detached subprocess (i.e., a daemon).
+
+ E.g. for mark:
+ spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx")
+ """
+ # fork the first time (to make a non-session-leader child process)
+ try:
+ pid = os.fork()
+ except OSError, e:
+ raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno))
+ if pid != 0:
+ # parent (calling) process is all done
+ return
+
+ # detach from controlling terminal (to make child a session-leader)
+ os.setsid()
+ try:
+ pid = os.fork()
+ except OSError, e:
+ raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno))
+ raise Exception, "%s [%d]" % (e.strerror, e.errno)
+ if pid != 0:
+ # child process is all done
+ os._exit(0)
+
+ # and finally let's execute the executable for the daemon!
+ try:
+ os.execv(path_to_executable, [path_to_executable])
+ except Exception, e:
+ # oops, we're cut off from the world, let's just give up
+ os._exit(255)
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200
@@ -30,6 +30,7 @@
self.problems = 0.0
self.cutOff = cutOff
self.recallData = [0]*cutOff
+ self.recall100Median = []
self.recall100Data = [0]*cutOff
self.aucData = []
self.premiseOccurenceCounter = {}
@@ -107,6 +108,7 @@
self.aucData.append(auc)
self.avgAUC += auc
self.avgRecall100 += recall100
+ self.recall100Median.append(recall100)
self.problems += 1
self.badPreds = badPreds
self.avgAvailable += available
@@ -116,8 +118,29 @@
def printAvg(self):
self.logger.info('Average results:')
- self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
- round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+ #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
+ # round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+ # HACK FOR PAPER
+ assert len(self.aucData) == len(self.recall100Median)
+ nrDataPoints = len(self.aucData)
+ if nrDataPoints % 2 == 1:
+ medianAUC = sorted(self.aucData)[nrDataPoints/2 + 1]
+ else:
+ medianAUC = float(sorted(self.aucData)[nrDataPoints/2] + sorted(self.aucData)[nrDataPoints/2 + 1])/2
+ #nrDataPoints = len(self.recall100Median)
+ if nrDataPoints % 2 == 1:
+ medianrecall100 = sorted(self.recall100Median)[nrDataPoints/2 + 1]
+ else:
+ medianrecall100 = float(sorted(self.recall100Median)[nrDataPoints/2] + sorted(self.recall100Median)[nrDataPoints/2 + 1])/2
+
+ returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\
+ (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
+ self.logger.info(returnString)
+ return returnString
+
+ """
+ self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \
+ round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
#try:
#if True:
@@ -156,6 +179,7 @@
show()
#except:
# self.logger.warning('Matplotlib module missing. Skipping graphs.')
+ """
def save(self,fileName):
oStream = open(fileName, 'wb')
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 14:36:22 2013 +0200
@@ -16,29 +16,39 @@
#print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
outQueue.put(result)
-def run_mash(runId,inputDir,logFile,predictionFile,\
+def run_mash(runId,inputDir,logFile,predictionFile,predef,\
learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
- sineFeatures,sineWeight):
+ sineFeatures,sineWeight,quiet=True):
# Init
runId = str(runId)
predictionFile = predictionFile + runId
- args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
+ args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
'--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
'--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
if learnTheories:
- args = args + ['--learnTheories']
+ args += ['--learnTheories']
if sineFeatures:
args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+ if not predef == '':
+ args += ['--predef',predef]
+ if quit:
+ args += ['-q']
+ #print args
mash(args)
# Run
- args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
+ args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
'--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
'--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
if learnTheories:
- args = args + ['--learnTheories']
+ args += ['--learnTheories']
if sineFeatures:
args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+ if not predef == '':
+ args += ['--predef',predef]
+ if quit:
+ args += ['-q']
+ #print args
mash(args)
# Get Results
@@ -46,29 +56,42 @@
lines = IS.readlines()
tmpRes = lines[-1].split()
avgAuc = tmpRes[5]
- avgRecall100 = tmpRes[9]
+ medianAuc = tmpRes[7]
+ avgRecall100 = tmpRes[11]
+ medianRecall100 = tmpRes[13]
tmpTheoryRes = lines[-3].split()
- avgTheoryPrecision = tmpTheoryRes[5]
- avgTheoryRecall100 = tmpTheoryRes[7]
- avgTheoryRecall = tmpTheoryRes[9]
- avgTheoryPredictedPercent = tmpTheoryRes[11]
+ if learnTheories:
+ avgTheoryPrecision = tmpTheoryRes[5]
+ avgTheoryRecall100 = tmpTheoryRes[7]
+ avgTheoryRecall = tmpTheoryRes[9]
+ avgTheoryPredictedPercent = tmpTheoryRes[11]
+ else:
+ avgTheoryPrecision = 'NA'
+ avgTheoryRecall100 = 'NA'
+ avgTheoryRecall = 'NA'
+ avgTheoryPredictedPercent = 'NA'
IS.close()
# Delete old models
os.remove(logFile)
os.remove(predictionFile)
- os.remove('../tmp/t'+runId)
+ if learnTheories:
+ os.remove('../tmp/t'+runId)
os.remove('../tmp/m'+runId)
os.remove('../tmp/d'+runId)
outFile = open('tester','a')
#print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
- outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
+ outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\
+ str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\
+ str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\
+ str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
outFile.close()
print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
sineFeatures,'\t',sineWeight,'\t',\
- avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
+ avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\
+ avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
sineFeatures,sineWeight,\
@@ -93,8 +116,9 @@
#cores = 1
# Options
depFile = 'mash_dependencies'
+ predef = ''
outputDir = '../tmp/'
- numberOfPredictions = 500
+ numberOfPredictions = 1024
learnTheoriesRange = [True,False]
theoryDefValPosRange = [-x for x in range(1,20)]
@@ -107,6 +131,7 @@
sineFeaturesRange = [True,False]
sineWeightRange = [0.1,0.25,0.5,0.75,1.0]
+ """
# Test 1
inputFile = '../data/20121227b/Auth/mash_commands'
inputDir = '../data/20121227b/Auth/'
@@ -179,4 +204,78 @@
NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)
print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
-
\ No newline at end of file
+
+ """
+ # Test 2
+ #inputDir = '../data/20130118/Jinja'
+ inputDir = '../data/notheory/Prob'
+ inputFile = inputDir+'/mash_commands'
+ #inputFile = inputDir+'/mash_prover_commands'
+
+ #depFile = 'mash_prover_dependencies'
+ depFile = 'mash_dependencies'
+ outputDir = '../tmp/'
+ numberOfPredictions = 1024
+ predictionFile = '../tmp/auth.pred'
+ logFile = '../tmp/auth.log'
+ learnTheories = False
+ theoryDefValPos = -7.5
+ theoryDefValNeg = -10.0
+ theoryPosWeight = 2.0
+ NBDefaultPriorWeight = 20.0
+ NBDefVal =- 15.0
+ NBPosWeight = 10.0
+ sineFeatures = False
+ sineWeight = 0.5
+ quiet = False
+ print inputDir
+
+ #predef = inputDir+'/mash_prover_suggestions'
+ predef = inputDir+'/mash_suggestions'
+ print 'Mash Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ #"""
+ predef = inputDir+'/mesh_suggestions'
+ #predef = inputDir+'/mesh_prover_suggestions'
+ print 'Mesh Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+ #"""
+ predef = inputDir+'/mepo_suggestions'
+ print 'Mepo Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ """
+ inputFile = inputDir+'/mash_prover_commands'
+ depFile = 'mash_prover_dependencies'
+
+ predef = inputDir+'/mash_prover_suggestions'
+ print 'Mash Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ predef = inputDir+'/mesh_prover_suggestions'
+ print 'Mesh Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ predef = inputDir+'/mepo_suggestions'
+ print 'Mepo Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+ #"""
\ No newline at end of file