# HG changeset patch # User blanchet # Date 1377002182 -7200 # Node ID 1133b9e83f09eb5b73a669750d176b6f589a2915 # Parent 5c7780d21d248413a1df368eaf58b38d08a65f0d new version of MaSh tool -- experimental server diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 14:36:22 2013 +0200 @@ -179,6 +179,13 @@ assert line.startswith('? ') line = line[2:] name = None + numberOfPredictions = None + + # Check whether there is a problem name: + tmp = line.split('#') + if len(tmp) == 2: + numberOfPredictions = int(tmp[0].strip()) + line = tmp[1] # Check whether there is a problem name: tmp = line.split(':') @@ -206,7 +213,7 @@ else: hints = [] - return name,features,accessibles,hints + return name,features,accessibles,hints,numberOfPredictions def save(self,fileName): if self.changed: diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,203 @@ +''' +Created on Jul 11, 2012 + +@author: Daniel Kuehlwein +''' + +from cPickle import dump,load +from numpy import array,exp +from math import log + +class NBClassifier(object): + ''' + An updateable naive Bayes classifier. + ''' + + def __init__(self): + ''' + Constructor + ''' + self.counts = {} + self.negCounts = {} + + def initializeModel(self,trainData,dicts): + """ + Build basic model from training data. + """ + for d in trainData: + self.counts[d] = [0,{}] + self.negCounts[d] = [0,{}] + dAccUnExp = dicts.accessibleDict[d] + if dicts.expandedAccessibles.has_key(d): + dAcc = dicts.expandedAccessibles(d) + else: + if len(dicts.expandedAccessibles.keys()) >= 100: + dicts.expandedAccessibles = {} + dAcc = dicts.expand_accessibles(dAccUnExp) + dicts.expandedAccessibles[d] = dAcc + dDeps = set(dicts.dependenciesDict[d]) + dFeatures = dicts.featureDict[d] + # d proves d + self.counts[d][0] += 1 + for f in dFeatures: + if self.counts[d][1].has_key(f): + self.counts[d][1][f] += 1 + else: + self.counts[d][1][f] = 1 + for acc in dAcc: + if not self.counts.has_key(acc): + self.counts[acc] = [0,{}] + if not self.negCounts.has_key(acc): + self.negCounts[acc] = [0,{}] + if acc in dDeps: + self.counts[acc][0] += 1 + for f in dFeatures: + if self.counts[acc][1].has_key(f): + self.counts[acc][1][f] += 1 + else: + self.counts[acc][1][f] = 1 + else: + self.negCounts[acc][0] += 1 + for f in dFeatures: + if self.negCounts[acc][1].has_key(f): + self.negCounts[acc][1][f] += 1 + else: + self.negCounts[acc][1][f] = 1 + + def update(self,dataPoint,features,dependencies,dicts): + """ + Updates the Model. 
+ """ + if not self.counts.has_key(dataPoint): + self.counts[dataPoint] = [0,{}] + if not self.negCounts.has_key(dataPoint): + self.negCounts[dataPoint] = [0,{}] + if dicts.expandedAccessibles.has_key(dataPoint): + dAcc = dicts.expandedAccessibles(dataPoint) + else: + if len(dicts.expandedAccessibles.keys()) >= 100: + dicts.expandedAccessibles = {} + dAccUnExp = dicts.accessibleDict[dataPoint] + dAcc = dicts.expand_accessibles(dAccUnExp) + dicts.expandedAccessibles[dataPoint] = dAcc + dDeps = set(dicts.dependenciesDict[dataPoint]) + dFeatures = dicts.featureDict[dataPoint] + # d proves d + self.counts[dataPoint][0] += 1 + for f in dFeatures: + if self.counts[dataPoint][1].has_key(f): + self.counts[dataPoint][1][f] += 1 + else: + self.counts[dataPoint][1][f] = 1 + + for acc in dAcc: + if acc in dDeps: + self.counts[acc][0] += 1 + for f in dFeatures: + if self.counts[acc][1].has_key(f): + self.counts[acc][1][f] += 1 + else: + self.counts[acc][1][f] = 1 + else: + self.negCounts[acc][0] += 1 + for f in dFeatures: + if self.negCounts[acc][1].has_key(f): + self.negCounts[acc][1][f] += 1 + else: + self.negCounts[acc][1][f] = 1 + + def delete(self,dataPoint,features,dependencies): + """ + Deletes a single datapoint from the model. + """ + for dep in dependencies: + self.counts[dep][0] -= 1 + for f in features: + self.counts[dep][1][f] -= 1 + + + def overwrite(self,problemId,newDependencies,dicts): + """ + Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly. + """ + assert self.counts.has_key(problemId) + oldDeps = dicts.dependenciesDict[problemId] + features = dicts.featureDict[problemId] + self.delete(problemId,features,oldDeps) + self.update(problemId,features,newDependencies) + + def predict(self,features,accessibles): + """ + For each accessible, predicts the probability of it being useful given the features. + Returns a ranking of the accessibles. 
+ """ + predictions = [] + for a in accessibles: + posA = self.counts[a][0] + negA = self.negCounts[a][0] + fPosA = set(self.counts[a][1].keys()) + fNegA = set(self.negCounts[a][1].keys()) + fPosWeightsA = self.counts[a][1] + fNegWeightsA = self.negCounts[a][1] + if negA == 0: + resultA = 0 + elif posA == 0: + print a + print 'xx' + import sys + sys.exit(-1) + else: + resultA = log(posA) - log(negA) + for f in features: + if f in fPosA: + # P(f | a) + if fPosWeightsA[f] == 0: + resultA -= 15 + else: + assert fPosWeightsA[f] <= posA + resultA += log(float(fPosWeightsA[f])/posA) + else: + resultA -= 15 + # P(f | not a) + if f in fNegA: + if fNegWeightsA[f] == 0: + resultA += 15 + else: + assert fNegWeightsA[f] <= negA + resultA -= log(float(fNegWeightsA[f])/negA) + else: + resultA += 15 + + predictions.append(resultA) + #expPredictions = array([exp(x) for x in predictions]) + predictions = array(predictions) + perm = (-predictions).argsort() + #return array(accessibles)[perm],expPredictions[perm] + return array(accessibles)[perm],predictions[perm] + + def save(self,fileName): + OStream = open(fileName, 'wb') + dump((self.counts,self.negCounts),OStream) + OStream.close() + + def load(self,fileName): + OStream = open(fileName, 'rb') + self.counts,self.negCounts = load(OStream) + OStream.close() + + +if __name__ == '__main__': + featureDict = {0:[0,1,2],1:[3,2,1]} + dependenciesDict = {0:[0],1:[0,1]} + libDicts = (featureDict,dependenciesDict,{}) + c = NBClassifier() + c.initializeModel([0,1],libDicts) + c.update(2,[14,1,3],[0,2]) + print c.counts + print c.predict([0,14],[0,1,2]) + c.storeModel('x') + d = NBClassifier() + d.loadModel('x') + print c.counts + print d.counts + print 'Done' \ No newline at end of file diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/mash.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 14:36:22 2013 +0200 @@ -1,7 +1,7 @@ -#!/usr/bin/python -# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py +#!/usr/bin/env python +# Title: HOL/Tools/Sledgehammer/MaSh/src/mash # Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen -# Copyright 2012 +# Copyright 2012 - 2013 # # Entry point for MaSh (Machine Learning for Sledgehammer). @@ -15,69 +15,38 @@ @author: Daniel Kuehlwein ''' -import logging,datetime,string,os,sys -from argparse import ArgumentParser,RawDescriptionHelpFormatter -from time import time -from stats import Statistics +import socket,sys,time,logging + +from spawnDaemon import spawnDaemon + + +import logging,string,os,sys + + from theoryStats import TheoryStatistics from theoryModels import TheoryModels from dictionaries import Dictionaries -#from fullNaiveBayes import NBClassifier -from sparseNaiveBayes import sparseNBClassifier -from snow import SNoW from predefined import Predefined -# Set up command-line parser -parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. 
\n\n\ -MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\ ---------------- Example Usage ---------------\n\ -First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\ -Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\ -\n\n\ -Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter) -parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.') -parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.') -parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(), - help='File where the predictions stored. Default=../tmp/dateTime.predictions.') -parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int) - -parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.") -parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ - help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') -parser.add_argument('--depFile', default='mash_dependencies', - help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') -parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") +from parameters import init_parser -parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") -# Theory Parameters -parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) -parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float) -parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float) - -parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") -# NB Parameters -parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) -parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) -parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) -# TODO: Rename to sineFeatures -parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") -parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) - -parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") -parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. 
Argument is the filename of the predictions.") -parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ - WARNING: This will make the program a lot slower! Default=False.") -parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") -parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) -parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') -parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") -parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') -parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') -parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle') +def communicate(data,host,port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.connect((host,port)) + sock.sendall(data) + received = sock.recv(262144) + except: + logger = logging.getLogger('communicate') + logger.warning('Communication with server failed.') + received = -1 + finally: + sock.close() + return received def mash(argv = sys.argv[1:]): # Initializing command-line arguments - args = parser.parse_args(argv) + args = init_parser(argv) # Set up logging logging.basicConfig(level=logging.DEBUG, @@ -85,18 +54,8 @@ datefmt='%d-%m %H:%M:%S', filename=args.log, filemode='w') - logger = logging.getLogger('main.py') - - #""" - # remove old handler for tester - #logger.root.handlers[0].stream.close() - logger.root.removeHandler(logger.root.handlers[0]) - file_handler = logging.FileHandler(args.log) - file_handler.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') - file_handler.setFormatter(formatter) - logger.root.addHandler(file_handler) - #""" + logger = logging.getLogger('mash') + if args.quiet: logger.setLevel(logging.WARNING) #console.setLevel(logging.WARNING) @@ -110,257 +69,44 @@ if not os.path.exists(args.outputDir): os.makedirs(args.outputDir) - logger.info('Using the following settings: %s',args) - # Pick algorithm - if args.nb: - logger.info('Using sparse Naive Bayes for learning.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) - elif args.snow: - logger.info('Using naive bayes (SNoW) for learning.') - model = SNoW() - elif args.predef: - logger.info('Using predefined predictions.') - model = Predefined(args.predef) - else: - logger.info('No algorithm specified. 
Using sparse Naive Bayes.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) - - # Initializing model - if args.init: - logger.info('Initializing Model.') - startTime = time() - - # Load all data - dicts = Dictionaries() - dicts.init_all(args) - - # Create Model - trainData = dicts.featureDict.keys() - model.initializeModel(trainData,dicts) - - if args.learnTheories: - depFile = os.path.join(args.inputDir,args.depFile) - theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) - theoryModels.init(depFile,dicts) - theoryModels.save(args.theoryFile) - - model.save(args.modelFile) - dicts.save(args.dictsFile) - - logger.info('All Done. %s seconds needed.',round(time()-startTime,2)) - return 0 - # Create predictions and/or update model - else: - lineCounter = 1 - statementCounter = 1 - computeStats = False - dicts = Dictionaries() - theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) - # Load Files - if os.path.isfile(args.dictsFile): - #logger.info('Loading Dictionaries') - #startTime = time() - dicts.load(args.dictsFile) - #logger.info('Done %s',time()-startTime) - if os.path.isfile(args.modelFile): - #logger.info('Loading Model') - #startTime = time() - model.load(args.modelFile) - #logger.info('Done %s',time()-startTime) - if os.path.isfile(args.theoryFile) and args.learnTheories: - #logger.info('Loading Theory Models') - #startTime = time() - theoryModels.load(args.theoryFile) - #logger.info('Done %s',time()-startTime) - logger.info('All loading completed') - - # IO Streams - OS = open(args.predictions,'w') - IS = open(args.inputFile,'r') - - # Statistics - if args.statistics: - stats = Statistics(args.cutOff) - if args.learnTheories: - theoryStats = TheoryStatistics() - - predictions = None - predictedTheories = None - #Reading Input File - for line in IS: -# try: - if True: - if line.startswith('!'): - problemId = dicts.parse_fact(line) - # Statistics - if args.statistics and computeStats: - computeStats = False - # Assume '!' comes after '?' - if args.predef: - predictions = model.predict(problemId) - if args.learnTheories: - tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]] - usedTheories = set([x.split('.')[0] for x in tmp]) - theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories)) - stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter) - if not stats.badPreds == []: - bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',') - logger.debug('Bad predictions: %s',bp) + # If server is not running, start it. 
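+    # (A failed connection attempt below just means that no server is
+    # listening yet; in that case a detached server process is spawned
+    # and given a moment to start up before the init message is sent.)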
+ try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((args.host,args.port)) + sock.close() + except: + logger.info('Starting Server.') + spawnDaemon('server.py') + # TODO: Make this fault tolerant + time.sleep(0.5) + # Init server + data = "i "+";".join(argv) + received = communicate(data,args.host,args.port) + logger.info(received) + + if args.inputFile == None: + return + logger.debug('Using the following settings: %s',args) + # IO Streams + OS = open(args.predictions,'w') + IS = open(args.inputFile,'r') + count = 0 + for line in IS: + count += 1 + #if count == 127: + # break as + received = communicate(line,args.host,args.port) + if not received == '': + OS.write('%s\n' % received) + #logger.info(received) + OS.close() + IS.close() - statementCounter += 1 - # Update Dependencies, p proves p - dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId] - if args.learnTheories: - theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) - if args.snow: - model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) - else: - model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId]) - elif line.startswith('p'): - # Overwrite old proof. - problemId,newDependencies = dicts.parse_overwrite(line) - newDependencies = [problemId]+newDependencies - model.overwrite(problemId,newDependencies,dicts) - if args.learnTheories: - theoryModels.overwrite(problemId,newDependencies,dicts) - dicts.dependenciesDict[problemId] = newDependencies - elif line.startswith('?'): - startTime = time() - computeStats = True - if args.predef: - continue - name,features,accessibles,hints = dicts.parse_problem(line) - - # Create predictions - logger.info('Starting computation for problem on line %s',lineCounter) - # Update Models with hints - if not hints == []: - if args.learnTheories: - accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles]) - theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories) - if args.snow: - pass - else: - model.update('hints',features,hints) - - # Predict premises - if args.learnTheories: - predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts) - - # Add additional features on premise lvl if sine is enabled - if args.sineFeatures: - origFeatures = [f for f,_w in features] - secondaryFeatures = [] - for f in origFeatures: - if dicts.featureCountDict[f] == 1: - continue - triggeredFormulas = dicts.featureTriggeredFormulasDict[f] - for formula in triggeredFormulas: - tFeatures = dicts.triggerFeaturesDict[formula] - #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]] - newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures) - for fNew in newFeatures: - secondaryFeatures.append((fNew,args.sineWeight)) - predictionsFeatures = features+secondaryFeatures - else: - predictionsFeatures = features - predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts) - assert len(predictions) == len(predictionValues) - - # Delete hints - if not hints == []: - if args.learnTheories: - theoryModels.delete('hints',features,hints,dicts) - if args.snow: - pass - else: - model.delete('hints',features,hints) + # Statistics + if args.statistics: + received = communicate('avgStats',args.host,args.port) + logger.info(received) - logger.info('Done. 
%s seconds needed.',round(time()-startTime,2)) - # Output - predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]] - predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]] - predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))] - predictionsString = string.join(predictionsStringList,' ') - outString = '%s: %s' % (name,predictionsString) - OS.write('%s\n' % outString) - else: - logger.warning('Unspecified input format: \n%s',line) - sys.exit(-1) - lineCounter += 1 - """ - except: - logger.warning('An error occurred on line %s .',line) - lineCounter += 1 - continue - """ - OS.close() - IS.close() - # Statistics - if args.statistics: - if args.learnTheories: - theoryStats.printAvg() - stats.printAvg() - - # Save - if args.saveModel: - model.save(args.modelFile) - if args.learnTheories: - theoryModels.save(args.theoryFile) - dicts.save(args.dictsFile) - if not args.saveStats == None: - if args.learnTheories: - theoryStatsFile = os.path.join(args.outputDir,'theoryStats') - theoryStats.save(theoryStatsFile) - statsFile = os.path.join(args.outputDir,args.saveStats) - stats.save(statsFile) - return 0 - -if __name__ == '__main__': - # Example: - #List - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories'] - #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions'] - #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - - - # Auth - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures'] - #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions'] - #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - - - # Jinja - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories'] - # ISAR Theories SinePrior - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures'] - #args = ['-i', 
'../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures'] - # ISAR NB - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - # ISAR NB ATP - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies'] - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - # ISAR Snow - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--snow'] - #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - - - #sys.exit(mash(args)) +if __name__ == "__main__": sys.exit(mash()) diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,329 @@ +#!/usr/bin/python +# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py +# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen +# Copyright 2012 +# +# Entry point for MaSh (Machine Learning for Sledgehammer). + +''' +MaSh - Machine Learning for Sledgehammer + +MaSh allows to use different machine learning algorithms to predict relevant fact for Sledgehammer. 
+ +Created on July 12, 2012 + +@author: Daniel Kuehlwein +''' + +import logging,datetime,string,os,sys +from argparse import ArgumentParser,RawDescriptionHelpFormatter +from time import time +from stats import Statistics +from theoryStats import TheoryStatistics +from theoryModels import TheoryModels +from dictionaries import Dictionaries +#from fullNaiveBayes import NBClassifier +from sparseNaiveBayes import sparseNBClassifier +from snow import SNoW +from predefined import Predefined + +# Set up command-line parser +parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\ +MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\ +--------------- Example Usage ---------------\n\ +First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\ +Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\ +\n\n\ +Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter) +parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.') +parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.') +parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(), + help='File where the predictions stored. Default=../tmp/dateTime.predictions.') +parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int) + +parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.") +parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ + help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') +parser.add_argument('--depFile', default='mash_dependencies', + help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') +parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") + +parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") +# Theory Parameters +parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) +parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float) +parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float) + +parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") +# NB Parameters +parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) +parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) +parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. 
Default=10.0.",type=float) +# TODO: Rename to sineFeatures +parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") +parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) + +parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") +parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.") +parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ + WARNING: This will make the program a lot slower! Default=False.") +parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") +parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) +parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') +parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") +parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') +parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') +parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle') + +def mash(argv = sys.argv[1:]): + # Initializing command-line arguments + args = parser.parse_args(argv) + + # Set up logging + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%d-%m %H:%M:%S', + filename=args.log, + filemode='w') + logger = logging.getLogger('main.py') + + """ + # remove old handler for tester + # TODO: Comment out for Jasmins version. This crashes python 2.6.1 + logger.root.handlers[0].stream.close() + logger.root.removeHandler(logger.root.handlers[0]) + file_handler = logging.FileHandler(args.log) + file_handler.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + file_handler.setFormatter(formatter) + logger.root.addHandler(file_handler) + #""" + if args.quiet: + logger.setLevel(logging.WARNING) + #console.setLevel(logging.WARNING) + else: + console = logging.StreamHandler(sys.stdout) + console.setLevel(logging.INFO) + formatter = logging.Formatter('# %(message)s') + console.setFormatter(formatter) + logging.getLogger('').addHandler(console) + + if not os.path.exists(args.outputDir): + os.makedirs(args.outputDir) + + logger.info('Using the following settings: %s',args) + # Pick algorithm + if args.nb: + logger.info('Using sparse Naive Bayes for learning.') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) + elif args.snow: + logger.info('Using naive bayes (SNoW) for learning.') + model = SNoW() + elif args.predef: + logger.info('Using predefined predictions.') + model = Predefined(args.predef) + else: + logger.info('No algorithm specified. 
Using sparse Naive Bayes.') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) + + # Initializing model + if args.init: + logger.info('Initializing Model.') + startTime = time() + + # Load all data + dicts = Dictionaries() + dicts.init_all(args) + + # Create Model + trainData = dicts.featureDict.keys() + model.initializeModel(trainData,dicts) + + if args.learnTheories: + depFile = os.path.join(args.inputDir,args.depFile) + theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) + theoryModels.init(depFile,dicts) + theoryModels.save(args.theoryFile) + + model.save(args.modelFile) + dicts.save(args.dictsFile) + + logger.info('All Done. %s seconds needed.',round(time()-startTime,2)) + return 0 + # Create predictions and/or update model + else: + lineCounter = 1 + statementCounter = 1 + computeStats = False + dicts = Dictionaries() + theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) + # Load Files + if os.path.isfile(args.dictsFile): + #logger.info('Loading Dictionaries') + #startTime = time() + dicts.load(args.dictsFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.modelFile): + #logger.info('Loading Model') + #startTime = time() + model.load(args.modelFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.theoryFile) and args.learnTheories: + #logger.info('Loading Theory Models') + #startTime = time() + theoryModels.load(args.theoryFile) + #logger.info('Done %s',time()-startTime) + logger.info('All loading completed') + + # IO Streams + OS = open(args.predictions,'w') + IS = open(args.inputFile,'r') + + # Statistics + if args.statistics: + stats = Statistics(args.cutOff) + if args.learnTheories: + theoryStats = TheoryStatistics() + + predictions = None + predictedTheories = None + #Reading Input File + for line in IS: +# try: + if True: + if line.startswith('!'): + problemId = dicts.parse_fact(line) + # Statistics + if args.statistics and computeStats: + computeStats = False + # Assume '!' comes after '?' + if args.predef: + predictions = model.predict(problemId) + if args.learnTheories: + tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]] + usedTheories = set([x.split('.')[0] for x in tmp]) + theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories)) + stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter) + if not stats.badPreds == []: + bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',') + logger.debug('Bad predictions: %s',bp) + + statementCounter += 1 + # Update Dependencies, p proves p + dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId] + if args.learnTheories: + theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) + if args.snow: + model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) + else: + model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId]) + elif line.startswith('p'): + # Overwrite old proof. 
+ problemId,newDependencies = dicts.parse_overwrite(line) + newDependencies = [problemId]+newDependencies + model.overwrite(problemId,newDependencies,dicts) + if args.learnTheories: + theoryModels.overwrite(problemId,newDependencies,dicts) + dicts.dependenciesDict[problemId] = newDependencies + elif line.startswith('?'): + startTime = time() + computeStats = True + if args.predef: + continue + name,features,accessibles,hints = dicts.parse_problem(line) + + # Create predictions + logger.info('Starting computation for problem on line %s',lineCounter) + # Update Models with hints + if not hints == []: + if args.learnTheories: + accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles]) + theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories) + if args.snow: + pass + else: + model.update('hints',features,hints) + + # Predict premises + if args.learnTheories: + predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts) + + # Add additional features on premise lvl if sine is enabled + if args.sineFeatures: + origFeatures = [f for f,_w in features] + secondaryFeatures = [] + for f in origFeatures: + if dicts.featureCountDict[f] == 1: + continue + triggeredFormulas = dicts.featureTriggeredFormulasDict[f] + for formula in triggeredFormulas: + tFeatures = dicts.triggerFeaturesDict[formula] + #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]] + newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures) + for fNew in newFeatures: + secondaryFeatures.append((fNew,args.sineWeight)) + predictionsFeatures = features+secondaryFeatures + else: + predictionsFeatures = features + predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts) + assert len(predictions) == len(predictionValues) + + # Delete hints + if not hints == []: + if args.learnTheories: + theoryModels.delete('hints',features,hints,dicts) + if args.snow: + pass + else: + model.delete('hints',features,hints) + + logger.info('Done. 
%s seconds needed.',round(time()-startTime,2)) + # Output + predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]] + predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]] + predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))] + predictionsString = string.join(predictionsStringList,' ') + outString = '%s: %s' % (name,predictionsString) + OS.write('%s\n' % outString) + else: + logger.warning('Unspecified input format: \n%s',line) + sys.exit(-1) + lineCounter += 1 + """ + except: + logger.warning('An error occurred on line %s .',line) + lineCounter += 1 + continue + """ + OS.close() + IS.close() + + # Statistics + if args.statistics: + if args.learnTheories: + theoryStats.printAvg() + stats.printAvg() + + # Save + if args.saveModel: + model.save(args.modelFile) + if args.learnTheories: + theoryModels.save(args.theoryFile) + dicts.save(args.dictsFile) + if not args.saveStats == None: + if args.learnTheories: + theoryStatsFile = os.path.join(args.outputDir,'theoryStats') + theoryStats.save(theoryStatsFile) + statsFile = os.path.join(args.outputDir,args.saveStats) + stats.save(statsFile) + return 0 + +if __name__ == '__main__': + # Cezary Auth + args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--theoryFile', '../tmp/t0', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0'] + mash(args) + args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0'] + mash(args) + + #sys.exit(mash(args)) + sys.exit(mash()) diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,12 @@ +''' +Created on Aug 20, 2013 + +@author: daniel +''' +from mash import mash + +if __name__ == "__main__": + args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0'] + mash(args) + args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0'] + mash(args) \ No newline at end of file diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,46 @@ +import datetime +from argparse import ArgumentParser,RawDescriptionHelpFormatter + +def init_parser(argv): + # Set up command-line parser + parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. 
\n\n\
    MaSh allows the use of different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
    --------------- Example Usage ---------------\n\
    First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
    Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
    \n\n\
    Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+    parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+    parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+    parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+                        help='File where the predictions are stored. Default=../tmp/dateTime.predictions.')
+    parser.add_argument('--numberOfPredictions',default=500,help="Default number of premises to write in the output. Default=500.",type=int)
+
+    parser.add_argument('--init',default=False,action='store_true',help="Initialize MaSh. Requires --inputDir to be defined. Default=False.")
+    parser.add_argument('--inputDir',\
+                        help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+    parser.add_argument('--depFile', default='mash_dependencies',
+                        help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+
+    parser.add_argument('--algorithm',default='nb',help="Which learning algorithm is used. nb = sparse naive Bayes, predef = predefined predictions. Default=nb.")
+    # NB parameters
+    parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+    parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+    parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+    # SInE parameters
+    parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE-like prior for premise-level predictions. Default=False.")
+    parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+    parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+                        WARNING: This will make the program a lot slower! Default=False.")
+    parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+    parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+    parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+    parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+    parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+    parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+
+    parser.add_argument('--port', default=9255, help='Port of the MaSh server. Default=9255.',type=int)
+    parser.add_argument('--host', default='localhost', help='Host of the MaSh server. 
Default=localhost') + args = parser.parse_args(argv) + return args diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 14:36:22 2013 +0200 @@ -35,7 +35,11 @@ name = line[0].strip() predId = dicts.get_name_id(name) line = line[1].split() - preds = [dicts.get_name_id(x.strip())for x in line] + predsTmp = [] + for x in line: + x = x.split('=') + predsTmp.append(x[0]) + preds = [dicts.get_name_id(x.strip())for x in predsTmp] self.predictions[predId] = preds IS.close() diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/server.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/server.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# Title: HOL/Tools/Sledgehammer/MaSh/src/server.py +# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen +# Copyright 2013 +# +# The MaSh Server. + +import SocketServer,os,string,logging +from multiprocessing import Manager +from time import time +from dictionaries import Dictionaries +from parameters import init_parser +from sparseNaiveBayes import sparseNBClassifier +from stats import Statistics + + +class ThreadingTCPServer(SocketServer.ThreadingTCPServer): + def __init__(self, *args, **kwargs): + SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs) + self.manager = Manager() + self.lock = Manager().Lock() + +class MaShHandler(SocketServer.BaseRequestHandler): + + def init(self,argv): + if argv == '': + self.server.args = init_parser([]) + else: + argv = argv.split(';') + self.server.args = init_parser(argv) + # Pick model + if self.server.args.algorithm == 'nb': + self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal) + else: # Default case + self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal) + # Load all data + # TODO: rewrite dicts for concurrency and without sine + self.server.dicts = Dictionaries() + if os.path.isfile(self.server.args.dictsFile): + self.server.dicts.load(self.server.args.dictsFile) + elif self.server.args.init: + self.server.dicts.init_all(self.server.args) + # Create Model + if os.path.isfile(self.server.args.modelFile): + self.server.model.load(self.server.args.modelFile) + elif self.server.args.init: + trainData = self.server.dicts.featureDict.keys() + self.server.model.initializeModel(trainData,self.server.dicts) + + if self.server.args.statistics: + self.server.stats = Statistics(self.server.args.cutOff) + self.server.statementCounter = 1 + self.server.computeStats = False + + # Set up logging + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%d-%m %H:%M:%S', + filename=self.server.args.log+'server', + filemode='w') + self.server.logger = logging.getLogger('server') + self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.') + self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.') + self.server.callCounter = 1 + + def update(self): + problemId = self.server.dicts.parse_fact(self.data) + # Statistics + if self.server.args.statistics and self.server.computeStats: + self.server.computeStats = False + # Assume '!' comes after '?' 
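+            # (the '!' fact supplies the true dependencies against which the
+            # predictions computed for the preceding '?' query are scored)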
+            if self.server.args.algorithm == 'predef':
+                self.server.predictions = self.server.model.predict(problemId)
+            self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter)
+            if not self.server.stats.badPreds == []:
+                bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',')
+                self.server.logger.debug('Poor predictions: %s',bp)
+        self.server.statementCounter += 1
+
+        # Update Dependencies, p proves p
+        self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId]
+        self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
+
+    def overwrite(self):
+        # Overwrite old proof.
+        problemId,newDependencies = self.server.dicts.parse_overwrite(self.data)
+        newDependencies = [problemId]+newDependencies
+        self.server.model.overwrite(problemId,newDependencies,self.server.dicts)
+        self.server.dicts.dependenciesDict[problemId] = newDependencies
+
+    def predict(self):
+        self.server.computeStats = True
+        if self.server.args.algorithm == 'predef':
+            return
+        name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data)
+        if numberOfPredictions == None:
+            numberOfPredictions = self.server.args.numberOfPredictions
+        if not hints == []:
+            self.server.model.update('hints',features,hints)
+
+        # Create predictions
+        self.server.logger.debug('Starting computation for line %s',self.server.callCounter)
+        predictionsFeatures = features
+        self.server.predictions,predictionValues = self.server.model.predict(predictionsFeatures,accessibles,self.server.dicts)
+        assert len(self.server.predictions) == len(predictionValues)
+        self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2)))
+
+        # Output
+        predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]]
+        predictionValues = [str(x) for x in predictionValues[:numberOfPredictions]]
+        predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+        predictionsString = string.join(predictionsStringList,' ')
+        outString = '%s: %s' % (name,predictionsString)
+        self.request.sendall(outString)
+
+    def shutdown(self):
+        self.request.sendall('Shutting down server.')
+        # Save Models
+        self.server.model.save(self.server.args.modelFile)
+        self.server.dicts.save(self.server.args.dictsFile)
+        if not self.server.args.saveStats == None:
+            statsFile = os.path.join(self.server.args.outputDir,self.server.args.saveStats)
+            self.server.stats.save(statsFile)
+        self.server.shutdown()
+
+    def handle(self):
+        # self.request is the TCP socket connected to the client
+        self.data = self.request.recv(262144).strip()
+        #print "{} wrote:".format(self.client_address[0])
+        #print self.data
+        self.startTime = time()
+        if self.data == 's':
+            self.shutdown()
+        elif self.data.startswith('i'):
+            self.init(self.data[2:])
+        elif self.data.startswith('!'):
+            self.update()
+        elif self.data.startswith('p'):
+            self.overwrite()
+        elif self.data.startswith('?'):
+            self.predict()
+        elif self.data == '':
+            # Empty Socket
+            return
+        elif self.data == 'avgStats':
+            self.request.sendall(self.server.stats.printAvg())
+        else:
+            self.request.sendall('Unspecified input format: \n%s' % self.data)
+        self.server.callCounter += 1
+
+        #returnString = 'Time needed: '+str(round(time()-self.startTime,2))
+        #print returnString
+
+if __name__ == "__main__": 
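+    # Default address of the MaSh server; keep in sync with the --host/--port
+    # defaults in parameters.py.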
+ HOST, PORT = "localhost", 9255 + #print 'Started Server' + # Create the server, binding to localhost on port 9999 + SocketServer.TCPServer.allow_reuse_address = True + server = ThreadingTCPServer((HOST, PORT), MaShHandler) + #server = SocketServer.TCPServer((HOST, PORT), MaShHandler) + + # Activate the server; this will keep running until you + # interrupt the program with Ctrl-C + server.serve_forever() + + + + + + \ No newline at end of file diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/snow.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 14:36:22 2013 +0200 @@ -27,7 +27,7 @@ self.SNoWTrainFile = '../tmp/snow.train' self.SNoWTestFile = '../snow.test' self.SNoWNetFile = '../tmp/snow.net' - self.defMaxNameId = 20000 + self.defMaxNameId = 100000 def initializeModel(self,trainData,dicts): """ @@ -51,6 +51,7 @@ # Build Model self.logger.debug('Building Model START.') snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1) + #print snowTrainCommand #snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1) args = shlex.split(snowTrainCommand) p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT) diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 14:36:22 2013 +0200 @@ -95,6 +95,7 @@ For each accessible, predicts the probability of it being useful given the features. Returns a ranking of the accessibles. """ + tau = 0.01 # Jasmin, change value here predictions = [] for a in accessibles: posA = self.counts[a][0] @@ -112,6 +113,11 @@ resultA += w*log(float(self.posWeight*fWeightsA[f])/posA) else: resultA += w*self.defVal + if not tau == 0.0: + observedFeatures = [f for f,_w in features] + missingFeatures = list(fA.difference(observedFeatures)) + sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures]) + resultA -= tau * sumOfWeights predictions.append(resultA) predictions = array(predictions) perm = (-predictions).argsort() diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py Tue Aug 20 14:36:22 2013 +0200 @@ -0,0 +1,35 @@ +# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383 +import os + +def spawnDaemon(path_to_executable, *args): + """Spawn a completely detached subprocess (i.e., a daemon). + + E.g. for mark: + spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx") + """ + # fork the first time (to make a non-session-leader child process) + try: + pid = os.fork() + except OSError, e: + raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno)) + if pid != 0: + # parent (calling) process is all done + return + + # detach from controlling terminal (to make child a session-leader) + os.setsid() + try: + pid = os.fork() + except OSError, e: + raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno)) + raise Exception, "%s [%d]" % (e.strerror, e.errno) + if pid != 0: + # child process is all done + os._exit(0) + + # and finally let's execute the executable for the daemon! 
+ try: + os.execv(path_to_executable, [path_to_executable]) + except Exception, e: + # oops, we're cut off from the world, let's just give up + os._exit(255) diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/stats.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200 @@ -30,6 +30,7 @@ self.problems = 0.0 self.cutOff = cutOff self.recallData = [0]*cutOff + self.recall100Median = [] self.recall100Data = [0]*cutOff self.aucData = [] self.premiseOccurenceCounter = {} @@ -107,6 +108,7 @@ self.aucData.append(auc) self.avgAUC += auc self.avgRecall100 += recall100 + self.recall100Median.append(recall100) self.problems += 1 self.badPreds = badPreds self.avgAvailable += available @@ -116,8 +118,29 @@ def printAvg(self): self.logger.info('Average results:') - self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \ - round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff) + #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \ + # round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff) + # HACK FOR PAPER + assert len(self.aucData) == len(self.recall100Median) + nrDataPoints = len(self.aucData) + if nrDataPoints % 2 == 1: + medianAUC = sorted(self.aucData)[nrDataPoints/2 + 1] + else: + medianAUC = float(sorted(self.aucData)[nrDataPoints/2] + sorted(self.aucData)[nrDataPoints/2 + 1])/2 + #nrDataPoints = len(self.recall100Median) + if nrDataPoints % 2 == 1: + medianrecall100 = sorted(self.recall100Median)[nrDataPoints/2 + 1] + else: + medianrecall100 = float(sorted(self.recall100Median)[nrDataPoints/2] + sorted(self.recall100Median)[nrDataPoints/2 + 1])/2 + + returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\ + (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff) + self.logger.info(returnString) + return returnString + + """ + self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \ + round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff) #try: #if True: @@ -156,6 +179,7 @@ show() #except: # self.logger.warning('Matplotlib module missing. 
diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/stats.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 14:36:22 2013 +0200
@@ -30,6 +30,7 @@
         self.problems = 0.0
         self.cutOff = cutOff
         self.recallData = [0]*cutOff
+        self.recall100Median = []
         self.recall100Data = [0]*cutOff
         self.aucData = []
         self.premiseOccurenceCounter = {}
@@ -107,6 +108,7 @@
         self.aucData.append(auc)
         self.avgAUC += auc
         self.avgRecall100 += recall100
+        self.recall100Median.append(recall100)
         self.problems += 1
         self.badPreds = badPreds
         self.avgAvailable += available
@@ -116,8 +118,29 @@

     def printAvg(self):
         self.logger.info('Average results:')
-        self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
-                         round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+        #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
+        #                 round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+        # HACK FOR PAPER
+        assert len(self.aucData) == len(self.recall100Median)
+        nrDataPoints = len(self.aucData)
+        if nrDataPoints % 2 == 1:
+            medianAUC = sorted(self.aucData)[nrDataPoints/2]
+        else:
+            medianAUC = float(sorted(self.aucData)[nrDataPoints/2 - 1] + sorted(self.aucData)[nrDataPoints/2])/2
+        #nrDataPoints = len(self.recall100Median)
+        if nrDataPoints % 2 == 1:
+            medianRecall100 = sorted(self.recall100Median)[nrDataPoints/2]
+        else:
+            medianRecall100 = float(sorted(self.recall100Median)[nrDataPoints/2 - 1] + sorted(self.recall100Median)[nrDataPoints/2])/2
+
+        returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\
+            (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianRecall100,2),self.cutOff)
+        self.logger.info(returnString)
+        return returnString
+
+        """
+        self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \
+            round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianRecall100,2),self.cutOff)

         #try:
         #if True:
@@ -156,6 +179,7 @@
         show()
     #except:
     #    self.logger.warning('Matplotlib module missing. Skipping graphs.')
+        """

     def save(self,fileName):
         oStream = open(fileName, 'wb')
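For reference, the median computation above follows the usual 0-based recipe; a standalone helper it can be cross-checked against (a sketch only; stats.py keeps its inline version for the two data series it tracks):

    def median(xs):
        # xs: non-empty list of numbers; integer division picks the middle index
        s = sorted(xs)
        n = len(s)
        if n % 2 == 1:
            return s[n // 2]                        # middle element
        return float(s[n // 2 - 1] + s[n // 2]) / 2  # mean of the two middle elements

For example, median([1, 3, 2]) is 2 and median([1, 2, 3, 4]) is 2.5.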
diff -r 5c7780d21d24 -r 1133b9e83f09 src/HOL/Tools/Sledgehammer/MaSh/src/tester.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 14:36:22 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 14:36:22 2013 +0200
@@ -16,29 +16,39 @@
     #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
     outQueue.put(result)

-def run_mash(runId,inputDir,logFile,predictionFile,\
+def run_mash(runId,inputDir,logFile,predictionFile,predef,\
              learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
              NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
-             sineFeatures,sineWeight):
+             sineFeatures,sineWeight,quiet=True):
     # Init
     runId = str(runId)
     predictionFile = predictionFile + runId
-    args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
+    args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
             '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
             '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
     if learnTheories:
-        args = args + ['--learnTheories']
+        args += ['--learnTheories']
     if sineFeatures:
         args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    if predef != '':
+        args += ['--predef',predef]
+    if quiet:
+        args += ['-q']
+    #print args
     mash(args)
     # Run
-    args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
+    args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
            '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
            '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
     if learnTheories:
-        args = args + ['--learnTheories']
+        args += ['--learnTheories']
     if sineFeatures:
         args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    if predef != '':
+        args += ['--predef',predef]
+    if quiet:
+        args += ['-q']
+    #print args
     mash(args)

     # Get Results
@@ -46,29 +56,42 @@
     lines = IS.readlines()
     tmpRes = lines[-1].split()
     avgAuc = tmpRes[5]
-    avgRecall100 = tmpRes[9]
+    medianAuc = tmpRes[7]
+    avgRecall100 = tmpRes[11]
+    medianRecall100 = tmpRes[13]
     tmpTheoryRes = lines[-3].split()
-    avgTheoryPrecision = tmpTheoryRes[5]
-    avgTheoryRecall100 = tmpTheoryRes[7]
-    avgTheoryRecall = tmpTheoryRes[9]
-    avgTheoryPredictedPercent = tmpTheoryRes[11]
+    if learnTheories:
+        avgTheoryPrecision = tmpTheoryRes[5]
+        avgTheoryRecall100 = tmpTheoryRes[7]
+        avgTheoryRecall = tmpTheoryRes[9]
+        avgTheoryPredictedPercent = tmpTheoryRes[11]
+    else:
+        avgTheoryPrecision = 'NA'
+        avgTheoryRecall100 = 'NA'
+        avgTheoryRecall = 'NA'
+        avgTheoryPredictedPercent = 'NA'
     IS.close()

     # Delete old models
     os.remove(logFile)
     os.remove(predictionFile)
-    os.remove('../tmp/t'+runId)
+    if learnTheories:
+        os.remove('../tmp/t'+runId)
     os.remove('../tmp/m'+runId)
     os.remove('../tmp/d'+runId)

     outFile = open('tester','a')
     #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
-    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
+    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\
+                             str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\
+                             str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\
+                             str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
     outFile.close()
     print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
         NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
         sineFeatures,'\t',sineWeight,'\t',\
-        avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
+        avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\
+        avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
     return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
         NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
         sineFeatures,sineWeight,\
@@ -93,8 +116,9 @@
     #cores = 1
     # Options
     depFile = 'mash_dependencies'
+    predef = ''
     outputDir = '../tmp/'
-    numberOfPredictions = 500
+    numberOfPredictions = 1024

     learnTheoriesRange = [True,False]
     theoryDefValPosRange = [-x for x in range(1,20)]
@@ -107,6 +131,7 @@
     sineFeaturesRange = [True,False]
     sineWeightRange = [0.1,0.25,0.5,0.75,1.0]

+    """
     # Test 1
     inputFile = '../data/20121227b/Auth/mash_commands'
     inputDir = '../data/20121227b/Auth/'
@@ -179,4 +204,78 @@
             NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)
     print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
-    
\ No newline at end of file
+
+    """
+    # Test 2
+    #inputDir = '../data/20130118/Jinja'
+    inputDir = '../data/notheory/Prob'
+    inputFile = inputDir+'/mash_commands'
+    #inputFile = inputDir+'/mash_prover_commands'
+
+    #depFile = 'mash_prover_dependencies'
+    depFile = 'mash_dependencies'
+    outputDir = '../tmp/'
+    numberOfPredictions = 1024
+    predictionFile = '../tmp/auth.pred'
+    logFile = '../tmp/auth.log'
+    learnTheories = False
+    theoryDefValPos = -7.5
+    theoryDefValNeg = -10.0
+    theoryPosWeight = 2.0
+    NBDefaultPriorWeight = 20.0
+    NBDefVal = -15.0
+    NBPosWeight = 10.0
+    sineFeatures = False
+    sineWeight = 0.5
+    quiet = False
+    print inputDir
+
+    #predef = inputDir+'/mash_prover_suggestions'
+    predef = inputDir+'/mash_suggestions'
+    print 'Mash Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+
+    #"""
+    predef = inputDir+'/mesh_suggestions'
+    #predef = inputDir+'/mesh_prover_suggestions'
+    print 'Mesh Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+
#""" + predef = inputDir+'/mepo_suggestions' + print 'Mepo Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + """ + inputFile = inputDir+'/mash_prover_commands' + depFile = 'mash_prover_dependencies' + + predef = inputDir+'/mash_prover_suggestions' + print 'Mash Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + predef = inputDir+'/mesh_prover_suggestions' + print 'Mesh Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + predef = inputDir+'/mepo_suggestions' + print 'Mepo Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + #""" \ No newline at end of file