updated MaSh
authorblanchet
Thu, 17 Jan 2013 17:55:03 +0100
changeset 50951 e1cbaa7d5536
parent 50950 a145ee10371b
child 50952 3fd83a0cc4bd
updated MaSh
src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
src/HOL/Tools/Sledgehammer/MaSh/src/mash.py
src/HOL/Tools/Sledgehammer/MaSh/src/readData.py
src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py
src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py
src/HOL/Tools/Sledgehammer/MaSh/src/stats.py
src/HOL/Tools/Sledgehammer/MaSh/src/tester.py
src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py	Thu Jan 17 17:55:03 2013 +0100
@@ -32,27 +32,33 @@
         self.dependenciesDict = {}
         self.accessibleDict = {}
         self.expandedAccessibles = {}
-        # For SInE-like prior
-        self.featureCountDict = {}
-        self.triggerFeatures = {}
+        # For SInE features
+        self.useSine = False
+        self.featureCountDict = {} 
+        self.triggerFeaturesDict = {} 
+        self.featureTriggeredFormulasDict = {}
         self.changed = True
 
     """
-    Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled!
+    Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled!
     """
-    def init_featureDict(self,featureFile):
-        self.featureDict,self.maxNameId,self.maxFeatureId, self.featureCountDict,self.triggerFeatures = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\
-                                                                                                       self.maxFeatureId,self.featureCountDict,self.triggerFeatures,featureFile)
+    def init_featureDict(self,featureFile,sineFeatures):
+        self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\
+         create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\
+                             self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile)
     def init_dependenciesDict(self,depFile):
         self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile)
     def init_accessibleDict(self,accFile):
         self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile)
 
-    def init_all(self,inputFolder,featureFileName = 'mash_features',depFileName='mash_dependencies',accFileName = 'mash_accessibility'):
-        featureFile = join(inputFolder,featureFileName)
-        depFile = join(inputFolder,depFileName)
-        accFile = join(inputFolder,accFileName)
-        self.init_featureDict(featureFile)
+    def init_all(self,args):
+        self.featureFileName = 'mash_features'
+        self.accFileName = 'mash_accessibility'
+        self.useSine = args.sineFeatures
+        featureFile = join(args.inputDir,self.featureFileName)
+        depFile = join(args.inputDir,args.depFile)
+        accFile = join(args.inputDir,self.accFileName)
+        self.init_featureDict(featureFile,self.useSine)
         self.init_accessibleDict(accFile)
         self.init_dependenciesDict(depFile)
         self.expandedAccessibles = {}
@@ -76,11 +82,13 @@
     def add_feature(self,featureName):
         if not self.featureIdDict.has_key(featureName):
             self.featureIdDict[featureName] = self.maxFeatureId
-            self.featureCountDict[self.maxFeatureId] = 0
+            if self.useSine:
+                self.featureCountDict[self.maxFeatureId] = 0
             self.maxFeatureId += 1
             self.changed = True
         fId = self.featureIdDict[featureName]
-        self.featureCountDict[fId] += 1
+        if self.useSine:
+            self.featureCountDict[fId] += 1
         return fId
 
     def get_features(self,line):
@@ -128,15 +136,22 @@
         line = line.split(':')
         name = line[0].strip()
         nameId = self.get_name_id(name)
-
         line = line[1].split(';')
         # Accessible Ids
         unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()]
         self.accessibleDict[nameId] = unExpAcc
         features = self.get_features(line)
         self.featureDict[nameId] = features
-        minVal = min([self.featureCountDict[f] for f,_w in features])
-        self.triggerFeatures[nameId] = [f for f,_w in features if self.featureCountDict[f] == minVal]
+        if self.useSine:
+            # SInE Features
+            minFeatureCount = min([self.featureCountDict[f] for f,_w in features])
+            triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount]
+            self.triggerFeaturesDict[nameId] = triggerFeatures
+            for f in triggerFeatures:
+                if self.featureTriggeredFormulasDict.has_key(f): 
+                    self.featureTriggeredFormulasDict[f].append(nameId)
+                else:
+                    self.featureTriggeredFormulasDict[f] = [nameId]        
         self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()]        
         self.changed = True
         return nameId
@@ -197,12 +212,14 @@
         if self.changed:
             dictsStream = open(fileName, 'wb')
             dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
-                self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures),dictsStream)
+                self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\
+                self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream)
             self.changed = False
             dictsStream.close()
     def load(self,fileName):
         dictsStream = open(fileName, 'rb')
         self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
-              self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures = load(dictsStream)
+              self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\
+              self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream)
         self.changed = False
         dictsStream.close()
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py	Thu Jan 17 17:55:03 2013 +0100
@@ -51,46 +51,62 @@
 parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
 # Theory Parameters
 parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
-parser.add_argument('--theoryDefValNeg',default=-15.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
-parser.add_argument('--theoryPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
+parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
 
 parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
 # NB Parameters
 parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
 parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
 parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
-parser.add_argument('--NBSinePrior',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
-parser.add_argument('--NBSineWeight',default=100.0,help="How much the SInE prior is weighted. Default=100.0.",type=float)
+# TODO: Rename to sineFeatures
+parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
+parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
 
 parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
-parser.add_argument('--predef',default=False,action='store_true',\
-                    help="Use predefined predictions. Used only for comparison with the actual learning. Expects mash_mepo_suggestions in inputDir.")
+parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
 parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
                     WARNING: This will make the program a lot slower! Default=False.")
 parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
 parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
 parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
 parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
 
-def main(argv = sys.argv[1:]):
+def mash(argv = sys.argv[1:]):
     # Initializing command-line arguments
     args = parser.parse_args(argv)
-
+    
     # Set up logging
     logging.basicConfig(level=logging.DEBUG,
                         format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                         datefmt='%d-%m %H:%M:%S',
                         filename=args.log,
-                        filemode='w')
-    console = logging.StreamHandler(sys.stdout)
-    console.setLevel(logging.INFO)
-    formatter = logging.Formatter('# %(message)s')
-    console.setFormatter(formatter)
-    logging.getLogger('').addHandler(console)
+                        filemode='w')    
     logger = logging.getLogger('main.py')
+    
+    #"""
+    # remove old handler for tester
+    #logger.root.handlers[0].stream.close()
+    logger.root.removeHandler(logger.root.handlers[0])
+    file_handler = logging.FileHandler(args.log)
+    file_handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
+    file_handler.setFormatter(formatter)
+    logger.root.addHandler(file_handler)
+    #"""
     if args.quiet:
         logger.setLevel(logging.WARNING)
-        console.setLevel(logging.WARNING)
+        #console.setLevel(logging.WARNING)
+    else:
+        console = logging.StreamHandler(sys.stdout)
+        console.setLevel(logging.INFO)
+        formatter = logging.Formatter('# %(message)s')
+        console.setFormatter(formatter)
+        logging.getLogger('').addHandler(console)
+        
     if not os.path.exists(args.outputDir):
         os.makedirs(args.outputDir)
 
@@ -98,24 +114,16 @@
     # Pick algorithm
     if args.nb:
         logger.info('Using sparse Naive Bayes for learning.')
-        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight)
-        modelFile = os.path.join(args.outputDir,'NB.pickle')
+        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
     elif args.snow:
         logger.info('Using naive bayes (SNoW) for learning.')
         model = SNoW()
-        modelFile = os.path.join(args.outputDir,'SNoW.pickle')
     elif args.predef:
         logger.info('Using predefined predictions.')
-        #predictionFile = os.path.join(args.inputDir,'mash_meng_paulson_suggestions') 
-        predictionFile = os.path.join(args.inputDir,'mash_mepo_suggestions')
-        model = Predefined(predictionFile)
-        modelFile = os.path.join(args.outputDir,'mepo.pickle')        
+        model = Predefined(args.predef)
     else:
         logger.info('No algorithm specified. Using sparse Naive Bayes.')
-        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight)
-        modelFile = os.path.join(args.outputDir,'NB.pickle')
-    dictsFile = os.path.join(args.outputDir,'dicts.pickle')
-    theoryFile = os.path.join(args.outputDir,'theory.pickle')
+        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
 
     # Initializing model
     if args.init:
@@ -124,7 +132,7 @@
 
         # Load all data
         dicts = Dictionaries()
-        dicts.init_all(args.inputDir,depFileName=args.depFile)
+        dicts.init_all(args)
         
         # Create Model
         trainData = dicts.featureDict.keys()
@@ -134,10 +142,10 @@
             depFile = os.path.join(args.inputDir,args.depFile)
             theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
             theoryModels.init(depFile,dicts)
-            theoryModels.save(theoryFile)
+            theoryModels.save(args.theoryFile)
             
-        model.save(modelFile)
-        dicts.save(dictsFile)
+        model.save(args.modelFile)
+        dicts.save(args.dictsFile)
 
         logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
         return 0
@@ -149,12 +157,22 @@
         dicts = Dictionaries()
         theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
         # Load Files
-        if os.path.isfile(dictsFile):
-            dicts.load(dictsFile)
-        if os.path.isfile(modelFile):
-            model.load(modelFile)
-        if os.path.isfile(theoryFile) and args.learnTheories:
-            theoryModels.load(theoryFile)
+        if os.path.isfile(args.dictsFile):
+            #logger.info('Loading Dictionaries')
+            #startTime = time()
+            dicts.load(args.dictsFile)            
+            #logger.info('Done %s',time()-startTime)
+        if os.path.isfile(args.modelFile):
+            #logger.info('Loading Model')
+            #startTime = time()
+            model.load(args.modelFile)            
+            #logger.info('Done %s',time()-startTime)
+        if os.path.isfile(args.theoryFile) and args.learnTheories:
+            #logger.info('Loading Theory Models')
+            #startTime = time()
+            theoryModels.load(args.theoryFile)
+            #logger.info('Done %s',time()-startTime)
+        logger.info('All loading completed')
 
         # IO Streams
         OS = open(args.predictions,'w')
@@ -173,7 +191,7 @@
 #           try:
             if True:
                 if line.startswith('!'):
-                    problemId = dicts.parse_fact(line)                        
+                    problemId = dicts.parse_fact(line)    
                     # Statistics
                     if args.statistics and computeStats:
                         computeStats = False
@@ -183,7 +201,7 @@
                         if args.learnTheories:
                             tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
                             usedTheories = set([x.split('.')[0] for x in tmp]) 
-                            theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories)                        
+                            theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))                        
                         stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
                         if not stats.badPreds == []:
                             bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
@@ -211,7 +229,8 @@
                     computeStats = True
                     if args.predef:
                         continue
-                    name,features,accessibles,hints = dicts.parse_problem(line)    
+                    name,features,accessibles,hints = dicts.parse_problem(line)  
+                        
                     # Create predictions
                     logger.info('Starting computation for problem on line %s',lineCounter)
                     # Update Models with hints
@@ -223,11 +242,29 @@
                             pass
                         else:
                             model.update('hints',features,hints)
-                    
+
                     # Predict premises
                     if args.learnTheories:
                         predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
-                    predictions,predictionValues = model.predict(features,accessibles,dicts)
+
+                    # Add additional features on premise lvl if sine is enabled
+                    if args.sineFeatures:
+                        origFeatures = [f for f,_w in features]
+                        secondaryFeatures = []
+                        for f in origFeatures:
+                            if dicts.featureCountDict[f] == 1:
+                                continue
+                            triggeredFormulas = dicts.featureTriggeredFormulasDict[f]                                
+                            for formula in triggeredFormulas: 
+                                tFeatures = dicts.triggerFeaturesDict[formula]                                
+                                #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
+                                newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
+                            for fNew in newFeatures:
+                                secondaryFeatures.append((fNew,args.sineWeight))
+                        predictionsFeatures = features+secondaryFeatures
+                    else:
+                        predictionsFeatures = features                    
+                    predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
                     assert len(predictions) == len(predictionValues)
                     
                     # Delete hints
@@ -268,10 +305,10 @@
 
         # Save
         if args.saveModel:
-            model.save(modelFile)
+            model.save(args.modelFile)
             if args.learnTheories:
-                theoryModels.save(theoryFile)
-        dicts.save(dictsFile)
+                theoryModels.save(args.theoryFile)
+        dicts.save(args.dictsFile)
         if not args.saveStats == None:
             if args.learnTheories:
                 theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
@@ -282,25 +319,37 @@
 
 if __name__ == '__main__':
     # Example:
+    #List
+    # ISAR Theories
+    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories']    
+    #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
+    # ISAR predef mesh 
+    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions']
+    #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
+ 
+    
     # Auth
     # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories']    
+    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures']    
     #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
-    # ISAR MePo 
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef']
-    #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
+    # ISAR predef mesh 
+    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions']
+    #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
 
     
     # Jinja
     # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Jinja/','--learnTheories']    
-    #args = ['-i', '../data/20121227b/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
+    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories']    
+    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories']
+    # ISAR Theories SinePrior
+    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures']    
+    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures']    
     # ISAR NB
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121221/Jinja/']    
-    #args = ['-i', '../data/20121221/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
-    # ISAR MePo
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--predef']
-    #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
+    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/']    
+    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
+    # ISAR predef mesh
+    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions']
+    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
     # ISAR NB ATP
     #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies']    
     #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
@@ -313,28 +362,5 @@
     #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
  
 
-
-    # Probability
-    # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/','--learnTheories']    
-    #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
-    # ISAR NB
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/']    
-    #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/ProbIsarNB.stats','--cutOff','500']
-    # ISAR MePo
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/','--predef']
-    #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-    # ISAR NB ATP
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies']    
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies']
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies']
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']    
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
-
-
-    
-    #startTime = time()
-    #sys.exit(main(args))
-    #print 'New ' + str(round(time()-startTime,2))
-    sys.exit(main())
+    #sys.exit(mash(args))
+    sys.exit(mash())
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/readData.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/readData.py	Thu Jan 17 17:55:03 2013 +0100
@@ -14,7 +14,8 @@
 
 import sys,logging
 
-def create_feature_dict(nameIdDict,idNameDict,maxNameId,featureIdDict,maxFeatureId,featureCountDict,triggerFeatures,inputFile):
+def create_feature_dict(nameIdDict,idNameDict,maxNameId,featureIdDict,maxFeatureId,featureCountDict,\
+                        triggerFeaturesDict,featureTriggeredFormulasDict,sineFeatures,inputFile):
     logger = logging.getLogger('create_feature_dict')
     featureDict = {}
     IS = open(inputFile,'r')
@@ -33,7 +34,7 @@
         # Feature Ids
         featureNames = [f.strip() for f in line[1].split()]
         features = []     
-        minFeatureCount = 0   
+        minFeatureCount = 9999999   
         for fn in featureNames:
             weight = 1.0
             tmp = fn.split('=')
@@ -46,13 +47,21 @@
                 maxFeatureId += 1
             fId = featureIdDict[fn]
             features.append((fId,weight))
-            featureCountDict[fId] += 1
-            minFeatureCount = min(triggerFeatures,featureCountDict[fId])
+            if sineFeatures:
+                featureCountDict[fId] += 1
+                minFeatureCount = min(minFeatureCount,featureCountDict[fId])
         # Store results
         featureDict[nameId] = features
-        triggerFeatures[nameId] = [f for f,_w in features if featureCountDict[f] == minFeatureCount]
+        if sineFeatures:
+            triggerFeatures = [f for f,_w in features if featureCountDict[f] == minFeatureCount]
+            triggerFeaturesDict[nameId] = triggerFeatures
+            for f in triggerFeatures:
+                if featureTriggeredFormulasDict.has_key(f): 
+                    featureTriggeredFormulasDict[f].append(nameId)
+                else:
+                    featureTriggeredFormulasDict[f] = [nameId]
     IS.close()
-    return featureDict,maxNameId,maxFeatureId,featureCountDict,triggerFeatures
+    return featureDict,maxNameId,maxFeatureId,featureCountDict,triggerFeaturesDict,featureTriggeredFormulasDict
 
 def create_dependencies_dict(nameIdDict,inputFile):
     logger = logging.getLogger('create_dependencies_dict')
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py	Thu Jan 17 17:55:03 2013 +0100
@@ -44,9 +44,12 @@
         
         for f,_w in features:
             if not self.counts.has_key(f):
-                fPosCount = 0.0
-                fNegCount = 0.0
-                self.counts[f] = [fPosCount,fNegCount]
+                if label:
+                    fPosCount = 0.0
+                    fNegCount = 0.0
+                    self.counts[f] = [fPosCount,fNegCount]
+                else:
+                    continue
             posCount,negCount = self.counts[f]
             if label:
                 posCount += 1
@@ -89,8 +92,9 @@
         elif self.pos ==0:
             return 0
         logneg = log(self.neg)
+        lognegprior=log(float(self.neg)/5)
         logpos = log(self.pos)
-        prob = logpos - logneg
+        prob = logpos - lognegprior
         
         for f,_w in features:
             if self.counts.has_key(f):
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py	Thu Jan 17 17:55:03 2013 +0100
@@ -19,13 +19,11 @@
     An updateable naive Bayes classifier.
     '''
 
-    def __init__(self,defaultPriorWeight = 20.0,posWeight = 20.0,defVal = -15.0,useSinePrior = False,sineWeight = 100.0):
+    def __init__(self,defaultPriorWeight = 20.0,posWeight = 20.0,defVal = -15.0):
         '''
         Constructor
         '''
         self.counts = {}
-        self.sinePrior = useSinePrior
-        self.sineWeight = sineWeight
         self.defaultPriorWeight = defaultPriorWeight
         self.posWeight = posWeight
         self.defVal = defVal
@@ -100,19 +98,11 @@
         Returns a ranking of the accessibles.
         """
         predictions = []
-        fSet = set([f for f,_w in features])
         for a in accessibles:
             posA = self.counts[a][0]
             fA = set(self.counts[a][1].keys())
             fWeightsA = self.counts[a][1]
-            prior = posA
-            if self.sinePrior:
-                triggerFeatures = dicts.triggerFeatures[a]
-                triggeredFeatures = fSet.intersection(triggerFeatures)
-                for f in triggeredFeatures:
-                    posW = dicts.featureCountDict[f]
-                    prior += self.sineWeight /  posW 
-            resultA = log(prior)
+            resultA = log(posA)
             for f,w in features:
                 # DEBUG
                 #w = 1
@@ -131,12 +121,12 @@
 
     def save(self,fileName):
         OStream = open(fileName, 'wb')
-        dump((self.counts,self.defaultPriorWeight,self.posWeight,self.defVal,self.sinePrior,self.sineWeight),OStream)
+        dump((self.counts,self.defaultPriorWeight,self.posWeight,self.defVal),OStream)
         OStream.close()
 
     def load(self,fileName):
         OStream = open(fileName, 'rb')
-        self.counts,self.defaultPriorWeight,self.posWeight,self.defVal,self.sinePrior,self.sineWeight = load(OStream)
+        self.counts,self.defaultPriorWeight,self.posWeight,self.defVal = load(OStream)
         OStream.close()
 
 
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py	Thu Jan 17 17:55:03 2013 +0100
@@ -120,7 +120,8 @@
                          round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
 
         #try:
-        if True:
+        #if True:
+        if False:
             from matplotlib.pyplot import plot,figure,show,xlabel,ylabel,axis,hist
             avgRecall = [float(x)/self.problems for x in self.recallData]
             figure('Recall')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py	Thu Jan 17 17:55:03 2013 +0100
@@ -0,0 +1,182 @@
+'''
+Created on Jan 11, 2013
+
+Searches for the best parameters.
+
+@author: Daniel Kuehlwein
+'''
+
+import logging,sys,os
+from multiprocessing import Process,Queue,current_process,cpu_count
+from mash import mash
+
+def worker(inQueue, outQueue):
+    for func, args in iter(inQueue.get, 'STOP'):        
+        result = func(*args)
+        #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
+        outQueue.put(result)
+
+def run_mash(runId,inputDir,logFile,predictionFile,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight):
+    # Init
+    runId = str(runId)
+    predictionFile = predictionFile + runId
+    args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
+            '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
+            '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]    
+    if learnTheories:
+        args = args + ['--learnTheories']    
+    if sineFeatures:
+        args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    mash(args)
+    # Run
+    args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
+            '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
+            '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
+    if learnTheories:
+        args = args + ['--learnTheories']    
+    if sineFeatures:
+        args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    mash(args)
+
+    # Get Results
+    IS = open(logFile,'r')
+    lines =  IS.readlines()
+    tmpRes = lines[-1].split()
+    avgAuc = tmpRes[5]
+    avgRecall100 = tmpRes[9]
+    tmpTheoryRes = lines[-3].split()
+    avgTheoryPrecision = tmpTheoryRes[5] 
+    avgTheoryRecall100 = tmpTheoryRes[7]
+    avgTheoryRecall = tmpTheoryRes[9]
+    avgTheoryPredictedPercent = tmpTheoryRes[11]
+    IS.close()
+    
+    # Delete old models
+    os.remove(logFile)
+    os.remove(predictionFile)
+    os.remove('../tmp/t'+runId)
+    os.remove('../tmp/m'+runId)
+    os.remove('../tmp/d'+runId)
+    
+    outFile = open('tester','a')
+    #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
+    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
+    outFile.close()
+    print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
+        NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
+        sineFeatures,'\t',sineWeight,'\t',\
+        avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent    
+    return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,\
+             avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent 
+
+def update_best_params(avgRecall100,bestAvgRecall100,\
+                       bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\
+                       bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\
+                       NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
+                       learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight):
+                        if avgRecall100 > bestAvgRecall100:
+                            bestAvgRecall100 = avgRecall100
+                            bestNBDefaultPriorWeight = NBDefaultPriorWeight
+                            bestNBDefVal = NBDefVal
+                            bestNBPosWeight = NBPosWeight
+                            bestSineFeatures = sineFeatures
+                            bestSineWeight = sineWeight  
+                        return bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
+
+if __name__ == '__main__':
+    cores = cpu_count()
+    #cores = 1
+    # Options
+    depFile = 'mash_dependencies'
+    outputDir = '../tmp/'
+    numberOfPredictions = 500
+    
+    learnTheoriesRange = [True,False]
+    theoryDefValPosRange = [-x for x in range(1,20)]
+    theoryDefValNegRange = [-x for x in range(1,20)]
+    theoryPosWeightRange = [x for x in range(1,10)]
+    
+    NBDefaultPriorWeightRange = [10*x for x in range(10)]
+    NBDefValRange = [-x for x in range(1,20)]
+    NBPosWeightRange = [10*x for x in range(1,10)]
+    sineFeaturesRange = [True,False]    
+    sineWeightRange = [0.1,0.25,0.5,0.75,1.0]
+    
+    # Test 1
+    inputFile = '../data/20121227b/Auth/mash_commands'
+    inputDir = '../data/20121227b/Auth/'
+    predictionFile = '../tmp/auth.pred'
+    logFile = '../tmp/auth.log'
+    learnTheories = True
+    theoryDefValPos = -7.5
+    theoryDefValNeg = -15.0
+    theoryPosWeight = 10.0
+    NBDefaultPriorWeight = 20.0
+    NBDefVal =- 15.0
+    NBPosWeight = 10.0
+    sineFeatures = True
+    sineWeight =  0.5
+
+    task_queue = Queue()
+    done_queue = Queue()
+
+    runs = 0
+    for inputDir in ['../data/20121227b/Auth/']:
+        problemId = inputDir.split('/')[-2]
+        inputFile = os.path.join(inputDir,'mash_commands')
+        predictionFile = os.path.join('../tmp/',problemId+'.pred')
+        logFile = os.path.join('../tmp/',problemId+'.log')
+        learnTheories = True
+        theoryDefValPos = -7.5
+        theoryDefValNeg = -15.0
+        theoryPosWeight = 10.0
+        
+        bestAvgRecall100 = 0.0
+        bestNBDefaultPriorWeight = 1.0
+        bestNBDefVal = 1.0
+        bestNBPosWeight = 1.0
+        bestSineFeatures = False
+        bestSineWeight = 0.0
+        bestlearnTheories = True
+        besttheoryDefValPos = 1.0 
+        besttheoryDefValNeg = -15.0
+        besttheoryPosWeight = 5.0
+        for theoryPosWeight in theoryPosWeightRange:
+            for theoryDefValNeg in theoryDefValNegRange:
+                for NBDefaultPriorWeight in NBDefaultPriorWeightRange:
+                    for NBDefVal in NBDefValRange:
+                        for NBPosWeight in NBPosWeightRange:
+                            for sineFeatures in sineFeaturesRange:
+                                if sineFeatures:
+                                    for sineWeight in sineWeightRange:  
+                                        localLogFile = logFile+str(runs)                           
+                                        task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight)))
+                                        runs += 1
+                                else:
+                                    localLogFile = logFile+str(runs)
+                                    task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight)))
+                                    runs += 1
+        # Start worker processes
+        processes = []
+        for _i in range(cores):
+            process = Process(target=worker, args=(task_queue, done_queue))
+            process.start()
+            processes.append(process)
+    
+        for _i in range(runs):      
+            learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,\
+             avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent  = done_queue.get()
+            bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight = update_best_params(avgRecall100,bestAvgRecall100,\
+                       bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\
+                       bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\
+                       NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
+                       learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)              
+        print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
+    
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py	Thu Jan 17 17:55:03 2013 +0100
@@ -29,28 +29,35 @@
         self.recall100 = 0
         self.recall = 0.0
         self.predicted = 0.0
+        self.predictedPercent = 0.0
     
-    def update(self,currentTheory,predictedTheories,usedTheories):
+    def update(self,currentTheory,predictedTheories,usedTheories,nrAvailableTheories):
         self.count += 1
         allPredTheories = predictedTheories.union([currentTheory])
         if set(usedTheories).issubset(allPredTheories):
             self.recall100 += 1
         localPredicted = len(allPredTheories)
         self.predicted += localPredicted 
+        localPredictedPercent = float(localPredicted)/nrAvailableTheories
+        self.predictedPercent += localPredictedPercent 
         localPrec = float(len(set(usedTheories).intersection(allPredTheories))) / localPredicted
         self.precision += localPrec
-        localRecall = float(len(set(usedTheories).intersection(allPredTheories))) / len(set(usedTheories))
+        if len(set(usedTheories)) == 0:
+            localRecall = 1.0
+        else:
+            localRecall = float(len(set(usedTheories).intersection(allPredTheories))) / len(set(usedTheories))
         self.recall += localRecall
         self.logger.info('Theory prediction results:')
-        self.logger.info('Problem: %s \t Recall100: %s \t Precision: %s \t Recall: %s \t PredictedTeories: %s',\
-                         self.count,self.recall100,round(localPrec,2),round(localRecall,2),localPredicted)
+        self.logger.info('Problem: %s \t Recall100: %s \t Precision: %s \t Recall: %s \t PredictedTeoriesPercent: %s PredictedTeories: %s',\
+                         self.count,self.recall100,round(localPrec,2),round(localRecall,2),round(localPredictedPercent,2),localPredicted)
         
     def printAvg(self):
         self.logger.info('Average theory results:')
-        self.logger.info('avgPrecision: %s \t avgRecall100: %s \t avgRecall: %s \t avgPredicted:%s', \
+        self.logger.info('avgPrecision: %s \t avgRecall100: %s \t avgRecall: %s \t avgPredictedPercent: %s \t avgPredicted: %s', \
                          round(self.precision/self.count,2),\
                          round(float(self.recall100)/self.count,2),\
                          round(self.recall/self.count,2),\
+                         round(self.predictedPercent /self.count,2),\
                          round(self.predicted /self.count,2))
         
     def save(self,fileName):
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Thu Jan 17 17:55:02 2013 +0100
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Thu Jan 17 17:55:03 2013 +0100
@@ -150,8 +150,7 @@
     val core =
       "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
       " --numberOfPredictions " ^ string_of_int max_suggs ^
-      " --NBSinePrior" (* --learnTheories *) ^
-      (if save then " --saveModel" else "")
+      (* " --learnTheories" ^ *) (if save then " --saveModel" else "")
     val command =
       "\"$ISABELLE_SLEDGEHAMMER_MASH/src/mash.py\" --quiet --outputDir " ^
       File.shell_path (mash_model_dir ()) ^ " --log " ^ log_file ^ " " ^ core ^