killed Python version of MaSh, now that the SML version works adequately
authorblanchet
Sun Jun 29 18:28:27 2014 +0200 (2014-06-29)
changeset 5743102c408aed5ee
parent 57430 020cea57eaa4
child 57432 78d7fbe9b203
killed Python version of MaSh, now that the SML version works adequately
NEWS
src/Doc/Sledgehammer/document/root.tex
src/HOL/TPTP/MaSh_Export.thy
src/HOL/TPTP/mash_eval.ML
src/HOL/TPTP/mash_export.ML
src/HOL/Tools/Sledgehammer/MaSh/etc/settings
src/HOL/Tools/Sledgehammer/MaSh/src/ExpandFeatures.py
src/HOL/Tools/Sledgehammer/MaSh/src/KNN.py
src/HOL/Tools/Sledgehammer/MaSh/src/KNNs.py
src/HOL/Tools/Sledgehammer/MaSh/src/argparse.py
src/HOL/Tools/Sledgehammer/MaSh/src/compareStats.py
src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
src/HOL/Tools/Sledgehammer/MaSh/src/mash.py
src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py
src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py
src/HOL/Tools/Sledgehammer/MaSh/src/readData.py
src/HOL/Tools/Sledgehammer/MaSh/src/server.py
src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py
src/HOL/Tools/Sledgehammer/MaSh/src/snow.py
src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py
src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py
src/HOL/Tools/Sledgehammer/MaSh/src/stats.py
src/HOL/Tools/Sledgehammer/MaSh/src/tester.py
src/HOL/Tools/Sledgehammer/MaSh/src/theoryModels.py
src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
src/HOL/Tools/etc/options
     1.1 --- a/NEWS	Sat Jun 28 22:13:23 2014 +0200
     1.2 +++ b/NEWS	Sun Jun 29 18:28:27 2014 +0200
     1.3 @@ -433,9 +433,7 @@
     1.4          and increase performance and reliability.
     1.5        - MaSh and MeSh are now used by default together with the traditional
     1.6          MePo (Meng-Paulson) relevance filter. To disable MaSh, set the "MaSh"
     1.7 -        system option in Plugin Options / Isabelle / General to "none". Other
     1.8 -        allowed values include "sml" (for the default SML engine) and "py"
     1.9 -        (for the old Python engine).
    1.10 +        system option in Plugin Options / Isabelle / General to "none".
    1.11    - New option:
    1.12        smt_proofs
    1.13    - Renamed options:
     2.1 --- a/src/Doc/Sledgehammer/document/root.tex	Sat Jun 28 22:13:23 2014 +0200
     2.2 +++ b/src/Doc/Sledgehammer/document/root.tex	Sun Jun 29 18:28:27 2014 +0200
     2.3 @@ -1059,14 +1059,11 @@
     2.4  The MaSh machine learner. Three learning engines are provided:
     2.5  
     2.6  \begin{enum}
     2.7 -\item[\labelitemi] \textbf{\textit{sml\_nb}} (also called \textbf{\textit{sml}}
     2.8 +\item[\labelitemi] \textbf{\textit{nb}} (also called \textbf{\textit{sml}}
     2.9  and \textbf{\textit{yes}}) is a Standard ML implementation of naive Bayes.
    2.10  
    2.11 -\item[\labelitemi] \textbf{\textit{sml\_knn}} is a Standard ML implementation of
    2.12 +\item[\labelitemi] \textbf{\textit{knn}} is a Standard ML implementation of
    2.13  $k$-nearest neighbors.
    2.14 -
    2.15 -\item[\labelitemi] \textbf{\textit{py}} is a Python implementation of naive Bayes.
    2.16 -The program is included with Isabelle as \texttt{mash.py}.
    2.17  \end{enum}
    2.18  
    2.19  In addition, the special value \textit{none} is used to disable machine learning by
    2.20 @@ -1077,10 +1074,7 @@
    2.21  \texttt{\$ISABELLE\_HOME\_USER/etc/settings} file, or via the ``MaSh'' option
    2.22  under ``Plugins > Plugin Options > Isabelle > General'' in Isabelle/jEdit.
    2.23  Persistent data for both engines is stored in the directory
    2.24 -\texttt{\$ISABELLE\_HOME\_USER/mash}. When switching to the \textit{py} engine,
    2.25 -you will need to invoke the \textit{relearn\_isar} subcommand
    2.26 -(\S\ref{sledgehammer}) to synchronize the persistent databases on the
    2.27 -Python side.
    2.28 +\texttt{\$ISABELLE\_HOME\_USER/mash}.
    2.29  
    2.30  \item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the
    2.31  rankings from MePo and MaSh.
     3.1 --- a/src/HOL/TPTP/MaSh_Export.thy	Sat Jun 28 22:13:23 2014 +0200
     3.2 +++ b/src/HOL/TPTP/MaSh_Export.thy	Sun Jun 29 18:28:27 2014 +0200
     3.3 @@ -46,22 +46,22 @@
     3.4    ()
     3.5  *}
     3.6  
     3.7 -ML {* Options.put_default @{system_option MaSh} "sml_nb" *}
     3.8 +ML {* Options.put_default @{system_option MaSh} "nb" *}
     3.9  
    3.10  ML {*
    3.11  if do_it then
    3.12    generate_mash_suggestions @{context} params (range, step) thys max_suggestions
    3.13 -    (prefix ^ "mash_sml_nb_suggestions")
    3.14 +    (prefix ^ "mash_nb_suggestions")
    3.15  else
    3.16    ()
    3.17  *}
    3.18  
    3.19 -ML {* Options.put_default @{system_option MaSh} "sml_knn" *}
    3.20 +ML {* Options.put_default @{system_option MaSh} "knn" *}
    3.21  
    3.22  ML {*
    3.23  if do_it then
    3.24    generate_mash_suggestions @{context} params (range, step) thys max_suggestions
    3.25 -    (prefix ^ "mash_sml_knn_suggestions")
    3.26 +    (prefix ^ "mash_knn_suggestions")
    3.27  else
    3.28    ()
    3.29  *}
     4.1 --- a/src/HOL/TPTP/mash_eval.ML	Sat Jun 28 22:13:23 2014 +0200
     4.2 +++ b/src/HOL/TPTP/mash_eval.ML	Sun Jun 29 18:28:27 2014 +0200
     4.3 @@ -29,6 +29,7 @@
     4.4  open Sledgehammer_Prover
     4.5  open Sledgehammer_Prover_ATP
     4.6  open Sledgehammer_Commands
     4.7 +open MaSh_Export
     4.8  
     4.9  val prefix = Library.prefix
    4.10  
    4.11 @@ -38,9 +39,6 @@
    4.12  val MeSh_ProverN = MeShN ^ "-Prover"
    4.13  val IsarN = "Isar"
    4.14  
    4.15 -fun in_range (from, to) j =
    4.16 -  j >= from andalso (to = NONE orelse j <= the to)
    4.17 -
    4.18  fun evaluate_mash_suggestions ctxt params range methods prob_dir_name mepo_file_name
    4.19      mash_isar_file_name mash_prover_file_name mesh_isar_file_name mesh_prover_file_name
    4.20      report_file_name =
     5.1 --- a/src/HOL/TPTP/mash_export.ML	Sat Jun 28 22:13:23 2014 +0200
     5.2 +++ b/src/HOL/TPTP/mash_export.ML	Sun Jun 29 18:28:27 2014 +0200
     5.3 @@ -9,6 +9,9 @@
     5.4  sig
     5.5    type params = Sledgehammer_Prover.params
     5.6  
     5.7 +  val in_range : int * int option -> int -> bool
     5.8 +  val extract_suggestions : string -> string * (string * real) list
     5.9 +
    5.10    val generate_accessibility : Proof.context -> theory list -> string -> unit
    5.11    val generate_features : Proof.context -> theory list -> string -> unit
    5.12    val generate_isar_dependencies : Proof.context -> int * int option -> theory list -> string ->
    5.13 @@ -37,6 +40,18 @@
    5.14  fun in_range (from, to) j =
    5.15    j >= from andalso (to = NONE orelse j <= the to)
    5.16  
    5.17 +(* The suggested weights do not make much sense. *)
    5.18 +fun extract_suggestion sugg =
    5.19 +  (case space_explode "=" sugg of
    5.20 +    [name, weight] => SOME (decode_str name, Real.fromString weight |> the_default 1.0)
    5.21 +  | [name] => SOME (decode_str name, 1.0)
    5.22 +  | _ => NONE)
    5.23 +
    5.24 +fun extract_suggestions line =
    5.25 +  (case space_explode ":" line of
    5.26 +    [goal, suggs] => (decode_str goal, map_filter extract_suggestion (space_explode " " suggs))
    5.27 +  | _ => ("", []))
    5.28 +
    5.29  fun has_thm_thy th thy =
    5.30    Context.theory_name thy = Context.theory_name (theory_of_thm th)
    5.31  
    5.32 @@ -265,11 +280,11 @@
    5.33         #> Sledgehammer_MePo.mepo_suggested_facts ctxt params max_suggs NONE hyp_ts concl_t)
    5.34  
    5.35  fun generate_mash_suggestions ctxt params =
    5.36 -  (Sledgehammer_MaSh.mash_unlearn ctxt params;
    5.37 +  (Sledgehammer_MaSh.mash_unlearn ();
    5.38     generate_mepo_or_mash_suggestions
    5.39       (fn ctxt => fn thy => fn params as {provers = prover :: _, ...} => fn max_suggs => fn hyp_ts =>
    5.40            fn concl_t =>
    5.41 -        tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover true 2 false
    5.42 +        tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover 2 false
    5.43            Sledgehammer_Util.one_year)
    5.44          #> Sledgehammer_MaSh.mash_suggested_facts ctxt thy params max_suggs hyp_ts concl_t
    5.45          #> fst) ctxt params)
     6.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/etc/settings	Sat Jun 28 22:13:23 2014 +0200
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,8 +0,0 @@
     6.4 -# -*- shell-script -*- :mode=shellscript:
     6.5 -
     6.6 -ISABELLE_SLEDGEHAMMER_MASH="$COMPONENT"
     6.7 -
     6.8 -# MASH=yes
     6.9 -if [ -z "$MASH_PORT" ]; then
    6.10 -  MASH_PORT=9255
    6.11 -fi
     7.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/ExpandFeatures.py	Sat Jun 28 22:13:23 2014 +0200
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,163 +0,0 @@
     7.4 -'''
     7.5 -Created on Aug 21, 2013
     7.6 -
     7.7 -@author: daniel
     7.8 -'''
     7.9 -
    7.10 -from math import log
    7.11 -#from gensim import corpora, models, similarities
    7.12 -
    7.13 -class ExpandFeatures(object):
    7.14 -
    7.15 -    def __init__(self,dicts):
    7.16 -        self.dicts = dicts
    7.17 -        self.featureMap = {}
    7.18 -        self.alpha = 0.1
    7.19 -        self.featureCounts = {}
    7.20 -        self.counter = 0        
    7.21 -        self.corpus = []
    7.22 -#        self.LSIModel = models.lsimodel.LsiModel(self.corpus,num_topics=500)
    7.23 -
    7.24 -    def initialize(self,dicts):
    7.25 -        self.dicts = dicts
    7.26 -        IS = open(dicts.accFile,'r')
    7.27 -        for line in IS:
    7.28 -            line = line.split(':')
    7.29 -            name = line[0]
    7.30 -            #print 'name',name
    7.31 -            nameId = dicts.nameIdDict[name]    
    7.32 -            features = dicts.featureDict[nameId]
    7.33 -            dependencies = dicts.dependenciesDict[nameId]   
    7.34 -            x = [self.dicts.idNameDict[d] for d in dependencies]
    7.35 -            #print x  
    7.36 -            self.update(features, dependencies)
    7.37 -            self.corpus.append([(x,1) for x in features.keys()])
    7.38 -        IS.close()
    7.39 -        print 'x'
    7.40 -        #self.LSIModel = models.lsimodel.LsiModel(self.corpus,num_topics=500)
    7.41 -        print self.LSIModel
    7.42 -        print 'y'
    7.43 -        
    7.44 -    def update(self,features,dependencies):
    7.45 -        self.counter += 1
    7.46 -        self.corpus.append([(x,1) for x in features.keys()])
    7.47 -        self.LSIModel.add_documents([[(x,1) for x in features.keys()]])
    7.48 -        """
    7.49 -        for f in features.iterkeys():
    7.50 -            try:
    7.51 -                self.featureCounts[f] += 1
    7.52 -            except:
    7.53 -                self.featureCounts[f] = 1
    7.54 -            if self.featureCounts[f] > 100:
    7.55 -                continue
    7.56 -            try:
    7.57 -                self.featureMap[f] = self.featureMap[f].intersection(features.keys())
    7.58 -            except:
    7.59 -                self.featureMap[f] = set(features.keys())
    7.60 -            #print 'fOld',len(fMap),self.featureCounts[f],len(dependencies)
    7.61 -
    7.62 -            for d in dependencies[1:]:
    7.63 -                #print 'dep',self.dicts.idNameDict[d]
    7.64 -                dFeatures = self.dicts.featureDict[d]
    7.65 -                for df in dFeatures.iterkeys():
    7.66 -                    if self.featureCounts.has_key(df):
    7.67 -                        if self.featureCounts[df] > 20:
    7.68 -                            continue
    7.69 -                    else:
    7.70 -                        print df
    7.71 -                    try:
    7.72 -                        fMap[df] += self.alpha * (1.0 - fMap[df])
    7.73 -                    except:
    7.74 -                        fMap[df] = self.alpha
    7.75 -            """
    7.76 -            #print 'fNew',len(fMap)
    7.77 -            
    7.78 -    def expand(self,features):
    7.79 -        #print self.corpus[:50]        
    7.80 -        #print corpus
    7.81 -        #tfidfmodel = models.TfidfModel(self.corpus, normalize=True)        
    7.82 -        #print features.keys()        
    7.83 -        #tfidfcorpus = [tfidfmodel[x] for x in self.corpus]
    7.84 -        #newFeatures = LSI[[(x,1) for x in features.keys()]]
    7.85 -        #newFeatures = self.LSIModel[[(x,1) for x in features.keys()]]
    7.86 -        #print features
    7.87 -        #print newFeatures
    7.88 -        #print newFeatures
    7.89 -        
    7.90 -        """
    7.91 -        newFeatures = dict(features)
    7.92 -        for f in features.keys():
    7.93 -            try:
    7.94 -                fC = self.featureCounts[f]
    7.95 -            except:
    7.96 -                fC = 0.5
    7.97 -            newFeatures[f] = log(float(8+self.counter) / fC)
    7.98 -        #nrOfFeatures = float(len(features))
    7.99 -        addedCount = 0
   7.100 -        alpha = 0.2
   7.101 -        #"""
   7.102 -        
   7.103 -        """
   7.104 -        consideredFeatures = []
   7.105 -        while len(newFeatures) < 30:
   7.106 -            #alpha = alpha * 0.5
   7.107 -            minF = None
   7.108 -            minFrequence = 1000000
   7.109 -            for f in newFeatures.iterkeys():
   7.110 -                if f in consideredFeatures:
   7.111 -                    continue
   7.112 -                try:
   7.113 -                    if self.featureCounts[f] < minFrequence:
   7.114 -                        minF = f
   7.115 -                except:
   7.116 -                    pass
   7.117 -            if minF == None:
   7.118 -                break
   7.119 -            # Expand minimal feature
   7.120 -            consideredFeatures.append(minF)
   7.121 -            for expF in self.featureMap[minF]:
   7.122 -                if not newFeatures.has_key(expF):
   7.123 -                    fC = self.featureCounts[minF]
   7.124 -                    newFeatures[expF] = alpha*log(float(8+self.counter) / fC)
   7.125 -        #print features, newFeatures
   7.126 -        #"""
   7.127 -        """
   7.128 -        for f in features.iterkeys():
   7.129 -            try:
   7.130 -                self.featureCounts[f] += 1
   7.131 -            except:
   7.132 -                self.featureCounts[f] = 0            
   7.133 -            if self.featureCounts[f] > 10:
   7.134 -                continue            
   7.135 -            addedCount += 1
   7.136 -            try:
   7.137 -                fmap = self.featureMap[f]
   7.138 -            except:
   7.139 -                self.featureMap[f] = {}
   7.140 -                fmap = {}
   7.141 -            for nf,nv in fmap.iteritems():
   7.142 -                try:
   7.143 -                    newFeatures[nf] += nv
   7.144 -                except:
   7.145 -                    newFeatures[nf] = nv
   7.146 -        if addedCount > 0: 
   7.147 -            for f,w in newFeatures.iteritems():
   7.148 -                newFeatures[f] = float(w)/addedCount
   7.149 -        #"""                    
   7.150 -        """
   7.151 -        deleteF = []
   7.152 -        for f,w in newFeatures.iteritems():
   7.153 -            if w < 0.1:
   7.154 -                deleteF.append(f)
   7.155 -        for f in deleteF:
   7.156 -            del newFeatures[f]
   7.157 -        """
   7.158 -        #print 'fold',len(features)
   7.159 -        #print 'fnew',len(newFeatures)
   7.160 -        #return dict(newFeatures)
   7.161 -        return features
   7.162 -
   7.163 -if __name__ == "__main__":
   7.164 -    pass
   7.165 -    
   7.166 -        
   7.167 \ No newline at end of file
     8.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/KNN.py	Sat Jun 28 22:13:23 2014 +0200
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,99 +0,0 @@
     8.4 -'''
     8.5 -Created on Aug 21, 2013
     8.6 -
     8.7 -@author: daniel
     8.8 -'''
     8.9 -
    8.10 -from cPickle import dump,load
    8.11 -from numpy import array
    8.12 -from math import sqrt,log
    8.13 -
    8.14 -def cosine(f1,f2):
    8.15 -    f1Norm = 0.0
    8.16 -    for f in f1.keys():
    8.17 -        f1Norm += f1[f] * f1[f]
    8.18 -    #assert f1Norm = sum(map(lambda x,y: x*y,f1.itervalues(),f1.itervalues()))
    8.19 -    f1Norm = sqrt(f1Norm) 
    8.20 -    
    8.21 -    f2Norm = 0.0
    8.22 -    for f in f2.keys():
    8.23 -        f2Norm += f2[f] * f2[f]
    8.24 -    f2Norm = sqrt(f2Norm)         
    8.25 -   
    8.26 -    dotProduct = 0.0
    8.27 -    featureIntersection = set(f1.keys()) & set(f2.keys())
    8.28 -    for f in featureIntersection:
    8.29 -            dotProduct += f1[f] * f2[f]
    8.30 -    cosine = dotProduct / (f1Norm * f2Norm)        
    8.31 -    return 1.0 - cosine
    8.32 -
    8.33 -def euclidean(f1,f2):
    8.34 -    diffSum = 0.0        
    8.35 -    featureUnion = set(f1.keys()) | set(f2.keys())
    8.36 -    for f in featureUnion:
    8.37 -        try:
    8.38 -            f1Val = f1[f]
    8.39 -        except:
    8.40 -            f1Val = 0.0
    8.41 -        try:
    8.42 -            f2Val = f2[f]
    8.43 -        except:
    8.44 -            f2Val = 0.0
    8.45 -        diff = f1Val - f2Val
    8.46 -        diffSum += diff * diff
    8.47 -        #if f in f1.keys():
    8.48 -        #    diffSum += log(2+self.pointCount/self.featureCounts[f]) * diff * diff
    8.49 -        #else:
    8.50 -        #    diffSum += diff * diff            
    8.51 -    #print diffSum,f1,f2
    8.52 -    return diffSum
    8.53 -
    8.54 -class KNN(object):
    8.55 -    '''
    8.56 -    A basic KNN ranker.
    8.57 -    '''
    8.58 -
    8.59 -    def __init__(self,dicts,metric=cosine):
    8.60 -        '''
    8.61 -        Constructor
    8.62 -        '''
    8.63 -        self.points = dicts.featureDict
    8.64 -        self.metric = metric
    8.65 -
    8.66 -    def initializeModel(self,_trainData,_dicts):  
    8.67 -        """
    8.68 -        Build basic model from training data.
    8.69 -        """
    8.70 -        pass
    8.71 -    
    8.72 -    def update(self,dataPoint,features,dependencies):
    8.73 -        assert self.points[dataPoint] == features
    8.74 -        
    8.75 -    def overwrite(self,problemId,newDependencies,dicts):
    8.76 -        # Taken care of by dicts
    8.77 -        pass
    8.78 -    
    8.79 -    def delete(self,dataPoint,features,dependencies):
    8.80 -        # Taken care of by dicts
    8.81 -        pass      
    8.82 -    
    8.83 -    def predict(self,features,accessibles,dicts):
    8.84 -        predictions = map(lambda x: self.metric(features,self.points[x]),accessibles)
    8.85 -        predictions = array(predictions)
    8.86 -        perm = predictions.argsort()
    8.87 -        return array(accessibles)[perm],predictions[perm]
    8.88 -    
    8.89 -    def save(self,fileName):
    8.90 -        OStream = open(fileName, 'wb')
    8.91 -        dump((self.points,self.metric),OStream)
    8.92 -        OStream.close()
    8.93 -
    8.94 -    def load(self,fileName):
    8.95 -        OStream = open(fileName, 'rb')
    8.96 -        self.points,self.metric = load(OStream)
    8.97 -        OStream.close()
    8.98 -
    8.99 -if __name__ == '__main__':
   8.100 -    pass    
   8.101 -        
   8.102 -        
   8.103 \ No newline at end of file
     9.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/KNNs.py	Sat Jun 28 22:13:23 2014 +0200
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,105 +0,0 @@
     9.4 -'''
     9.5 -Created on Aug 21, 2013
     9.6 -
     9.7 -@author: daniel
     9.8 -'''
     9.9 -
    9.10 -from math import log
    9.11 -from KNN import KNN,cosine
    9.12 -from numpy import array
    9.13 -
    9.14 -class KNNAdaptPointFeatures(KNN):
    9.15 -    
    9.16 -    def __init__(self,dicts,metric=cosine,alpha = 0.05):
    9.17 -        self.points = dicts.featureDict
    9.18 -        self.metric = self.euclidean    
    9.19 -        self.alpha = alpha
    9.20 -        self.count = 0
    9.21 -        self.featureCount = {}
    9.22 -
    9.23 -    def initializeModel(self,trainData,dicts):  
    9.24 -        """
    9.25 -        Build basic model from training data.
    9.26 -        """
    9.27 -        IS = open(dicts.accFile,'r')
    9.28 -        for line in IS:
    9.29 -            line = line.split(':')
    9.30 -            name = line[0]
    9.31 -            nameId = dicts.nameIdDict[name]
    9.32 -            features = dicts.featureDict[nameId]
    9.33 -            dependencies = dicts.dependenciesDict[nameId] 
    9.34 -            self.update(nameId, features, dependencies)
    9.35 -        IS.close()
    9.36 -        
    9.37 -    def update(self,dataPoint,features,dependencies):
    9.38 -        self.count += 1
    9.39 -        for f in features.iterkeys():
    9.40 -            try:
    9.41 -                self.featureCount[f] += 1
    9.42 -            except:
    9.43 -                self.featureCount[f] = 1
    9.44 -        for d in dependencies:
    9.45 -            dFeatures = self.points[d]
    9.46 -            featureUnion = set(dFeatures.keys()) | set(features.keys())
    9.47 -            for f in featureUnion:
    9.48 -                try:
    9.49 -                    pVal = features[f]
    9.50 -                except:
    9.51 -                    pVal = 0.0
    9.52 -                try:
    9.53 -                    dVal = dFeatures[f]
    9.54 -                except:
    9.55 -                    dVal = 0.0
    9.56 -                newDVal = dVal + self.alpha * (pVal - dVal)                
    9.57 -                dFeatures[f] = newDVal           
    9.58 -        
    9.59 -    def euclidean(self,f1,f2):
    9.60 -        diffSum = 0.0        
    9.61 -        f1Set = set(f1.keys())
    9.62 -        featureUnion = f1Set | set(f2.keys())
    9.63 -        for f in featureUnion:
    9.64 -            if not self.featureCount.has_key(f):
    9.65 -                continue
    9.66 -            if self.featureCount[f] == 1:
    9.67 -                continue
    9.68 -            try:
    9.69 -                f1Val = f1[f]
    9.70 -            except:
    9.71 -                f1Val = 0.0
    9.72 -            try:
    9.73 -                f2Val = f2[f]
    9.74 -            except:
    9.75 -                f2Val = 0.0
    9.76 -            diff = f1Val - f2Val
    9.77 -            diffSum += diff * diff
    9.78 -            if f in f1Set:
    9.79 -                diffSum += log(2+self.count/self.featureCount[f]) * diff * diff
    9.80 -            else:
    9.81 -                diffSum += diff * diff            
    9.82 -        #print diffSum,f1,f2
    9.83 -        return diffSum 
    9.84 -
    9.85 -class KNNUrban(KNN):
    9.86 -    def __init__(self,dicts,metric=cosine,nrOfNeighbours = 40):
    9.87 -        self.points = dicts.featureDict
    9.88 -        self.metric = metric    
    9.89 -        self.nrOfNeighbours = nrOfNeighbours # Ignored at the moment
    9.90 -    
    9.91 -    def predict(self,features,accessibles,dicts):
    9.92 -        predictions = map(lambda x: self.metric(features,self.points[x]),accessibles)
    9.93 -        pDict = dict(zip(accessibles,predictions))
    9.94 -        for a,p in zip(accessibles,predictions):
    9.95 -            aDeps = dicts.dependenciesDict[a]
    9.96 -            for d in aDeps:
    9.97 -                pDict[d] -= p 
    9.98 -        predictions = []
    9.99 -        names = []
   9.100 -        for n,p in pDict.items():
   9.101 -            predictions.append(p)
   9.102 -            names.append(n)        
   9.103 -        predictions = array(predictions)
   9.104 -        perm = predictions.argsort()
   9.105 -        return array(names)[perm],predictions[perm]
   9.106 -    
   9.107 -    
   9.108 -         
   9.109 \ No newline at end of file
    10.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/argparse.py	Sat Jun 28 22:13:23 2014 +0200
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,2357 +0,0 @@
    10.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/argparse.py
    10.5 -#
    10.6 -# Argument parser. See copyright notice below.
    10.7 -
    10.8 -# -*- coding: utf-8 -*-
    10.9 -
   10.10 -# Copyright (C) 2006-2009 Steven J. Bethard <steven.bethard@gmail.com>.
   10.11 -#
   10.12 -# Licensed under the Apache License, Version 2.0 (the "License"); you may not
   10.13 -# use this file except in compliance with the License. You may obtain a copy
   10.14 -# of the License at
   10.15 -#
   10.16 -#     http://www.apache.org/licenses/LICENSE-2.0
   10.17 -#
   10.18 -# Unless required by applicable law or agreed to in writing, software
   10.19 -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   10.20 -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   10.21 -# License for the specific language governing permissions and limitations
   10.22 -# under the License.
   10.23 -
   10.24 -"""Command-line parsing library
   10.25 -
   10.26 -This module is an optparse-inspired command-line parsing library that:
   10.27 -
   10.28 -    - handles both optional and positional arguments
   10.29 -    - produces highly informative usage messages
   10.30 -    - supports parsers that dispatch to sub-parsers
   10.31 -
   10.32 -The following is a simple usage example that sums integers from the
   10.33 -command-line and writes the result to a file::
   10.34 -
   10.35 -    parser = argparse.ArgumentParser(
   10.36 -        description='sum the integers at the command line')
   10.37 -    parser.add_argument(
   10.38 -        'integers', metavar='int', nargs='+', type=int,
   10.39 -        help='an integer to be summed')
   10.40 -    parser.add_argument(
   10.41 -        '--log', default=sys.stdout, type=argparse.FileType('w'),
   10.42 -        help='the file where the sum should be written')
   10.43 -    args = parser.parse_args()
   10.44 -    args.log.write('%s' % sum(args.integers))
   10.45 -    args.log.close()
   10.46 -
   10.47 -The module contains the following public classes:
   10.48 -
   10.49 -    - ArgumentParser -- The main entry point for command-line parsing. As the
   10.50 -        example above shows, the add_argument() method is used to populate
   10.51 -        the parser with actions for optional and positional arguments. Then
   10.52 -        the parse_args() method is invoked to convert the args at the
   10.53 -        command-line into an object with attributes.
   10.54 -
   10.55 -    - ArgumentError -- The exception raised by ArgumentParser objects when
   10.56 -        there are errors with the parser's actions. Errors raised while
   10.57 -        parsing the command-line are caught by ArgumentParser and emitted
   10.58 -        as command-line messages.
   10.59 -
   10.60 -    - FileType -- A factory for defining types of files to be created. As the
   10.61 -        example above shows, instances of FileType are typically passed as
   10.62 -        the type= argument of add_argument() calls.
   10.63 -
   10.64 -    - Action -- The base class for parser actions. Typically actions are
   10.65 -        selected by passing strings like 'store_true' or 'append_const' to
   10.66 -        the action= argument of add_argument(). However, for greater
   10.67 -        customization of ArgumentParser actions, subclasses of Action may
   10.68 -        be defined and passed as the action= argument.
   10.69 -
   10.70 -    - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter,
   10.71 -        ArgumentDefaultsHelpFormatter -- Formatter classes which
   10.72 -        may be passed as the formatter_class= argument to the
   10.73 -        ArgumentParser constructor. HelpFormatter is the default,
   10.74 -        RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser
   10.75 -        not to change the formatting for help text, and
   10.76 -        ArgumentDefaultsHelpFormatter adds information about argument defaults
   10.77 -        to the help.
   10.78 -
   10.79 -All other classes in this module are considered implementation details.
   10.80 -(Also note that HelpFormatter and RawDescriptionHelpFormatter are only
   10.81 -considered public as object names -- the API of the formatter objects is
   10.82 -still considered an implementation detail.)
   10.83 -"""
   10.84 -
   10.85 -__version__ = '1.1'
   10.86 -__all__ = [
   10.87 -    'ArgumentParser',
   10.88 -    'ArgumentError',
   10.89 -    'Namespace',
   10.90 -    'Action',
   10.91 -    'FileType',
   10.92 -    'HelpFormatter',
   10.93 -    'RawDescriptionHelpFormatter',
   10.94 -    'RawTextHelpFormatter',
   10.95 -    'ArgumentDefaultsHelpFormatter',
   10.96 -]
   10.97 -
   10.98 -
   10.99 -import copy as _copy
  10.100 -import os as _os
  10.101 -import re as _re
  10.102 -import sys as _sys
  10.103 -import textwrap as _textwrap
  10.104 -
  10.105 -from gettext import gettext as _
  10.106 -
  10.107 -try:
  10.108 -    _set = set
  10.109 -except NameError:
  10.110 -    from sets import Set as _set
  10.111 -
  10.112 -try:
  10.113 -    _basestring = basestring
  10.114 -except NameError:
  10.115 -    _basestring = str
  10.116 -
  10.117 -try:
  10.118 -    _sorted = sorted
  10.119 -except NameError:
  10.120 -
  10.121 -    def _sorted(iterable, reverse=False):
  10.122 -        result = list(iterable)
  10.123 -        result.sort()
  10.124 -        if reverse:
  10.125 -            result.reverse()
  10.126 -        return result
  10.127 -
  10.128 -
  10.129 -def _callable(obj):
  10.130 -    return hasattr(obj, '__call__') or hasattr(obj, '__bases__')
  10.131 -
  10.132 -# silence Python 2.6 buggy warnings about Exception.message
  10.133 -if _sys.version_info[:2] == (2, 6):
  10.134 -    import warnings
  10.135 -    warnings.filterwarnings(
  10.136 -        action='ignore',
  10.137 -        message='BaseException.message has been deprecated as of Python 2.6',
  10.138 -        category=DeprecationWarning,
  10.139 -        module='argparse')
  10.140 -
  10.141 -
  10.142 -SUPPRESS = '==SUPPRESS=='
  10.143 -
  10.144 -OPTIONAL = '?'
  10.145 -ZERO_OR_MORE = '*'
  10.146 -ONE_OR_MORE = '+'
  10.147 -PARSER = 'A...'
  10.148 -REMAINDER = '...'
  10.149 -
  10.150 -# =============================
  10.151 -# Utility functions and classes
  10.152 -# =============================
  10.153 -
  10.154 -class _AttributeHolder(object):
  10.155 -    """Abstract base class that provides __repr__.
  10.156 -
  10.157 -    The __repr__ method returns a string in the format::
  10.158 -        ClassName(attr=name, attr=name, ...)
  10.159 -    The attributes are determined either by a class-level attribute,
  10.160 -    '_kwarg_names', or by inspecting the instance __dict__.
  10.161 -    """
  10.162 -
  10.163 -    def __repr__(self):
  10.164 -        type_name = type(self).__name__
  10.165 -        arg_strings = []
  10.166 -        for arg in self._get_args():
  10.167 -            arg_strings.append(repr(arg))
  10.168 -        for name, value in self._get_kwargs():
  10.169 -            arg_strings.append('%s=%r' % (name, value))
  10.170 -        return '%s(%s)' % (type_name, ', '.join(arg_strings))
  10.171 -
  10.172 -    def _get_kwargs(self):
  10.173 -        return _sorted(self.__dict__.items())
  10.174 -
  10.175 -    def _get_args(self):
  10.176 -        return []
  10.177 -
  10.178 -
  10.179 -def _ensure_value(namespace, name, value):
  10.180 -    if getattr(namespace, name, None) is None:
  10.181 -        setattr(namespace, name, value)
  10.182 -    return getattr(namespace, name)
  10.183 -
  10.184 -
  10.185 -# ===============
  10.186 -# Formatting Help
  10.187 -# ===============
  10.188 -
  10.189 -class HelpFormatter(object):
  10.190 -    """Formatter for generating usage messages and argument help strings.
  10.191 -
  10.192 -    Only the name of this class is considered a public API. All the methods
  10.193 -    provided by the class are considered an implementation detail.
  10.194 -    """
  10.195 -
  10.196 -    def __init__(self,
  10.197 -                 prog,
  10.198 -                 indent_increment=2,
  10.199 -                 max_help_position=24,
  10.200 -                 width=None):
  10.201 -
  10.202 -        # default setting for width
  10.203 -        if width is None:
  10.204 -            try:
  10.205 -                width = int(_os.environ['COLUMNS'])
  10.206 -            except (KeyError, ValueError):
  10.207 -                width = 80
  10.208 -            width -= 2
  10.209 -
  10.210 -        self._prog = prog
  10.211 -        self._indent_increment = indent_increment
  10.212 -        self._max_help_position = max_help_position
  10.213 -        self._width = width
  10.214 -
  10.215 -        self._current_indent = 0
  10.216 -        self._level = 0
  10.217 -        self._action_max_length = 0
  10.218 -
  10.219 -        self._root_section = self._Section(self, None)
  10.220 -        self._current_section = self._root_section
  10.221 -
  10.222 -        self._whitespace_matcher = _re.compile(r'\s+')
  10.223 -        self._long_break_matcher = _re.compile(r'\n\n\n+')
  10.224 -
  10.225 -    # ===============================
  10.226 -    # Section and indentation methods
  10.227 -    # ===============================
  10.228 -    def _indent(self):
  10.229 -        self._current_indent += self._indent_increment
  10.230 -        self._level += 1
  10.231 -
  10.232 -    def _dedent(self):
  10.233 -        self._current_indent -= self._indent_increment
  10.234 -        assert self._current_indent >= 0, 'Indent decreased below 0.'
  10.235 -        self._level -= 1
  10.236 -
  10.237 -    class _Section(object):
  10.238 -
  10.239 -        def __init__(self, formatter, parent, heading=None):
  10.240 -            self.formatter = formatter
  10.241 -            self.parent = parent
  10.242 -            self.heading = heading
  10.243 -            self.items = []
  10.244 -
  10.245 -        def format_help(self):
  10.246 -            # format the indented section
  10.247 -            if self.parent is not None:
  10.248 -                self.formatter._indent()
  10.249 -            join = self.formatter._join_parts
  10.250 -            for func, args in self.items:
  10.251 -                func(*args)
  10.252 -            item_help = join([func(*args) for func, args in self.items])
  10.253 -            if self.parent is not None:
  10.254 -                self.formatter._dedent()
  10.255 -
  10.256 -            # return nothing if the section was empty
  10.257 -            if not item_help:
  10.258 -                return ''
  10.259 -
  10.260 -            # add the heading if the section was non-empty
  10.261 -            if self.heading is not SUPPRESS and self.heading is not None:
  10.262 -                current_indent = self.formatter._current_indent
  10.263 -                heading = '%*s%s:\n' % (current_indent, '', self.heading)
  10.264 -            else:
  10.265 -                heading = ''
  10.266 -
  10.267 -            # join the section-initial newline, the heading and the help
  10.268 -            return join(['\n', heading, item_help, '\n'])
  10.269 -
  10.270 -    def _add_item(self, func, args):
  10.271 -        self._current_section.items.append((func, args))
  10.272 -
  10.273 -    # ========================
  10.274 -    # Message building methods
  10.275 -    # ========================
  10.276 -    def start_section(self, heading):
  10.277 -        self._indent()
  10.278 -        section = self._Section(self, self._current_section, heading)
  10.279 -        self._add_item(section.format_help, [])
  10.280 -        self._current_section = section
  10.281 -
  10.282 -    def end_section(self):
  10.283 -        self._current_section = self._current_section.parent
  10.284 -        self._dedent()
  10.285 -
  10.286 -    def add_text(self, text):
  10.287 -        if text is not SUPPRESS and text is not None:
  10.288 -            self._add_item(self._format_text, [text])
  10.289 -
  10.290 -    def add_usage(self, usage, actions, groups, prefix=None):
  10.291 -        if usage is not SUPPRESS:
  10.292 -            args = usage, actions, groups, prefix
  10.293 -            self._add_item(self._format_usage, args)
  10.294 -
  10.295 -    def add_argument(self, action):
  10.296 -        if action.help is not SUPPRESS:
  10.297 -
  10.298 -            # find all invocations
  10.299 -            get_invocation = self._format_action_invocation
  10.300 -            invocations = [get_invocation(action)]
  10.301 -            for subaction in self._iter_indented_subactions(action):
  10.302 -                invocations.append(get_invocation(subaction))
  10.303 -
  10.304 -            # update the maximum item length
  10.305 -            invocation_length = max([len(s) for s in invocations])
  10.306 -            action_length = invocation_length + self._current_indent
  10.307 -            self._action_max_length = max(self._action_max_length,
  10.308 -                                          action_length)
  10.309 -
  10.310 -            # add the item to the list
  10.311 -            self._add_item(self._format_action, [action])
  10.312 -
  10.313 -    def add_arguments(self, actions):
  10.314 -        for action in actions:
  10.315 -            self.add_argument(action)
  10.316 -
  10.317 -    # =======================
  10.318 -    # Help-formatting methods
  10.319 -    # =======================
  10.320 -    def format_help(self):
  10.321 -        help = self._root_section.format_help()
  10.322 -        if help:
  10.323 -            help = self._long_break_matcher.sub('\n\n', help)
  10.324 -            help = help.strip('\n') + '\n'
  10.325 -        return help
  10.326 -
  10.327 -    def _join_parts(self, part_strings):
  10.328 -        return ''.join([part
  10.329 -                        for part in part_strings
  10.330 -                        if part and part is not SUPPRESS])
  10.331 -
  10.332 -    def _format_usage(self, usage, actions, groups, prefix):
  10.333 -        if prefix is None:
  10.334 -            prefix = _('usage: ')
  10.335 -
  10.336 -        # if usage is specified, use that
  10.337 -        if usage is not None:
  10.338 -            usage = usage % dict(prog=self._prog)
  10.339 -
  10.340 -        # if no optionals or positionals are available, usage is just prog
  10.341 -        elif usage is None and not actions:
  10.342 -            usage = '%(prog)s' % dict(prog=self._prog)
  10.343 -
  10.344 -        # if optionals and positionals are available, calculate usage
  10.345 -        elif usage is None:
  10.346 -            prog = '%(prog)s' % dict(prog=self._prog)
  10.347 -
  10.348 -            # split optionals from positionals
  10.349 -            optionals = []
  10.350 -            positionals = []
  10.351 -            for action in actions:
  10.352 -                if action.option_strings:
  10.353 -                    optionals.append(action)
  10.354 -                else:
  10.355 -                    positionals.append(action)
  10.356 -
  10.357 -            # build full usage string
  10.358 -            format = self._format_actions_usage
  10.359 -            action_usage = format(optionals + positionals, groups)
  10.360 -            usage = ' '.join([s for s in [prog, action_usage] if s])
  10.361 -
  10.362 -            # wrap the usage parts if it's too long
  10.363 -            text_width = self._width - self._current_indent
  10.364 -            if len(prefix) + len(usage) > text_width:
  10.365 -
  10.366 -                # break usage into wrappable parts
  10.367 -                part_regexp = r'\(.*?\)+|\[.*?\]+|\S+'
  10.368 -                opt_usage = format(optionals, groups)
  10.369 -                pos_usage = format(positionals, groups)
  10.370 -                opt_parts = _re.findall(part_regexp, opt_usage)
  10.371 -                pos_parts = _re.findall(part_regexp, pos_usage)
  10.372 -                assert ' '.join(opt_parts) == opt_usage
  10.373 -                assert ' '.join(pos_parts) == pos_usage
  10.374 -
  10.375 -                # helper for wrapping lines
  10.376 -                def get_lines(parts, indent, prefix=None):
  10.377 -                    lines = []
  10.378 -                    line = []
  10.379 -                    if prefix is not None:
  10.380 -                        line_len = len(prefix) - 1
  10.381 -                    else:
  10.382 -                        line_len = len(indent) - 1
  10.383 -                    for part in parts:
  10.384 -                        if line_len + 1 + len(part) > text_width:
  10.385 -                            lines.append(indent + ' '.join(line))
  10.386 -                            line = []
  10.387 -                            line_len = len(indent) - 1
  10.388 -                        line.append(part)
  10.389 -                        line_len += len(part) + 1
  10.390 -                    if line:
  10.391 -                        lines.append(indent + ' '.join(line))
  10.392 -                    if prefix is not None:
  10.393 -                        lines[0] = lines[0][len(indent):]
  10.394 -                    return lines
  10.395 -
  10.396 -                # if prog is short, follow it with optionals or positionals
  10.397 -                if len(prefix) + len(prog) <= 0.75 * text_width:
  10.398 -                    indent = ' ' * (len(prefix) + len(prog) + 1)
  10.399 -                    if opt_parts:
  10.400 -                        lines = get_lines([prog] + opt_parts, indent, prefix)
  10.401 -                        lines.extend(get_lines(pos_parts, indent))
  10.402 -                    elif pos_parts:
  10.403 -                        lines = get_lines([prog] + pos_parts, indent, prefix)
  10.404 -                    else:
  10.405 -                        lines = [prog]
  10.406 -
  10.407 -                # if prog is long, put it on its own line
  10.408 -                else:
  10.409 -                    indent = ' ' * len(prefix)
  10.410 -                    parts = opt_parts + pos_parts
  10.411 -                    lines = get_lines(parts, indent)
  10.412 -                    if len(lines) > 1:
  10.413 -                        lines = []
  10.414 -                        lines.extend(get_lines(opt_parts, indent))
  10.415 -                        lines.extend(get_lines(pos_parts, indent))
  10.416 -                    lines = [prog] + lines
  10.417 -
  10.418 -                # join lines into usage
  10.419 -                usage = '\n'.join(lines)
  10.420 -
  10.421 -        # prefix with 'usage:'
  10.422 -        return '%s%s\n\n' % (prefix, usage)
  10.423 -
  10.424 -    def _format_actions_usage(self, actions, groups):
  10.425 -        # find group indices and identify actions in groups
  10.426 -        group_actions = _set()
  10.427 -        inserts = {}
  10.428 -        for group in groups:
  10.429 -            try:
  10.430 -                start = actions.index(group._group_actions[0])
  10.431 -            except ValueError:
  10.432 -                continue
  10.433 -            else:
  10.434 -                end = start + len(group._group_actions)
  10.435 -                if actions[start:end] == group._group_actions:
  10.436 -                    for action in group._group_actions:
  10.437 -                        group_actions.add(action)
  10.438 -                    if not group.required:
  10.439 -                        inserts[start] = '['
  10.440 -                        inserts[end] = ']'
  10.441 -                    else:
  10.442 -                        inserts[start] = '('
  10.443 -                        inserts[end] = ')'
  10.444 -                    for i in range(start + 1, end):
  10.445 -                        inserts[i] = '|'
  10.446 -
  10.447 -        # collect all actions format strings
  10.448 -        parts = []
  10.449 -        for i, action in enumerate(actions):
  10.450 -
  10.451 -            # suppressed arguments are marked with None
  10.452 -            # remove | separators for suppressed arguments
  10.453 -            if action.help is SUPPRESS:
  10.454 -                parts.append(None)
  10.455 -                if inserts.get(i) == '|':
  10.456 -                    inserts.pop(i)
  10.457 -                elif inserts.get(i + 1) == '|':
  10.458 -                    inserts.pop(i + 1)
  10.459 -
  10.460 -            # produce all arg strings
  10.461 -            elif not action.option_strings:
  10.462 -                part = self._format_args(action, action.dest)
  10.463 -
  10.464 -                # if it's in a group, strip the outer []
  10.465 -                if action in group_actions:
  10.466 -                    if part[0] == '[' and part[-1] == ']':
  10.467 -                        part = part[1:-1]
  10.468 -
  10.469 -                # add the action string to the list
  10.470 -                parts.append(part)
  10.471 -
  10.472 -            # produce the first way to invoke the option in brackets
  10.473 -            else:
  10.474 -                option_string = action.option_strings[0]
  10.475 -
  10.476 -                # if the Optional doesn't take a value, format is:
  10.477 -                #    -s or --long
  10.478 -                if action.nargs == 0:
  10.479 -                    part = '%s' % option_string
  10.480 -
  10.481 -                # if the Optional takes a value, format is:
  10.482 -                #    -s ARGS or --long ARGS
  10.483 -                else:
  10.484 -                    default = action.dest.upper()
  10.485 -                    args_string = self._format_args(action, default)
  10.486 -                    part = '%s %s' % (option_string, args_string)
  10.487 -
  10.488 -                # make it look optional if it's not required or in a group
  10.489 -                if not action.required and action not in group_actions:
  10.490 -                    part = '[%s]' % part
  10.491 -
  10.492 -                # add the action string to the list
  10.493 -                parts.append(part)
  10.494 -
  10.495 -        # insert things at the necessary indices
  10.496 -        for i in _sorted(inserts, reverse=True):
  10.497 -            parts[i:i] = [inserts[i]]
  10.498 -
  10.499 -        # join all the action items with spaces
  10.500 -        text = ' '.join([item for item in parts if item is not None])
  10.501 -
  10.502 -        # clean up separators for mutually exclusive groups
  10.503 -        open = r'[\[(]'
  10.504 -        close = r'[\])]'
  10.505 -        text = _re.sub(r'(%s) ' % open, r'\1', text)
  10.506 -        text = _re.sub(r' (%s)' % close, r'\1', text)
  10.507 -        text = _re.sub(r'%s *%s' % (open, close), r'', text)
  10.508 -        text = _re.sub(r'\(([^|]*)\)', r'\1', text)
  10.509 -        text = text.strip()
  10.510 -
  10.511 -        # return the text
  10.512 -        return text
  10.513 -
  10.514 -    def _format_text(self, text):
  10.515 -        if '%(prog)' in text:
  10.516 -            text = text % dict(prog=self._prog)
  10.517 -        text_width = self._width - self._current_indent
  10.518 -        indent = ' ' * self._current_indent
  10.519 -        return self._fill_text(text, text_width, indent) + '\n\n'
  10.520 -
  10.521 -    def _format_action(self, action):
  10.522 -        # determine the required width and the entry label
  10.523 -        help_position = min(self._action_max_length + 2,
  10.524 -                            self._max_help_position)
  10.525 -        help_width = self._width - help_position
  10.526 -        action_width = help_position - self._current_indent - 2
  10.527 -        action_header = self._format_action_invocation(action)
  10.528 -
  10.529 -        # ho nelp; start on same line and add a final newline
  10.530 -        if not action.help:
  10.531 -            tup = self._current_indent, '', action_header
  10.532 -            action_header = '%*s%s\n' % tup
  10.533 -
  10.534 -        # short action name; start on the same line and pad two spaces
  10.535 -        elif len(action_header) <= action_width:
  10.536 -            tup = self._current_indent, '', action_width, action_header
  10.537 -            action_header = '%*s%-*s  ' % tup
  10.538 -            indent_first = 0
  10.539 -
  10.540 -        # long action name; start on the next line
  10.541 -        else:
  10.542 -            tup = self._current_indent, '', action_header
  10.543 -            action_header = '%*s%s\n' % tup
  10.544 -            indent_first = help_position
  10.545 -
  10.546 -        # collect the pieces of the action help
  10.547 -        parts = [action_header]
  10.548 -
  10.549 -        # if there was help for the action, add lines of help text
  10.550 -        if action.help:
  10.551 -            help_text = self._expand_help(action)
  10.552 -            help_lines = self._split_lines(help_text, help_width)
  10.553 -            parts.append('%*s%s\n' % (indent_first, '', help_lines[0]))
  10.554 -            for line in help_lines[1:]:
  10.555 -                parts.append('%*s%s\n' % (help_position, '', line))
  10.556 -
  10.557 -        # or add a newline if the description doesn't end with one
  10.558 -        elif not action_header.endswith('\n'):
  10.559 -            parts.append('\n')
  10.560 -
  10.561 -        # if there are any sub-actions, add their help as well
  10.562 -        for subaction in self._iter_indented_subactions(action):
  10.563 -            parts.append(self._format_action(subaction))
  10.564 -
  10.565 -        # return a single string
  10.566 -        return self._join_parts(parts)
  10.567 -
  10.568 -    def _format_action_invocation(self, action):
  10.569 -        if not action.option_strings:
  10.570 -            metavar, = self._metavar_formatter(action, action.dest)(1)
  10.571 -            return metavar
  10.572 -
  10.573 -        else:
  10.574 -            parts = []
  10.575 -
  10.576 -            # if the Optional doesn't take a value, format is:
  10.577 -            #    -s, --long
  10.578 -            if action.nargs == 0:
  10.579 -                parts.extend(action.option_strings)
  10.580 -
  10.581 -            # if the Optional takes a value, format is:
  10.582 -            #    -s ARGS, --long ARGS
  10.583 -            else:
  10.584 -                default = action.dest.upper()
  10.585 -                args_string = self._format_args(action, default)
  10.586 -                for option_string in action.option_strings:
  10.587 -                    parts.append('%s %s' % (option_string, args_string))
  10.588 -
  10.589 -            return ', '.join(parts)
  10.590 -
  10.591 -    def _metavar_formatter(self, action, default_metavar):
  10.592 -        if action.metavar is not None:
  10.593 -            result = action.metavar
  10.594 -        elif action.choices is not None:
  10.595 -            choice_strs = [str(choice) for choice in action.choices]
  10.596 -            result = '{%s}' % ','.join(choice_strs)
  10.597 -        else:
  10.598 -            result = default_metavar
  10.599 -
  10.600 -        def format(tuple_size):
  10.601 -            if isinstance(result, tuple):
  10.602 -                return result
  10.603 -            else:
  10.604 -                return (result, ) * tuple_size
  10.605 -        return format
  10.606 -
  10.607 -    def _format_args(self, action, default_metavar):
  10.608 -        get_metavar = self._metavar_formatter(action, default_metavar)
  10.609 -        if action.nargs is None:
  10.610 -            result = '%s' % get_metavar(1)
  10.611 -        elif action.nargs == OPTIONAL:
  10.612 -            result = '[%s]' % get_metavar(1)
  10.613 -        elif action.nargs == ZERO_OR_MORE:
  10.614 -            result = '[%s [%s ...]]' % get_metavar(2)
  10.615 -        elif action.nargs == ONE_OR_MORE:
  10.616 -            result = '%s [%s ...]' % get_metavar(2)
  10.617 -        elif action.nargs == REMAINDER:
  10.618 -            result = '...'
  10.619 -        elif action.nargs == PARSER:
  10.620 -            result = '%s ...' % get_metavar(1)
  10.621 -        else:
  10.622 -            formats = ['%s' for _ in range(action.nargs)]
  10.623 -            result = ' '.join(formats) % get_metavar(action.nargs)
  10.624 -        return result
  10.625 -
  10.626 -    def _expand_help(self, action):
  10.627 -        params = dict(vars(action), prog=self._prog)
  10.628 -        for name in list(params):
  10.629 -            if params[name] is SUPPRESS:
  10.630 -                del params[name]
  10.631 -        for name in list(params):
  10.632 -            if hasattr(params[name], '__name__'):
  10.633 -                params[name] = params[name].__name__
  10.634 -        if params.get('choices') is not None:
  10.635 -            choices_str = ', '.join([str(c) for c in params['choices']])
  10.636 -            params['choices'] = choices_str
  10.637 -        return self._get_help_string(action) % params
  10.638 -
  10.639 -    def _iter_indented_subactions(self, action):
  10.640 -        try:
  10.641 -            get_subactions = action._get_subactions
  10.642 -        except AttributeError:
  10.643 -            pass
  10.644 -        else:
  10.645 -            self._indent()
  10.646 -            for subaction in get_subactions():
  10.647 -                yield subaction
  10.648 -            self._dedent()
  10.649 -
  10.650 -    def _split_lines(self, text, width):
  10.651 -        text = self._whitespace_matcher.sub(' ', text).strip()
  10.652 -        return _textwrap.wrap(text, width)
  10.653 -
  10.654 -    def _fill_text(self, text, width, indent):
  10.655 -        text = self._whitespace_matcher.sub(' ', text).strip()
  10.656 -        return _textwrap.fill(text, width, initial_indent=indent,
  10.657 -                                           subsequent_indent=indent)
  10.658 -
  10.659 -    def _get_help_string(self, action):
  10.660 -        return action.help
  10.661 -
  10.662 -
  10.663 -class RawDescriptionHelpFormatter(HelpFormatter):
  10.664 -    """Help message formatter which retains any formatting in descriptions.
  10.665 -
  10.666 -    Only the name of this class is considered a public API. All the methods
  10.667 -    provided by the class are considered an implementation detail.
  10.668 -    """
  10.669 -
  10.670 -    def _fill_text(self, text, width, indent):
  10.671 -        return ''.join([indent + line for line in text.splitlines(True)])
  10.672 -
  10.673 -
  10.674 -class RawTextHelpFormatter(RawDescriptionHelpFormatter):
  10.675 -    """Help message formatter which retains formatting of all help text.
  10.676 -
  10.677 -    Only the name of this class is considered a public API. All the methods
  10.678 -    provided by the class are considered an implementation detail.
  10.679 -    """
  10.680 -
  10.681 -    def _split_lines(self, text, width):
  10.682 -        return text.splitlines()
  10.683 -
  10.684 -
  10.685 -class ArgumentDefaultsHelpFormatter(HelpFormatter):
  10.686 -    """Help message formatter which adds default values to argument help.
  10.687 -
  10.688 -    Only the name of this class is considered a public API. All the methods
  10.689 -    provided by the class are considered an implementation detail.
  10.690 -    """
  10.691 -
  10.692 -    def _get_help_string(self, action):
  10.693 -        help = action.help
  10.694 -        if '%(default)' not in action.help:
  10.695 -            if action.default is not SUPPRESS:
  10.696 -                defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
  10.697 -                if action.option_strings or action.nargs in defaulting_nargs:
  10.698 -                    help += ' (default: %(default)s)'
  10.699 -        return help
  10.700 -
  10.701 -
  10.702 -# =====================
  10.703 -# Options and Arguments
  10.704 -# =====================
  10.705 -
  10.706 -def _get_action_name(argument):
  10.707 -    if argument is None:
  10.708 -        return None
  10.709 -    elif argument.option_strings:
  10.710 -        return  '/'.join(argument.option_strings)
  10.711 -    elif argument.metavar not in (None, SUPPRESS):
  10.712 -        return argument.metavar
  10.713 -    elif argument.dest not in (None, SUPPRESS):
  10.714 -        return argument.dest
  10.715 -    else:
  10.716 -        return None
  10.717 -
  10.718 -
  10.719 -class ArgumentError(Exception):
  10.720 -    """An error from creating or using an argument (optional or positional).
  10.721 -
  10.722 -    The string value of this exception is the message, augmented with
  10.723 -    information about the argument that caused it.
  10.724 -    """
  10.725 -
  10.726 -    def __init__(self, argument, message):
  10.727 -        self.argument_name = _get_action_name(argument)
  10.728 -        self.message = message
  10.729 -
  10.730 -    def __str__(self):
  10.731 -        if self.argument_name is None:
  10.732 -            format = '%(message)s'
  10.733 -        else:
  10.734 -            format = 'argument %(argument_name)s: %(message)s'
  10.735 -        return format % dict(message=self.message,
  10.736 -                             argument_name=self.argument_name)
  10.737 -
  10.738 -
  10.739 -class ArgumentTypeError(Exception):
  10.740 -    """An error from trying to convert a command line string to a type."""
  10.741 -    pass
  10.742 -
  10.743 -
  10.744 -# ==============
  10.745 -# Action classes
  10.746 -# ==============
  10.747 -
  10.748 -class Action(_AttributeHolder):
  10.749 -    """Information about how to convert command line strings to Python objects.
  10.750 -
  10.751 -    Action objects are used by an ArgumentParser to represent the information
  10.752 -    needed to parse a single argument from one or more strings from the
  10.753 -    command line. The keyword arguments to the Action constructor are also
  10.754 -    all attributes of Action instances.
  10.755 -
  10.756 -    Keyword Arguments:
  10.757 -
  10.758 -        - option_strings -- A list of command-line option strings which
  10.759 -            should be associated with this action.
  10.760 -
  10.761 -        - dest -- The name of the attribute to hold the created object(s)
  10.762 -
  10.763 -        - nargs -- The number of command-line arguments that should be
  10.764 -            consumed. By default, one argument will be consumed and a single
  10.765 -            value will be produced.  Other values include:
  10.766 -                - N (an integer) consumes N arguments (and produces a list)
  10.767 -                - '?' consumes zero or one arguments
  10.768 -                - '*' consumes zero or more arguments (and produces a list)
  10.769 -                - '+' consumes one or more arguments (and produces a list)
  10.770 -            Note that the difference between the default and nargs=1 is that
  10.771 -            with the default, a single value will be produced, while with
  10.772 -            nargs=1, a list containing a single value will be produced.
  10.773 -
  10.774 -        - const -- The value to be produced if the option is specified and the
  10.775 -            option uses an action that takes no values.
  10.776 -
  10.777 -        - default -- The value to be produced if the option is not specified.
  10.778 -
  10.779 -        - type -- The type which the command-line arguments should be converted
  10.780 -            to, should be one of 'string', 'int', 'float', 'complex' or a
  10.781 -            callable object that accepts a single string argument. If None,
  10.782 -            'string' is assumed.
  10.783 -
  10.784 -        - choices -- A container of values that should be allowed. If not None,
  10.785 -            after a command-line argument has been converted to the appropriate
  10.786 -            type, an exception will be raised if it is not a member of this
  10.787 -            collection.
  10.788 -
  10.789 -        - required -- True if the action must always be specified at the
  10.790 -            command line. This is only meaningful for optional command-line
  10.791 -            arguments.
  10.792 -
  10.793 -        - help -- The help string describing the argument.
  10.794 -
  10.795 -        - metavar -- The name to be used for the option's argument with the
  10.796 -            help string. If None, the 'dest' value will be used as the name.
  10.797 -    """
  10.798 -
  10.799 -    def __init__(self,
  10.800 -                 option_strings,
  10.801 -                 dest,
  10.802 -                 nargs=None,
  10.803 -                 const=None,
  10.804 -                 default=None,
  10.805 -                 type=None,
  10.806 -                 choices=None,
  10.807 -                 required=False,
  10.808 -                 help=None,
  10.809 -                 metavar=None):
  10.810 -        self.option_strings = option_strings
  10.811 -        self.dest = dest
  10.812 -        self.nargs = nargs
  10.813 -        self.const = const
  10.814 -        self.default = default
  10.815 -        self.type = type
  10.816 -        self.choices = choices
  10.817 -        self.required = required
  10.818 -        self.help = help
  10.819 -        self.metavar = metavar
  10.820 -
  10.821 -    def _get_kwargs(self):
  10.822 -        names = [
  10.823 -            'option_strings',
  10.824 -            'dest',
  10.825 -            'nargs',
  10.826 -            'const',
  10.827 -            'default',
  10.828 -            'type',
  10.829 -            'choices',
  10.830 -            'help',
  10.831 -            'metavar',
  10.832 -        ]
  10.833 -        return [(name, getattr(self, name)) for name in names]
  10.834 -
  10.835 -    def __call__(self, parser, namespace, values, option_string=None):
  10.836 -        raise NotImplementedError(_('.__call__() not defined'))
  10.837 -
  10.838 -
  10.839 -class _StoreAction(Action):
  10.840 -
  10.841 -    def __init__(self,
  10.842 -                 option_strings,
  10.843 -                 dest,
  10.844 -                 nargs=None,
  10.845 -                 const=None,
  10.846 -                 default=None,
  10.847 -                 type=None,
  10.848 -                 choices=None,
  10.849 -                 required=False,
  10.850 -                 help=None,
  10.851 -                 metavar=None):
  10.852 -        if nargs == 0:
  10.853 -            raise ValueError('nargs for store actions must be > 0; if you '
  10.854 -                             'have nothing to store, actions such as store '
  10.855 -                             'true or store const may be more appropriate')
  10.856 -        if const is not None and nargs != OPTIONAL:
  10.857 -            raise ValueError('nargs must be %r to supply const' % OPTIONAL)
  10.858 -        super(_StoreAction, self).__init__(
  10.859 -            option_strings=option_strings,
  10.860 -            dest=dest,
  10.861 -            nargs=nargs,
  10.862 -            const=const,
  10.863 -            default=default,
  10.864 -            type=type,
  10.865 -            choices=choices,
  10.866 -            required=required,
  10.867 -            help=help,
  10.868 -            metavar=metavar)
  10.869 -
  10.870 -    def __call__(self, parser, namespace, values, option_string=None):
  10.871 -        setattr(namespace, self.dest, values)
  10.872 -
  10.873 -
  10.874 -class _StoreConstAction(Action):
  10.875 -
  10.876 -    def __init__(self,
  10.877 -                 option_strings,
  10.878 -                 dest,
  10.879 -                 const,
  10.880 -                 default=None,
  10.881 -                 required=False,
  10.882 -                 help=None,
  10.883 -                 metavar=None):
  10.884 -        super(_StoreConstAction, self).__init__(
  10.885 -            option_strings=option_strings,
  10.886 -            dest=dest,
  10.887 -            nargs=0,
  10.888 -            const=const,
  10.889 -            default=default,
  10.890 -            required=required,
  10.891 -            help=help)
  10.892 -
  10.893 -    def __call__(self, parser, namespace, values, option_string=None):
  10.894 -        setattr(namespace, self.dest, self.const)
  10.895 -
  10.896 -
  10.897 -class _StoreTrueAction(_StoreConstAction):
  10.898 -
  10.899 -    def __init__(self,
  10.900 -                 option_strings,
  10.901 -                 dest,
  10.902 -                 default=False,
  10.903 -                 required=False,
  10.904 -                 help=None):
  10.905 -        super(_StoreTrueAction, self).__init__(
  10.906 -            option_strings=option_strings,
  10.907 -            dest=dest,
  10.908 -            const=True,
  10.909 -            default=default,
  10.910 -            required=required,
  10.911 -            help=help)
  10.912 -
  10.913 -
  10.914 -class _StoreFalseAction(_StoreConstAction):
  10.915 -
  10.916 -    def __init__(self,
  10.917 -                 option_strings,
  10.918 -                 dest,
  10.919 -                 default=True,
  10.920 -                 required=False,
  10.921 -                 help=None):
  10.922 -        super(_StoreFalseAction, self).__init__(
  10.923 -            option_strings=option_strings,
  10.924 -            dest=dest,
  10.925 -            const=False,
  10.926 -            default=default,
  10.927 -            required=required,
  10.928 -            help=help)
  10.929 -
  10.930 -
  10.931 -class _AppendAction(Action):
  10.932 -
  10.933 -    def __init__(self,
  10.934 -                 option_strings,
  10.935 -                 dest,
  10.936 -                 nargs=None,
  10.937 -                 const=None,
  10.938 -                 default=None,
  10.939 -                 type=None,
  10.940 -                 choices=None,
  10.941 -                 required=False,
  10.942 -                 help=None,
  10.943 -                 metavar=None):
  10.944 -        if nargs == 0:
  10.945 -            raise ValueError('nargs for append actions must be > 0; if arg '
  10.946 -                             'strings are not supplying the value to append, '
  10.947 -                             'the append const action may be more appropriate')
  10.948 -        if const is not None and nargs != OPTIONAL:
  10.949 -            raise ValueError('nargs must be %r to supply const' % OPTIONAL)
  10.950 -        super(_AppendAction, self).__init__(
  10.951 -            option_strings=option_strings,
  10.952 -            dest=dest,
  10.953 -            nargs=nargs,
  10.954 -            const=const,
  10.955 -            default=default,
  10.956 -            type=type,
  10.957 -            choices=choices,
  10.958 -            required=required,
  10.959 -            help=help,
  10.960 -            metavar=metavar)
  10.961 -
  10.962 -    def __call__(self, parser, namespace, values, option_string=None):
  10.963 -        items = _copy.copy(_ensure_value(namespace, self.dest, []))
  10.964 -        items.append(values)
  10.965 -        setattr(namespace, self.dest, items)
  10.966 -
  10.967 -
  10.968 -class _AppendConstAction(Action):
  10.969 -
  10.970 -    def __init__(self,
  10.971 -                 option_strings,
  10.972 -                 dest,
  10.973 -                 const,
  10.974 -                 default=None,
  10.975 -                 required=False,
  10.976 -                 help=None,
  10.977 -                 metavar=None):
  10.978 -        super(_AppendConstAction, self).__init__(
  10.979 -            option_strings=option_strings,
  10.980 -            dest=dest,
  10.981 -            nargs=0,
  10.982 -            const=const,
  10.983 -            default=default,
  10.984 -            required=required,
  10.985 -            help=help,
  10.986 -            metavar=metavar)
  10.987 -
  10.988 -    def __call__(self, parser, namespace, values, option_string=None):
  10.989 -        items = _copy.copy(_ensure_value(namespace, self.dest, []))
  10.990 -        items.append(self.const)
  10.991 -        setattr(namespace, self.dest, items)
  10.992 -
  10.993 -
  10.994 -class _CountAction(Action):
  10.995 -
  10.996 -    def __init__(self,
  10.997 -                 option_strings,
  10.998 -                 dest,
  10.999 -                 default=None,
 10.1000 -                 required=False,
 10.1001 -                 help=None):
 10.1002 -        super(_CountAction, self).__init__(
 10.1003 -            option_strings=option_strings,
 10.1004 -            dest=dest,
 10.1005 -            nargs=0,
 10.1006 -            default=default,
 10.1007 -            required=required,
 10.1008 -            help=help)
 10.1009 -
 10.1010 -    def __call__(self, parser, namespace, values, option_string=None):
 10.1011 -        new_count = _ensure_value(namespace, self.dest, 0) + 1
 10.1012 -        setattr(namespace, self.dest, new_count)
 10.1013 -
 10.1014 -
 10.1015 -class _HelpAction(Action):
 10.1016 -
 10.1017 -    def __init__(self,
 10.1018 -                 option_strings,
 10.1019 -                 dest=SUPPRESS,
 10.1020 -                 default=SUPPRESS,
 10.1021 -                 help=None):
 10.1022 -        super(_HelpAction, self).__init__(
 10.1023 -            option_strings=option_strings,
 10.1024 -            dest=dest,
 10.1025 -            default=default,
 10.1026 -            nargs=0,
 10.1027 -            help=help)
 10.1028 -
 10.1029 -    def __call__(self, parser, namespace, values, option_string=None):
 10.1030 -        parser.print_help()
 10.1031 -        parser.exit()
 10.1032 -
 10.1033 -
 10.1034 -class _VersionAction(Action):
 10.1035 -
 10.1036 -    def __init__(self,
 10.1037 -                 option_strings,
 10.1038 -                 version=None,
 10.1039 -                 dest=SUPPRESS,
 10.1040 -                 default=SUPPRESS,
 10.1041 -                 help=None):
 10.1042 -        super(_VersionAction, self).__init__(
 10.1043 -            option_strings=option_strings,
 10.1044 -            dest=dest,
 10.1045 -            default=default,
 10.1046 -            nargs=0,
 10.1047 -            help=help)
 10.1048 -        self.version = version
 10.1049 -
 10.1050 -    def __call__(self, parser, namespace, values, option_string=None):
 10.1051 -        version = self.version
 10.1052 -        if version is None:
 10.1053 -            version = parser.version
 10.1054 -        formatter = parser._get_formatter()
 10.1055 -        formatter.add_text(version)
 10.1056 -        parser.exit(message=formatter.format_help())
 10.1057 -
 10.1058 -
 10.1059 -class _SubParsersAction(Action):
 10.1060 -
 10.1061 -    class _ChoicesPseudoAction(Action):
 10.1062 -
 10.1063 -        def __init__(self, name, help):
 10.1064 -            sup = super(_SubParsersAction._ChoicesPseudoAction, self)
 10.1065 -            sup.__init__(option_strings=[], dest=name, help=help)
 10.1066 -
 10.1067 -    def __init__(self,
 10.1068 -                 option_strings,
 10.1069 -                 prog,
 10.1070 -                 parser_class,
 10.1071 -                 dest=SUPPRESS,
 10.1072 -                 help=None,
 10.1073 -                 metavar=None):
 10.1074 -
 10.1075 -        self._prog_prefix = prog
 10.1076 -        self._parser_class = parser_class
 10.1077 -        self._name_parser_map = {}
 10.1078 -        self._choices_actions = []
 10.1079 -
 10.1080 -        super(_SubParsersAction, self).__init__(
 10.1081 -            option_strings=option_strings,
 10.1082 -            dest=dest,
 10.1083 -            nargs=PARSER,
 10.1084 -            choices=self._name_parser_map,
 10.1085 -            help=help,
 10.1086 -            metavar=metavar)
 10.1087 -
 10.1088 -    def add_parser(self, name, **kwargs):
 10.1089 -        # set prog from the existing prefix
 10.1090 -        if kwargs.get('prog') is None:
 10.1091 -            kwargs['prog'] = '%s %s' % (self._prog_prefix, name)
 10.1092 -
 10.1093 -        # create a pseudo-action to hold the choice help
 10.1094 -        if 'help' in kwargs:
 10.1095 -            help = kwargs.pop('help')
 10.1096 -            choice_action = self._ChoicesPseudoAction(name, help)
 10.1097 -            self._choices_actions.append(choice_action)
 10.1098 -
 10.1099 -        # create the parser and add it to the map
 10.1100 -        parser = self._parser_class(**kwargs)
 10.1101 -        self._name_parser_map[name] = parser
 10.1102 -        return parser
 10.1103 -
 10.1104 -    def _get_subactions(self):
 10.1105 -        return self._choices_actions
 10.1106 -
 10.1107 -    def __call__(self, parser, namespace, values, option_string=None):
 10.1108 -        parser_name = values[0]
 10.1109 -        arg_strings = values[1:]
 10.1110 -
 10.1111 -        # set the parser name if requested
 10.1112 -        if self.dest is not SUPPRESS:
 10.1113 -            setattr(namespace, self.dest, parser_name)
 10.1114 -
 10.1115 -        # select the parser
 10.1116 -        try:
 10.1117 -            parser = self._name_parser_map[parser_name]
 10.1118 -        except KeyError:
 10.1119 -            tup = parser_name, ', '.join(self._name_parser_map)
 10.1120 -            msg = _('unknown parser %r (choices: %s)' % tup)
 10.1121 -            raise ArgumentError(self, msg)
 10.1122 -
 10.1123 -        # parse all the remaining options into the namespace
 10.1124 -        parser.parse_args(arg_strings, namespace)
 10.1125 -
 10.1126 -
 10.1127 -# ==============
 10.1128 -# Type classes
 10.1129 -# ==============
 10.1130 -
 10.1131 -class FileType(object):
 10.1132 -    """Factory for creating file object types
 10.1133 -
 10.1134 -    Instances of FileType are typically passed as type= arguments to the
 10.1135 -    ArgumentParser add_argument() method.
 10.1136 -
 10.1137 -    Keyword Arguments:
 10.1138 -        - mode -- A string indicating how the file is to be opened. Accepts the
 10.1139 -            same values as the builtin open() function.
 10.1140 -        - bufsize -- The file's desired buffer size. Accepts the same values as
 10.1141 -            the builtin open() function.
 10.1142 -    """
 10.1143 -
 10.1144 -    def __init__(self, mode='r', bufsize=None):
 10.1145 -        self._mode = mode
 10.1146 -        self._bufsize = bufsize
 10.1147 -
 10.1148 -    def __call__(self, string):
 10.1149 -        # the special argument "-" means sys.std{in,out}
 10.1150 -        if string == '-':
 10.1151 -            if 'r' in self._mode:
 10.1152 -                return _sys.stdin
 10.1153 -            elif 'w' in self._mode:
 10.1154 -                return _sys.stdout
 10.1155 -            else:
 10.1156 -                msg = _('argument "-" with mode %r' % self._mode)
 10.1157 -                raise ValueError(msg)
 10.1158 -
 10.1159 -        # all other arguments are used as file names
 10.1160 -        if self._bufsize:
 10.1161 -            return open(string, self._mode, self._bufsize)
 10.1162 -        else:
 10.1163 -            return open(string, self._mode)
 10.1164 -
 10.1165 -    def __repr__(self):
 10.1166 -        args = [self._mode, self._bufsize]
 10.1167 -        args_str = ', '.join([repr(arg) for arg in args if arg is not None])
 10.1168 -        return '%s(%s)' % (type(self).__name__, args_str)
 10.1169 -
 10.1170 -# ===========================
 10.1171 -# Optional and Positional Parsing
 10.1172 -# ===========================
 10.1173 -
 10.1174 -class Namespace(_AttributeHolder):
 10.1175 -    """Simple object for storing attributes.
 10.1176 -
 10.1177 -    Implements equality by attribute names and values, and provides a simple
 10.1178 -    string representation.
 10.1179 -    """
 10.1180 -
 10.1181 -    def __init__(self, **kwargs):
 10.1182 -        for name in kwargs:
 10.1183 -            setattr(self, name, kwargs[name])
 10.1184 -
 10.1185 -    def __eq__(self, other):
 10.1186 -        return vars(self) == vars(other)
 10.1187 -
 10.1188 -    def __ne__(self, other):
 10.1189 -        return not (self == other)
 10.1190 -
 10.1191 -    def __contains__(self, key):
 10.1192 -        return key in self.__dict__
 10.1193 -
 10.1194 -
 10.1195 -class _ActionsContainer(object):
 10.1196 -
 10.1197 -    def __init__(self,
 10.1198 -                 description,
 10.1199 -                 prefix_chars,
 10.1200 -                 argument_default,
 10.1201 -                 conflict_handler):
 10.1202 -        super(_ActionsContainer, self).__init__()
 10.1203 -
 10.1204 -        self.description = description
 10.1205 -        self.argument_default = argument_default
 10.1206 -        self.prefix_chars = prefix_chars
 10.1207 -        self.conflict_handler = conflict_handler
 10.1208 -
 10.1209 -        # set up registries
 10.1210 -        self._registries = {}
 10.1211 -
 10.1212 -        # register actions
 10.1213 -        self.register('action', None, _StoreAction)
 10.1214 -        self.register('action', 'store', _StoreAction)
 10.1215 -        self.register('action', 'store_const', _StoreConstAction)
 10.1216 -        self.register('action', 'store_true', _StoreTrueAction)
 10.1217 -        self.register('action', 'store_false', _StoreFalseAction)
 10.1218 -        self.register('action', 'append', _AppendAction)
 10.1219 -        self.register('action', 'append_const', _AppendConstAction)
 10.1220 -        self.register('action', 'count', _CountAction)
 10.1221 -        self.register('action', 'help', _HelpAction)
 10.1222 -        self.register('action', 'version', _VersionAction)
 10.1223 -        self.register('action', 'parsers', _SubParsersAction)
 10.1224 -
 10.1225 -        # raise an exception if the conflict handler is invalid
 10.1226 -        self._get_handler()
 10.1227 -
 10.1228 -        # action storage
 10.1229 -        self._actions = []
 10.1230 -        self._option_string_actions = {}
 10.1231 -
 10.1232 -        # groups
 10.1233 -        self._action_groups = []
 10.1234 -        self._mutually_exclusive_groups = []
 10.1235 -
 10.1236 -        # defaults storage
 10.1237 -        self._defaults = {}
 10.1238 -
 10.1239 -        # determines whether an "option" looks like a negative number
 10.1240 -        self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$')
 10.1241 -
 10.1242 -        # whether or not there are any optionals that look like negative
 10.1243 -        # numbers -- uses a list so it can be shared and edited
 10.1244 -        self._has_negative_number_optionals = []
 10.1245 -
 10.1246 -    # ====================
 10.1247 -    # Registration methods
 10.1248 -    # ====================
 10.1249 -    def register(self, registry_name, value, object):
 10.1250 -        registry = self._registries.setdefault(registry_name, {})
 10.1251 -        registry[value] = object
 10.1252 -
 10.1253 -    def _registry_get(self, registry_name, value, default=None):
 10.1254 -        return self._registries[registry_name].get(value, default)
 10.1255 -
 10.1256 -    # ==================================
 10.1257 -    # Namespace default accessor methods
 10.1258 -    # ==================================
 10.1259 -    def set_defaults(self, **kwargs):
 10.1260 -        self._defaults.update(kwargs)
 10.1261 -
 10.1262 -        # if these defaults match any existing arguments, replace
 10.1263 -        # the previous default on the object with the new one
 10.1264 -        for action in self._actions:
 10.1265 -            if action.dest in kwargs:
 10.1266 -                action.default = kwargs[action.dest]
 10.1267 -
 10.1268 -    def get_default(self, dest):
 10.1269 -        for action in self._actions:
 10.1270 -            if action.dest == dest and action.default is not None:
 10.1271 -                return action.default
 10.1272 -        return self._defaults.get(dest, None)
 10.1273 -
 10.1274 -
 10.1275 -    # =======================
 10.1276 -    # Adding argument actions
 10.1277 -    # =======================
 10.1278 -    def add_argument(self, *args, **kwargs):
 10.1279 -        """
 10.1280 -        add_argument(dest, ..., name=value, ...)
 10.1281 -        add_argument(option_string, option_string, ..., name=value, ...)
 10.1282 -        """
 10.1283 -
 10.1284 -        # if no positional args are supplied or only one is supplied and
 10.1285 -        # it doesn't look like an option string, parse a positional
 10.1286 -        # argument
 10.1287 -        chars = self.prefix_chars
 10.1288 -        if not args or len(args) == 1 and args[0][0] not in chars:
 10.1289 -            if args and 'dest' in kwargs:
 10.1290 -                raise ValueError('dest supplied twice for positional argument')
 10.1291 -            kwargs = self._get_positional_kwargs(*args, **kwargs)
 10.1292 -
 10.1293 -        # otherwise, we're adding an optional argument
 10.1294 -        else:
 10.1295 -            kwargs = self._get_optional_kwargs(*args, **kwargs)
 10.1296 -
 10.1297 -        # if no default was supplied, use the parser-level default
 10.1298 -        if 'default' not in kwargs:
 10.1299 -            dest = kwargs['dest']
 10.1300 -            if dest in self._defaults:
 10.1301 -                kwargs['default'] = self._defaults[dest]
 10.1302 -            elif self.argument_default is not None:
 10.1303 -                kwargs['default'] = self.argument_default
 10.1304 -
 10.1305 -        # create the action object, and add it to the parser
 10.1306 -        action_class = self._pop_action_class(kwargs)
 10.1307 -        if not _callable(action_class):
 10.1308 -            raise ValueError('unknown action "%s"' % action_class)
 10.1309 -        action = action_class(**kwargs)
 10.1310 -
 10.1311 -        # raise an error if the action type is not callable
 10.1312 -        type_func = self._registry_get('type', action.type, action.type)
 10.1313 -        if not _callable(type_func):
 10.1314 -            raise ValueError('%r is not callable' % type_func)
 10.1315 -
 10.1316 -        return self._add_action(action)
 10.1317 -
 10.1318 -    def add_argument_group(self, *args, **kwargs):
 10.1319 -        group = _ArgumentGroup(self, *args, **kwargs)
 10.1320 -        self._action_groups.append(group)
 10.1321 -        return group
 10.1322 -
 10.1323 -    def add_mutually_exclusive_group(self, **kwargs):
 10.1324 -        group = _MutuallyExclusiveGroup(self, **kwargs)
 10.1325 -        self._mutually_exclusive_groups.append(group)
 10.1326 -        return group
 10.1327 -
 10.1328 -    def _add_action(self, action):
 10.1329 -        # resolve any conflicts
 10.1330 -        self._check_conflict(action)
 10.1331 -
 10.1332 -        # add to actions list
 10.1333 -        self._actions.append(action)
 10.1334 -        action.container = self
 10.1335 -
 10.1336 -        # index the action by any option strings it has
 10.1337 -        for option_string in action.option_strings:
 10.1338 -            self._option_string_actions[option_string] = action
 10.1339 -
 10.1340 -        # set the flag if any option strings look like negative numbers
 10.1341 -        for option_string in action.option_strings:
 10.1342 -            if self._negative_number_matcher.match(option_string):
 10.1343 -                if not self._has_negative_number_optionals:
 10.1344 -                    self._has_negative_number_optionals.append(True)
 10.1345 -
 10.1346 -        # return the created action
 10.1347 -        return action
 10.1348 -
 10.1349 -    def _remove_action(self, action):
 10.1350 -        self._actions.remove(action)
 10.1351 -
 10.1352 -    def _add_container_actions(self, container):
 10.1353 -        # collect groups by titles
 10.1354 -        title_group_map = {}
 10.1355 -        for group in self._action_groups:
 10.1356 -            if group.title in title_group_map:
 10.1357 -                msg = _('cannot merge actions - two groups are named %r')
 10.1358 -                raise ValueError(msg % (group.title))
 10.1359 -            title_group_map[group.title] = group
 10.1360 -
 10.1361 -        # map each action to its group
 10.1362 -        group_map = {}
 10.1363 -        for group in container._action_groups:
 10.1364 -
 10.1365 -            # if a group with the title exists, use that, otherwise
 10.1366 -            # create a new group matching the container's group
 10.1367 -            if group.title not in title_group_map:
 10.1368 -                title_group_map[group.title] = self.add_argument_group(
 10.1369 -                    title=group.title,
 10.1370 -                    description=group.description,
 10.1371 -                    conflict_handler=group.conflict_handler)
 10.1372 -
 10.1373 -            # map the actions to their new group
 10.1374 -            for action in group._group_actions:
 10.1375 -                group_map[action] = title_group_map[group.title]
 10.1376 -
 10.1377 -        # add container's mutually exclusive groups
 10.1378 -        # NOTE: if add_mutually_exclusive_group ever gains title= and
 10.1379 -        # description= then this code will need to be expanded as above
 10.1380 -        for group in container._mutually_exclusive_groups:
 10.1381 -            mutex_group = self.add_mutually_exclusive_group(
 10.1382 -                required=group.required)
 10.1383 -
 10.1384 -            # map the actions to their new mutex group
 10.1385 -            for action in group._group_actions:
 10.1386 -                group_map[action] = mutex_group
 10.1387 -
 10.1388 -        # add all actions to this container or their group
 10.1389 -        for action in container._actions:
 10.1390 -            group_map.get(action, self)._add_action(action)
 10.1391 -
 10.1392 -    def _get_positional_kwargs(self, dest, **kwargs):
 10.1393 -        # make sure required is not specified
 10.1394 -        if 'required' in kwargs:
 10.1395 -            msg = _("'required' is an invalid argument for positionals")
 10.1396 -            raise TypeError(msg)
 10.1397 -
 10.1398 -        # mark positional arguments as required if at least one is
 10.1399 -        # always required
 10.1400 -        if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]:
 10.1401 -            kwargs['required'] = True
 10.1402 -        if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs:
 10.1403 -            kwargs['required'] = True
 10.1404 -
 10.1405 -        # return the keyword arguments with no option strings
 10.1406 -        return dict(kwargs, dest=dest, option_strings=[])
 10.1407 -
 10.1408 -    def _get_optional_kwargs(self, *args, **kwargs):
 10.1409 -        # determine short and long option strings
 10.1410 -        option_strings = []
 10.1411 -        long_option_strings = []
 10.1412 -        for option_string in args:
 10.1413 -            # error on strings that don't start with an appropriate prefix
 10.1414 -            if not option_string[0] in self.prefix_chars:
 10.1415 -                msg = _('invalid option string %r: '
 10.1416 -                        'must start with a character %r')
 10.1417 -                tup = option_string, self.prefix_chars
 10.1418 -                raise ValueError(msg % tup)
 10.1419 -
 10.1420 -            # strings starting with two prefix characters are long options
 10.1421 -            option_strings.append(option_string)
 10.1422 -            if option_string[0] in self.prefix_chars:
 10.1423 -                if len(option_string) > 1:
 10.1424 -                    if option_string[1] in self.prefix_chars:
 10.1425 -                        long_option_strings.append(option_string)
 10.1426 -
 10.1427 -        # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x'
 10.1428 -        dest = kwargs.pop('dest', None)
 10.1429 -        if dest is None:
 10.1430 -            if long_option_strings:
 10.1431 -                dest_option_string = long_option_strings[0]
 10.1432 -            else:
 10.1433 -                dest_option_string = option_strings[0]
 10.1434 -            dest = dest_option_string.lstrip(self.prefix_chars)
 10.1435 -            if not dest:
 10.1436 -                msg = _('dest= is required for options like %r')
 10.1437 -                raise ValueError(msg % option_string)
 10.1438 -            dest = dest.replace('-', '_')
 10.1439 -
 10.1440 -        # return the updated keyword arguments
 10.1441 -        return dict(kwargs, dest=dest, option_strings=option_strings)
 10.1442 -
 10.1443 -    def _pop_action_class(self, kwargs, default=None):
 10.1444 -        action = kwargs.pop('action', default)
 10.1445 -        return self._registry_get('action', action, action)
 10.1446 -
 10.1447 -    def _get_handler(self):
 10.1448 -        # determine function from conflict handler string
 10.1449 -        handler_func_name = '_handle_conflict_%s' % self.conflict_handler
 10.1450 -        try:
 10.1451 -            return getattr(self, handler_func_name)
 10.1452 -        except AttributeError:
 10.1453 -            msg = _('invalid conflict_resolution value: %r')
 10.1454 -            raise ValueError(msg % self.conflict_handler)
 10.1455 -
 10.1456 -    def _check_conflict(self, action):
 10.1457 -
 10.1458 -        # find all options that conflict with this option
 10.1459 -        confl_optionals = []
 10.1460 -        for option_string in action.option_strings:
 10.1461 -            if option_string in self._option_string_actions:
 10.1462 -                confl_optional = self._option_string_actions[option_string]
 10.1463 -                confl_optionals.append((option_string, confl_optional))
 10.1464 -
 10.1465 -        # resolve any conflicts
 10.1466 -        if confl_optionals:
 10.1467 -            conflict_handler = self._get_handler()
 10.1468 -            conflict_handler(action, confl_optionals)
 10.1469 -
 10.1470 -    def _handle_conflict_error(self, action, conflicting_actions):
 10.1471 -        message = _('conflicting option string(s): %s')
 10.1472 -        conflict_string = ', '.join([option_string
 10.1473 -                                     for option_string, action
 10.1474 -                                     in conflicting_actions])
 10.1475 -        raise ArgumentError(action, message % conflict_string)
 10.1476 -
 10.1477 -    def _handle_conflict_resolve(self, action, conflicting_actions):
 10.1478 -
 10.1479 -        # remove all conflicting options
 10.1480 -        for option_string, action in conflicting_actions:
 10.1481 -
 10.1482 -            # remove the conflicting option
 10.1483 -            action.option_strings.remove(option_string)
 10.1484 -            self._option_string_actions.pop(option_string, None)
 10.1485 -
 10.1486 -            # if the option now has no option string, remove it from the
 10.1487 -            # container holding it
 10.1488 -            if not action.option_strings:
 10.1489 -                action.container._remove_action(action)
 10.1490 -
 10.1491 -
 10.1492 -class _ArgumentGroup(_ActionsContainer):
 10.1493 -
 10.1494 -    def __init__(self, container, title=None, description=None, **kwargs):
 10.1495 -        # add any missing keyword arguments by checking the container
 10.1496 -        update = kwargs.setdefault
 10.1497 -        update('conflict_handler', container.conflict_handler)
 10.1498 -        update('prefix_chars', container.prefix_chars)
 10.1499 -        update('argument_default', container.argument_default)
 10.1500 -        super_init = super(_ArgumentGroup, self).__init__
 10.1501 -        super_init(description=description, **kwargs)
 10.1502 -
 10.1503 -        # group attributes
 10.1504 -        self.title = title
 10.1505 -        self._group_actions = []
 10.1506 -
 10.1507 -        # share most attributes with the container
 10.1508 -        self._registries = container._registries
 10.1509 -        self._actions = container._actions
 10.1510 -        self._option_string_actions = container._option_string_actions
 10.1511 -        self._defaults = container._defaults
 10.1512 -        self._has_negative_number_optionals = \
 10.1513 -            container._has_negative_number_optionals
 10.1514 -
 10.1515 -    def _add_action(self, action):
 10.1516 -        action = super(_ArgumentGroup, self)._add_action(action)
 10.1517 -        self._group_actions.append(action)
 10.1518 -        return action
 10.1519 -
 10.1520 -    def _remove_action(self, action):
 10.1521 -        super(_ArgumentGroup, self)._remove_action(action)
 10.1522 -        self._group_actions.remove(action)
 10.1523 -
 10.1524 -
 10.1525 -class _MutuallyExclusiveGroup(_ArgumentGroup):
 10.1526 -
 10.1527 -    def __init__(self, container, required=False):
 10.1528 -        super(_MutuallyExclusiveGroup, self).__init__(container)
 10.1529 -        self.required = required
 10.1530 -        self._container = container
 10.1531 -
 10.1532 -    def _add_action(self, action):
 10.1533 -        if action.required:
 10.1534 -            msg = _('mutually exclusive arguments must be optional')
 10.1535 -            raise ValueError(msg)
 10.1536 -        action = self._container._add_action(action)
 10.1537 -        self._group_actions.append(action)
 10.1538 -        return action
 10.1539 -
 10.1540 -    def _remove_action(self, action):
 10.1541 -        self._container._remove_action(action)
 10.1542 -        self._group_actions.remove(action)
 10.1543 -
 10.1544 -
 10.1545 -class ArgumentParser(_AttributeHolder, _ActionsContainer):
 10.1546 -    """Object for parsing command line strings into Python objects.
 10.1547 -
 10.1548 -    Keyword Arguments:
 10.1549 -        - prog -- The name of the program (default: sys.argv[0])
 10.1550 -        - usage -- A usage message (default: auto-generated from arguments)
 10.1551 -        - description -- A description of what the program does
 10.1552 -        - epilog -- Text following the argument descriptions
 10.1553 -        - parents -- Parsers whose arguments should be copied into this one
 10.1554 -        - formatter_class -- HelpFormatter class for printing help messages
 10.1555 -        - prefix_chars -- Characters that prefix optional arguments
 10.1556 -        - fromfile_prefix_chars -- Characters that prefix files containing
 10.1557 -            additional arguments
 10.1558 -        - argument_default -- The default value for all arguments
 10.1559 -        - conflict_handler -- String indicating how to handle conflicts
 10.1560 -        - add_help -- Add a -h/-help option
 10.1561 -    """
 10.1562 -
 10.1563 -    def __init__(self,
 10.1564 -                 prog=None,
 10.1565 -                 usage=None,
 10.1566 -                 description=None,
 10.1567 -                 epilog=None,
 10.1568 -                 version=None,
 10.1569 -                 parents=[],
 10.1570 -                 formatter_class=HelpFormatter,
 10.1571 -                 prefix_chars='-',
 10.1572 -                 fromfile_prefix_chars=None,
 10.1573 -                 argument_default=None,
 10.1574 -                 conflict_handler='error',
 10.1575 -                 add_help=True):
 10.1576 -
 10.1577 -        if version is not None:
 10.1578 -            import warnings
 10.1579 -            warnings.warn(
 10.1580 -                """The "version" argument to ArgumentParser is deprecated. """
 10.1581 -                """Please use """
 10.1582 -                """"add_argument(..., action='version', version="N", ...)" """
 10.1583 -                """instead""", DeprecationWarning)
 10.1584 -
 10.1585 -        superinit = super(ArgumentParser, self).__init__
 10.1586 -        superinit(description=description,
 10.1587 -                  prefix_chars=prefix_chars,
 10.1588 -                  argument_default=argument_default,
 10.1589 -                  conflict_handler=conflict_handler)
 10.1590 -
 10.1591 -        # default setting for prog
 10.1592 -        if prog is None:
 10.1593 -            prog = _os.path.basename(_sys.argv[0])
 10.1594 -
 10.1595 -        self.prog = prog
 10.1596 -        self.usage = usage
 10.1597 -        self.epilog = epilog
 10.1598 -        self.version = version
 10.1599 -        self.formatter_class = formatter_class
 10.1600 -        self.fromfile_prefix_chars = fromfile_prefix_chars
 10.1601 -        self.add_help = add_help
 10.1602 -
 10.1603 -        add_group = self.add_argument_group
 10.1604 -        self._positionals = add_group(_('positional arguments'))
 10.1605 -        self._optionals = add_group(_('optional arguments'))
 10.1606 -        self._subparsers = None
 10.1607 -
 10.1608 -        # register types
 10.1609 -        def identity(string):
 10.1610 -            return string
 10.1611 -        self.register('type', None, identity)
 10.1612 -
 10.1613 -        # add help and version arguments if necessary
 10.1614 -        # (using explicit default to override global argument_default)
 10.1615 -        if self.add_help:
 10.1616 -            self.add_argument(
 10.1617 -                '-h', '--help', action='help', default=SUPPRESS,
 10.1618 -                help=_('show this help message and exit'))
 10.1619 -        if self.version:
 10.1620 -            self.add_argument(
 10.1621 -                '-v', '--version', action='version', default=SUPPRESS,
 10.1622 -                version=self.version,
 10.1623 -                help=_("show program's version number and exit"))
 10.1624 -
 10.1625 -        # add parent arguments and defaults
 10.1626 -        for parent in parents:
 10.1627 -            self._add_container_actions(parent)
 10.1628 -            try:
 10.1629 -                defaults = parent._defaults
 10.1630 -            except AttributeError:
 10.1631 -                pass
 10.1632 -            else:
 10.1633 -                self._defaults.update(defaults)
 10.1634 -
 10.1635 -    # =======================
 10.1636 -    # Pretty __repr__ methods
 10.1637 -    # =======================
 10.1638 -    def _get_kwargs(self):
 10.1639 -        names = [
 10.1640 -            'prog',
 10.1641 -            'usage',
 10.1642 -            'description',
 10.1643 -            'version',
 10.1644 -            'formatter_class',
 10.1645 -            'conflict_handler',
 10.1646 -            'add_help',
 10.1647 -        ]
 10.1648 -        return [(name, getattr(self, name)) for name in names]
 10.1649 -
 10.1650 -    # ==================================
 10.1651 -    # Optional/Positional adding methods
 10.1652 -    # ==================================
 10.1653 -    def add_subparsers(self, **kwargs):
 10.1654 -        if self._subparsers is not None:
 10.1655 -            self.error(_('cannot have multiple subparser arguments'))
 10.1656 -
 10.1657 -        # add the parser class to the arguments if it's not present
 10.1658 -        kwargs.setdefault('parser_class', type(self))
 10.1659 -
 10.1660 -        if 'title' in kwargs or 'description' in kwargs:
 10.1661 -            title = _(kwargs.pop('title', 'subcommands'))
 10.1662 -            description = _(kwargs.pop('description', None))
 10.1663 -            self._subparsers = self.add_argument_group(title, description)
 10.1664 -        else:
 10.1665 -            self._subparsers = self._positionals
 10.1666 -
 10.1667 -        # prog defaults to the usage message of this parser, skipping
 10.1668 -        # optional arguments and with no "usage:" prefix
 10.1669 -        if kwargs.get('prog') is None:
 10.1670 -            formatter = self._get_formatter()
 10.1671 -            positionals = self._get_positional_actions()
 10.1672 -            groups = self._mutually_exclusive_groups
 10.1673 -            formatter.add_usage(self.usage, positionals, groups, '')
 10.1674 -            kwargs['prog'] = formatter.format_help().strip()
 10.1675 -
 10.1676 -        # create the parsers action and add it to the positionals list
 10.1677 -        parsers_class = self._pop_action_class(kwargs, 'parsers')
 10.1678 -        action = parsers_class(option_strings=[], **kwargs)
 10.1679 -        self._subparsers._add_action(action)
 10.1680 -
 10.1681 -        # return the created parsers action
 10.1682 -        return action
 10.1683 -
 10.1684 -    def _add_action(self, action):
 10.1685 -        if action.option_strings:
 10.1686 -            self._optionals._add_action(action)
 10.1687 -        else:
 10.1688 -            self._positionals._add_action(action)
 10.1689 -        return action
 10.1690 -
 10.1691 -    def _get_optional_actions(self):
 10.1692 -        return [action
 10.1693 -                for action in self._actions
 10.1694 -                if action.option_strings]
 10.1695 -
 10.1696 -    def _get_positional_actions(self):
 10.1697 -        return [action
 10.1698 -                for action in self._actions
 10.1699 -                if not action.option_strings]
 10.1700 -
 10.1701 -    # =====================================
 10.1702 -    # Command line argument parsing methods
 10.1703 -    # =====================================
 10.1704 -    def parse_args(self, args=None, namespace=None):
 10.1705 -        args, argv = self.parse_known_args(args, namespace)
 10.1706 -        if argv:
 10.1707 -            msg = _('unrecognized arguments: %s')
 10.1708 -            self.error(msg % ' '.join(argv))
 10.1709 -        return args
 10.1710 -
 10.1711 -    def parse_known_args(self, args=None, namespace=None):
 10.1712 -        # args default to the system args
 10.1713 -        if args is None:
 10.1714 -            args = _sys.argv[1:]
 10.1715 -
 10.1716 -        # default Namespace built from parser defaults
 10.1717 -        if namespace is None:
 10.1718 -            namespace = Namespace()
 10.1719 -
 10.1720 -        # add any action defaults that aren't present
 10.1721 -        for action in self._actions:
 10.1722 -            if action.dest is not SUPPRESS:
 10.1723 -                if not hasattr(namespace, action.dest):
 10.1724 -                    if action.default is not SUPPRESS:
 10.1725 -                        default = action.default
 10.1726 -                        if isinstance(action.default, _basestring):
 10.1727 -                            default = self._get_value(action, default)
 10.1728 -                        setattr(namespace, action.dest, default)
 10.1729 -
 10.1730 -        # add any parser defaults that aren't present
 10.1731 -        for dest in self._defaults:
 10.1732 -            if not hasattr(namespace, dest):
 10.1733 -                setattr(namespace, dest, self._defaults[dest])
 10.1734 -
 10.1735 -        # parse the arguments and exit if there are any errors
 10.1736 -        try:
 10.1737 -            return self._parse_known_args(args, namespace)
 10.1738 -        except ArgumentError:
 10.1739 -            err = _sys.exc_info()[1]
 10.1740 -            self.error(str(err))
 10.1741 -
 10.1742 -    def _parse_known_args(self, arg_strings, namespace):
 10.1743 -        # replace arg strings that are file references
 10.1744 -        if self.fromfile_prefix_chars is not None:
 10.1745 -            arg_strings = self._read_args_from_files(arg_strings)
 10.1746 -
 10.1747 -        # map all mutually exclusive arguments to the other arguments
 10.1748 -        # they can't occur with
 10.1749 -        action_conflicts = {}
 10.1750 -        for mutex_group in self._mutually_exclusive_groups:
 10.1751 -            group_actions = mutex_group._group_actions
 10.1752 -            for i, mutex_action in enumerate(mutex_group._group_actions):
 10.1753 -                conflicts = action_conflicts.setdefault(mutex_action, [])
 10.1754 -                conflicts.extend(group_actions[:i])
 10.1755 -                conflicts.extend(group_actions[i + 1:])
 10.1756 -
 10.1757 -        # find all option indices, and determine the arg_string_pattern
 10.1758 -        # which has an 'O' if there is an option at an index,
 10.1759 -        # an 'A' if there is an argument, or a '-' if there is a '--'
 10.1760 -        option_string_indices = {}
 10.1761 -        arg_string_pattern_parts = []
 10.1762 -        arg_strings_iter = iter(arg_strings)
 10.1763 -        for i, arg_string in enumerate(arg_strings_iter):
 10.1764 -
 10.1765 -            # all args after -- are non-options
 10.1766 -            if arg_string == '--':
 10.1767 -                arg_string_pattern_parts.append('-')
 10.1768 -                for arg_string in arg_strings_iter:
 10.1769 -                    arg_string_pattern_parts.append('A')
 10.1770 -
 10.1771 -            # otherwise, add the arg to the arg strings
 10.1772 -            # and note the index if it was an option
 10.1773 -            else:
 10.1774 -                option_tuple = self._parse_optional(arg_string)
 10.1775 -                if option_tuple is None:
 10.1776 -                    pattern = 'A'
 10.1777 -                else:
 10.1778 -                    option_string_indices[i] = option_tuple
 10.1779 -                    pattern = 'O'
 10.1780 -                arg_string_pattern_parts.append(pattern)
 10.1781 -
 10.1782 -        # join the pieces together to form the pattern
 10.1783 -        arg_strings_pattern = ''.join(arg_string_pattern_parts)
 10.1784 -
 10.1785 -        # converts arg strings to the appropriate and then takes the action
 10.1786 -        seen_actions = _set()
 10.1787 -        seen_non_default_actions = _set()
 10.1788 -
 10.1789 -        def take_action(action, argument_strings, option_string=None):
 10.1790 -            seen_actions.add(action)
 10.1791 -            argument_values = self._get_values(action, argument_strings)
 10.1792 -
 10.1793 -            # error if this argument is not allowed with other previously
 10.1794 -            # seen arguments, assuming that actions that use the default
 10.1795 -            # value don't really count as "present"
 10.1796 -            if argument_values is not action.default:
 10.1797 -                seen_non_default_actions.add(action)
 10.1798 -                for conflict_action in action_conflicts.get(action, []):
 10.1799 -                    if conflict_action in seen_non_default_actions:
 10.1800 -                        msg = _('not allowed with argument %s')
 10.1801 -                        action_name = _get_action_name(conflict_action)
 10.1802 -                        raise ArgumentError(action, msg % action_name)
 10.1803 -
 10.1804 -            # take the action if we didn't receive a SUPPRESS value
 10.1805 -            # (e.g. from a default)
 10.1806 -            if argument_values is not SUPPRESS:
 10.1807 -                action(self, namespace, argument_values, option_string)
 10.1808 -
 10.1809 -        # function to convert arg_strings into an optional action
 10.1810 -        def consume_optional(start_index):
 10.1811 -
 10.1812 -            # get the optional identified at this index
 10.1813 -            option_tuple = option_string_indices[start_index]
 10.1814 -            action, option_string, explicit_arg = option_tuple
 10.1815 -
 10.1816 -            # identify additional optionals in the same arg string
 10.1817 -            # (e.g. -xyz is the same as -x -y -z if no args are required)
 10.1818 -            match_argument = self._match_argument
 10.1819 -            action_tuples = []
 10.1820 -            while True:
 10.1821 -
 10.1822 -                # if we found no optional action, skip it
 10.1823 -                if action is None:
 10.1824 -                    extras.append(arg_strings[start_index])
 10.1825 -                    return start_index + 1
 10.1826 -
 10.1827 -                # if there is an explicit argument, try to match the
 10.1828 -                # optional's string arguments to only this
 10.1829 -                if explicit_arg is not None:
 10.1830 -                    arg_count = match_argument(action, 'A')
 10.1831 -
 10.1832 -                    # if the action is a single-dash option and takes no
 10.1833 -                    # arguments, try to parse more single-dash options out
 10.1834 -                    # of the tail of the option string
 10.1835 -                    chars = self.prefix_chars
 10.1836 -                    if arg_count == 0 and option_string[1] not in chars:
 10.1837 -                        action_tuples.append((action, [], option_string))
 10.1838 -                        for char in self.prefix_chars:
 10.1839 -                            option_string = char + explicit_arg[0]
 10.1840 -                            explicit_arg = explicit_arg[1:] or None
 10.1841 -                            optionals_map = self._option_string_actions
 10.1842 -                            if option_string in optionals_map:
 10.1843 -                                action = optionals_map[option_string]
 10.1844 -                                break
 10.1845 -                        else:
 10.1846 -                            msg = _('ignored explicit argument %r')
 10.1847 -                            raise ArgumentError(action, msg % explicit_arg)
 10.1848 -
 10.1849 -                    # if the action expect exactly one argument, we've
 10.1850 -                    # successfully matched the option; exit the loop
 10.1851 -                    elif arg_count == 1:
 10.1852 -                        stop = start_index + 1
 10.1853 -                        args = [explicit_arg]
 10.1854 -                        action_tuples.append((action, args, option_string))
 10.1855 -                        break
 10.1856 -
 10.1857 -                    # error if a double-dash option did not use the
 10.1858 -                    # explicit argument
 10.1859 -                    else:
 10.1860 -                        msg = _('ignored explicit argument %r')
 10.1861 -                        raise ArgumentError(action, msg % explicit_arg)
 10.1862 -
 10.1863 -                # if there is no explicit argument, try to match the
 10.1864 -                # optional's string arguments with the following strings
 10.1865 -                # if successful, exit the loop
 10.1866 -                else:
 10.1867 -                    start = start_index + 1
 10.1868 -                    selected_patterns = arg_strings_pattern[start:]
 10.1869 -                    arg_count = match_argument(action, selected_patterns)
 10.1870 -                    stop = start + arg_count
 10.1871 -                    args = arg_strings[start:stop]
 10.1872 -                    action_tuples.append((action, args, option_string))
 10.1873 -                    break
 10.1874 -
 10.1875 -            # add the Optional to the list and return the index at which
 10.1876 -            # the Optional's string args stopped
 10.1877 -            assert action_tuples
 10.1878 -            for action, args, option_string in action_tuples:
 10.1879 -                take_action(action, args, option_string)
 10.1880 -            return stop
 10.1881 -
 10.1882 -        # the list of Positionals left to be parsed; this is modified
 10.1883 -        # by consume_positionals()
 10.1884 -        positionals = self._get_positional_actions()
 10.1885 -
 10.1886 -        # function to convert arg_strings into positional actions
 10.1887 -        def consume_positionals(start_index):
 10.1888 -            # match as many Positionals as possible
 10.1889 -            match_partial = self._match_arguments_partial
 10.1890 -            selected_pattern = arg_strings_pattern[start_index:]
 10.1891 -            arg_counts = match_partial(positionals, selected_pattern)
 10.1892 -
 10.1893 -            # slice off the appropriate arg strings for each Positional
 10.1894 -            # and add the Positional and its args to the list
 10.1895 -            for action, arg_count in zip(positionals, arg_counts):
 10.1896 -                args = arg_strings[start_index: start_index + arg_count]
 10.1897 -                start_index += arg_count
 10.1898 -                take_action(action, args)
 10.1899 -
 10.1900 -            # slice off the Positionals that we just parsed and return the
 10.1901 -            # index at which the Positionals' string args stopped
 10.1902 -            positionals[:] = positionals[len(arg_counts):]
 10.1903 -            return start_index
 10.1904 -
 10.1905 -        # consume Positionals and Optionals alternately, until we have
 10.1906 -        # passed the last option string
 10.1907 -        extras = []
 10.1908 -        start_index = 0
 10.1909 -        if option_string_indices:
 10.1910 -            max_option_string_index = max(option_string_indices)
 10.1911 -        else:
 10.1912 -            max_option_string_index = -1
 10.1913 -        while start_index <= max_option_string_index:
 10.1914 -
 10.1915 -            # consume any Positionals preceding the next option
 10.1916 -            next_option_string_index = min([
 10.1917 -                index
 10.1918 -                for index in option_string_indices
 10.1919 -                if index >= start_index])
 10.1920 -            if start_index != next_option_string_index:
 10.1921 -                positionals_end_index = consume_positionals(start_index)
 10.1922 -
 10.1923 -                # only try to parse the next optional if we didn't consume
 10.1924 -                # the option string during the positionals parsing
 10.1925 -                if positionals_end_index > start_index:
 10.1926 -                    start_index = positionals_end_index
 10.1927 -                    continue
 10.1928 -                else:
 10.1929 -                    start_index = positionals_end_index
 10.1930 -
 10.1931 -            # if we consumed all the positionals we could and we're not
 10.1932 -            # at the index of an option string, there were extra arguments
 10.1933 -            if start_index not in option_string_indices:
 10.1934 -                strings = arg_strings[start_index:next_option_string_index]
 10.1935 -                extras.extend(strings)
 10.1936 -                start_index = next_option_string_index
 10.1937 -
 10.1938 -            # consume the next optional and any arguments for it
 10.1939 -            start_index = consume_optional(start_index)
 10.1940 -
 10.1941 -        # consume any positionals following the last Optional
 10.1942 -        stop_index = consume_positionals(start_index)
 10.1943 -
 10.1944 -        # if we didn't consume all the argument strings, there were extras
 10.1945 -        extras.extend(arg_strings[stop_index:])
 10.1946 -
 10.1947 -        # if we didn't use all the Positional objects, there were too few
 10.1948 -        # arg strings supplied.
 10.1949 -        if positionals:
 10.1950 -            self.error(_('too few arguments'))
 10.1951 -
 10.1952 -        # make sure all required actions were present
 10.1953 -        for action in self._actions:
 10.1954 -            if action.required:
 10.1955 -                if action not in seen_actions:
 10.1956 -                    name = _get_action_name(action)
 10.1957 -                    self.error(_('argument %s is required') % name)
 10.1958 -
 10.1959 -        # make sure all required groups had one option present
 10.1960 -        for group in self._mutually_exclusive_groups:
 10.1961 -            if group.required:
 10.1962 -                for action in group._group_actions:
 10.1963 -                    if action in seen_non_default_actions:
 10.1964 -                        break
 10.1965 -
 10.1966 -                # if no actions were used, report the error
 10.1967 -                else:
 10.1968 -                    names = [_get_action_name(action)
 10.1969 -                             for action in group._group_actions
 10.1970 -                             if action.help is not SUPPRESS]
 10.1971 -                    msg = _('one of the arguments %s is required')
 10.1972 -                    self.error(msg % ' '.join(names))
 10.1973 -
 10.1974 -        # return the updated namespace and the extra arguments
 10.1975 -        return namespace, extras
 10.1976 -
 10.1977 -    def _read_args_from_files(self, arg_strings):
 10.1978 -        # expand arguments referencing files
 10.1979 -        new_arg_strings = []
 10.1980 -        for arg_string in arg_strings:
 10.1981 -
 10.1982 -            # for regular arguments, just add them back into the list
 10.1983 -            if arg_string[0] not in self.fromfile_prefix_chars:
 10.1984 -                new_arg_strings.append(arg_string)
 10.1985 -
 10.1986 -            # replace arguments referencing files with the file content
 10.1987 -            else:
 10.1988 -                try:
 10.1989 -                    args_file = open(arg_string[1:])
 10.1990 -                    try:
 10.1991 -                        arg_strings = []
 10.1992 -                        for arg_line in args_file.read().splitlines():
 10.1993 -                            for arg in self.convert_arg_line_to_args(arg_line):
 10.1994 -                                arg_strings.append(arg)
 10.1995 -                        arg_strings = self._read_args_from_files(arg_strings)
 10.1996 -                        new_arg_strings.extend(arg_strings)
 10.1997 -                    finally:
 10.1998 -                        args_file.close()
 10.1999 -                except IOError:
 10.2000 -                    err = _sys.exc_info()[1]
 10.2001 -                    self.error(str(err))
 10.2002 -
 10.2003 -        # return the modified argument list
 10.2004 -        return new_arg_strings
 10.2005 -
 10.2006 -    def convert_arg_line_to_args(self, arg_line):
 10.2007 -        return [arg_line]
 10.2008 -
 10.2009 -    def _match_argument(self, action, arg_strings_pattern):
 10.2010 -        # match the pattern for this action to the arg strings
 10.2011 -        nargs_pattern = self._get_nargs_pattern(action)
 10.2012 -        match = _re.match(nargs_pattern, arg_strings_pattern)
 10.2013 -
 10.2014 -        # raise an exception if we weren't able to find a match
 10.2015 -        if match is None:
 10.2016 -            nargs_errors = {
 10.2017 -                None: _('expected one argument'),
 10.2018 -                OPTIONAL: _('expected at most one argument'),
 10.2019 -                ONE_OR_MORE: _('expected at least one argument'),
 10.2020 -            }
 10.2021 -            default = _('expected %s argument(s)') % action.nargs
 10.2022 -            msg = nargs_errors.get(action.nargs, default)
 10.2023 -            raise ArgumentError(action, msg)
 10.2024 -
 10.2025 -        # return the number of arguments matched
 10.2026 -        return len(match.group(1))
 10.2027 -
 10.2028 -    def _match_arguments_partial(self, actions, arg_strings_pattern):
 10.2029 -        # progressively shorten the actions list by slicing off the
 10.2030 -        # final actions until we find a match
 10.2031 -        result = []
 10.2032 -        for i in range(len(actions), 0, -1):
 10.2033 -            actions_slice = actions[:i]
 10.2034 -            pattern = ''.join([self._get_nargs_pattern(action)
 10.2035 -                               for action in actions_slice])
 10.2036 -            match = _re.match(pattern, arg_strings_pattern)
 10.2037 -            if match is not None:
 10.2038 -                result.extend([len(string) for string in match.groups()])
 10.2039 -                break
 10.2040 -
 10.2041 -        # return the list of arg string counts
 10.2042 -        return result
 10.2043 -
 10.2044 -    def _parse_optional(self, arg_string):
 10.2045 -        # if it's an empty string, it was meant to be a positional
 10.2046 -        if not arg_string:
 10.2047 -            return None
 10.2048 -
 10.2049 -        # if it doesn't start with a prefix, it was meant to be positional
 10.2050 -        if not arg_string[0] in self.prefix_chars:
 10.2051 -            return None
 10.2052 -
 10.2053 -        # if the option string is present in the parser, return the action
 10.2054 -        if arg_string in self._option_string_actions:
 10.2055 -            action = self._option_string_actions[arg_string]
 10.2056 -            return action, arg_string, None
 10.2057 -
 10.2058 -        # if it's just a single character, it was meant to be positional
 10.2059 -        if len(arg_string) == 1:
 10.2060 -            return None
 10.2061 -
 10.2062 -        # if the option string before the "=" is present, return the action
 10.2063 -        if '=' in arg_string:
 10.2064 -            option_string, explicit_arg = arg_string.split('=', 1)
 10.2065 -            if option_string in self._option_string_actions:
 10.2066 -                action = self._option_string_actions[option_string]
 10.2067 -                return action, option_string, explicit_arg
 10.2068 -
 10.2069 -        # search through all possible prefixes of the option string
 10.2070 -        # and all actions in the parser for possible interpretations
 10.2071 -        option_tuples = self._get_option_tuples(arg_string)
 10.2072 -
 10.2073 -        # if multiple actions match, the option string was ambiguous
 10.2074 -        if len(option_tuples) > 1:
 10.2075 -            options = ', '.join([option_string
 10.2076 -                for action, option_string, explicit_arg in option_tuples])
 10.2077 -            tup = arg_string, options
 10.2078 -            self.error(_('ambiguous option: %s could match %s') % tup)
 10.2079 -
 10.2080 -        # if exactly one action matched, this segmentation is good,
 10.2081 -        # so return the parsed action
 10.2082 -        elif len(option_tuples) == 1:
 10.2083 -            option_tuple, = option_tuples
 10.2084 -            return option_tuple
 10.2085 -
 10.2086 -        # if it was not found as an option, but it looks like a negative
 10.2087 -        # number, it was meant to be positional
 10.2088 -        # unless there are negative-number-like options
 10.2089 -        if self._negative_number_matcher.match(arg_string):
 10.2090 -            if not self._has_negative_number_optionals:
 10.2091 -                return None
 10.2092 -
 10.2093 -        # if it contains a space, it was meant to be a positional
 10.2094 -        if ' ' in arg_string:
 10.2095 -            return None
 10.2096 -
 10.2097 -        # it was meant to be an optional but there is no such option
 10.2098 -        # in this parser (though it might be a valid option in a subparser)
 10.2099 -        return None, arg_string, None
 10.2100 -
 10.2101 -    def _get_option_tuples(self, option_string):
 10.2102 -        result = []
 10.2103 -
 10.2104 -        # option strings starting with two prefix characters are only
 10.2105 -        # split at the '='
 10.2106 -        chars = self.prefix_chars
 10.2107 -        if option_string[0] in chars and option_string[1] in chars:
 10.2108 -            if '=' in option_string:
 10.2109 -                option_prefix, explicit_arg = option_string.split('=', 1)
 10.2110 -            else:
 10.2111 -                option_prefix = option_string
 10.2112 -                explicit_arg = None
 10.2113 -            for option_string in self._option_string_actions:
 10.2114 -                if option_string.startswith(option_prefix):
 10.2115 -                    action = self._option_string_actions[option_string]
 10.2116 -                    tup = action, option_string, explicit_arg
 10.2117 -                    result.append(tup)
 10.2118 -
 10.2119 -        # single character options can be concatenated with their arguments
 10.2120 -        # but multiple character options always have to have their argument
 10.2121 -        # separate
 10.2122 -        elif option_string[0] in chars and option_string[1] not in chars:
 10.2123 -            option_prefix = option_string
 10.2124 -            explicit_arg = None
 10.2125 -            short_option_prefix = option_string[:2]
 10.2126 -            short_explicit_arg = option_string[2:]
 10.2127 -
 10.2128 -            for option_string in self._option_string_actions:
 10.2129 -                if option_string == short_option_prefix:
 10.2130 -                    action = self._option_string_actions[option_string]
 10.2131 -                    tup = action, option_string, short_explicit_arg
 10.2132 -                    result.append(tup)
 10.2133 -                elif option_string.startswith(option_prefix):
 10.2134 -                    action = self._option_string_actions[option_string]
 10.2135 -                    tup = action, option_string, explicit_arg
 10.2136 -                    result.append(tup)
 10.2137 -
 10.2138 -        # shouldn't ever get here
 10.2139 -        else:
 10.2140 -            self.error(_('unexpected option string: %s') % option_string)
 10.2141 -
 10.2142 -        # return the collected option tuples
 10.2143 -        return result
 10.2144 -
 10.2145 -    def _get_nargs_pattern(self, action):
 10.2146 -        # in all examples below, we have to allow for '--' args
 10.2147 -        # which are represented as '-' in the pattern
 10.2148 -        nargs = action.nargs
 10.2149 -
 10.2150 -        # the default (None) is assumed to be a single argument
 10.2151 -        if nargs is None:
 10.2152 -            nargs_pattern = '(-*A-*)'
 10.2153 -
 10.2154 -        # allow zero or one arguments
 10.2155 -        elif nargs == OPTIONAL:
 10.2156 -            nargs_pattern = '(-*A?-*)'
 10.2157 -
 10.2158 -        # allow zero or more arguments
 10.2159 -        elif nargs == ZERO_OR_MORE:
 10.2160 -            nargs_pattern = '(-*[A-]*)'
 10.2161 -
 10.2162 -        # allow one or more arguments
 10.2163 -        elif nargs == ONE_OR_MORE:
 10.2164 -            nargs_pattern = '(-*A[A-]*)'
 10.2165 -
 10.2166 -        # allow any number of options or arguments
 10.2167 -        elif nargs == REMAINDER:
 10.2168 -            nargs_pattern = '([-AO]*)'
 10.2169 -
 10.2170 -        # allow one argument followed by any number of options or arguments
 10.2171 -        elif nargs == PARSER:
 10.2172 -            nargs_pattern = '(-*A[-AO]*)'
 10.2173 -
 10.2174 -        # all others should be integers
 10.2175 -        else:
 10.2176 -            nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs)
 10.2177 -
 10.2178 -        # if this is an optional action, -- is not allowed
 10.2179 -        if action.option_strings:
 10.2180 -            nargs_pattern = nargs_pattern.replace('-*', '')
 10.2181 -            nargs_pattern = nargs_pattern.replace('-', '')
 10.2182 -
 10.2183 -        # return the pattern
 10.2184 -        return nargs_pattern
 10.2185 -
 10.2186 -    # ========================
 10.2187 -    # Value conversion methods
 10.2188 -    # ========================
 10.2189 -    def _get_values(self, action, arg_strings):
 10.2190 -        # for everything but PARSER args, strip out '--'
 10.2191 -        if action.nargs not in [PARSER, REMAINDER]:
 10.2192 -            arg_strings = [s for s in arg_strings if s != '--']
 10.2193 -
 10.2194 -        # optional argument produces a default when not present
 10.2195 -        if not arg_strings and action.nargs == OPTIONAL:
 10.2196 -            if action.option_strings:
 10.2197 -                value = action.const
 10.2198 -            else:
 10.2199 -                value = action.default
 10.2200 -            if isinstance(value, _basestring):
 10.2201 -                value = self._get_value(action, value)
 10.2202 -                self._check_value(action, value)
 10.2203 -
 10.2204 -        # when nargs='*' on a positional, if there were no command-line
 10.2205 -        # args, use the default if it is anything other than None
 10.2206 -        elif (not arg_strings and action.nargs == ZERO_OR_MORE and
 10.2207 -              not action.option_strings):
 10.2208 -            if action.default is not None:
 10.2209 -                value = action.default
 10.2210 -            else:
 10.2211 -                value = arg_strings
 10.2212 -            self._check_value(action, value)
 10.2213 -
 10.2214 -        # single argument or optional argument produces a single value
 10.2215 -        elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]:
 10.2216 -            arg_string, = arg_strings
 10.2217 -            value = self._get_value(action, arg_string)
 10.2218 -            self._check_value(action, value)
 10.2219 -
 10.2220 -        # REMAINDER arguments convert all values, checking none
 10.2221 -        elif action.nargs == REMAINDER:
 10.2222 -            value = [self._get_value(action, v) for v in arg_strings]
 10.2223 -
 10.2224 -        # PARSER arguments convert all values, but check only the first
 10.2225 -        elif action.nargs == PARSER:
 10.2226 -            value = [self._get_value(action, v) for v in arg_strings]
 10.2227 -            self._check_value(action, value[0])
 10.2228 -
 10.2229 -        # all other types of nargs produce a list
 10.2230 -        else:
 10.2231 -            value = [self._get_value(action, v) for v in arg_strings]
 10.2232 -            for v in value:
 10.2233 -                self._check_value(action, v)
 10.2234 -
 10.2235 -        # return the converted value
 10.2236 -        return value
 10.2237 -
 10.2238 -    def _get_value(self, action, arg_string):
 10.2239 -        type_func = self._registry_get('type', action.type, action.type)
 10.2240 -        if not _callable(type_func):
 10.2241 -            msg = _('%r is not callable')
 10.2242 -            raise ArgumentError(action, msg % type_func)
 10.2243 -
 10.2244 -        # convert the value to the appropriate type
 10.2245 -        try:
 10.2246 -            result = type_func(arg_string)
 10.2247 -
 10.2248 -        # ArgumentTypeErrors indicate errors
 10.2249 -        except ArgumentTypeError:
 10.2250 -            name = getattr(action.type, '__name__', repr(action.type))
 10.2251 -            msg = str(_sys.exc_info()[1])
 10.2252 -            raise ArgumentError(action, msg)
 10.2253 -
 10.2254 -        # TypeErrors or ValueErrors also indicate errors
 10.2255 -        except (TypeError, ValueError):
 10.2256 -            name = getattr(action.type, '__name__', repr(action.type))
 10.2257 -            msg = _('invalid %s value: %r')
 10.2258 -            raise ArgumentError(action, msg % (name, arg_string))
 10.2259 -
 10.2260 -        # return the converted value
 10.2261 -        return result
 10.2262 -
 10.2263 -    def _check_value(self, action, value):
 10.2264 -        # converted value must be one of the choices (if specified)
 10.2265 -        if action.choices is not None and value not in action.choices:
 10.2266 -            tup = value, ', '.join(map(repr, action.choices))
 10.2267 -            msg = _('invalid choice: %r (choose from %s)') % tup
 10.2268 -            raise ArgumentError(action, msg)
 10.2269 -
 10.2270 -    # =======================
 10.2271 -    # Help-formatting methods
 10.2272 -    # =======================
 10.2273 -    def format_usage(self):
 10.2274 -        formatter = self._get_formatter()
 10.2275 -        formatter.add_usage(self.usage, self._actions,
 10.2276 -                            self._mutually_exclusive_groups)
 10.2277 -        return formatter.format_help()
 10.2278 -
 10.2279 -    def format_help(self):
 10.2280 -        formatter = self._get_formatter()
 10.2281 -
 10.2282 -        # usage
 10.2283 -        formatter.add_usage(self.usage, self._actions,
 10.2284 -                            self._mutually_exclusive_groups)
 10.2285 -
 10.2286 -        # description
 10.2287 -        formatter.add_text(self.description)
 10.2288 -
 10.2289 -        # positionals, optionals and user-defined groups
 10.2290 -        for action_group in self._action_groups:
 10.2291 -            formatter.start_section(action_group.title)
 10.2292 -            formatter.add_text(action_group.description)
 10.2293 -            formatter.add_arguments(action_group._group_actions)
 10.2294 -            formatter.end_section()
 10.2295 -
 10.2296 -        # epilog
 10.2297 -        formatter.add_text(self.epilog)
 10.2298 -
 10.2299 -        # determine help from format above
 10.2300 -        return formatter.format_help()
 10.2301 -
 10.2302 -    def format_version(self):
 10.2303 -        import warnings
 10.2304 -        warnings.warn(
 10.2305 -            'The format_version method is deprecated -- the "version" '
 10.2306 -            'argument to ArgumentParser is no longer supported.',
 10.2307 -            DeprecationWarning)
 10.2308 -        formatter = self._get_formatter()
 10.2309 -        formatter.add_text(self.version)
 10.2310 -        return formatter.format_help()
 10.2311 -
 10.2312 -    def _get_formatter(self):
 10.2313 -        return self.formatter_class(prog=self.prog)
 10.2314 -
 10.2315 -    # =====================
 10.2316 -    # Help-printing methods
 10.2317 -    # =====================
 10.2318 -    def print_usage(self, file=None):
 10.2319 -        if file is None:
 10.2320 -            file = _sys.stdout
 10.2321 -        self._print_message(self.format_usage(), file)
 10.2322 -
 10.2323 -    def print_help(self, file=None):
 10.2324 -        if file is None:
 10.2325 -            file = _sys.stdout
 10.2326 -        self._print_message(self.format_help(), file)
 10.2327 -
 10.2328 -    def print_version(self, file=None):
 10.2329 -        import warnings
 10.2330 -        warnings.warn(
 10.2331 -            'The print_version method is deprecated -- the "version" '
 10.2332 -            'argument to ArgumentParser is no longer supported.',
 10.2333 -            DeprecationWarning)
 10.2334 -        self._print_message(self.format_version(), file)
 10.2335 -
 10.2336 -    def _print_message(self, message, file=None):
 10.2337 -        if message:
 10.2338 -            if file is None:
 10.2339 -                file = _sys.stderr
 10.2340 -            file.write(message)
 10.2341 -
 10.2342 -    # ===============
 10.2343 -    # Exiting methods
 10.2344 -    # ===============
 10.2345 -    def exit(self, status=0, message=None):
 10.2346 -        if message:
 10.2347 -            self._print_message(message, _sys.stderr)
 10.2348 -        _sys.exit(status)
 10.2349 -
 10.2350 -    def error(self, message):
 10.2351 -        """error(message: string)
 10.2352 -
 10.2353 -        Prints a usage message incorporating the message to stderr and
 10.2354 -        exits.
 10.2355 -
 10.2356 -        If you override this in a subclass, it should not return -- it
 10.2357 -        should either exit or raise an exception.
 10.2358 -        """
 10.2359 -        self.print_usage(_sys.stderr)
 10.2360 -        self.exit(2, _('%s: error: %s\n') % (self.prog, message))
    11.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/compareStats.py	Sat Jun 28 22:13:23 2014 +0200
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,65 +0,0 @@
    11.4 -#!/usr/bin/env python
    11.5 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/compareStats.py
    11.6 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    11.7 -#     Copyright   2012
    11.8 -#
    11.9 -# Tool that compares MaSh statistics and displays a comparison.
   11.10 -
   11.11 -'''
   11.12 -Created on Jul 13, 2012
   11.13 -
   11.14 -@author: Daniel Kuehlwein
   11.15 -'''
   11.16 -
   11.17 -import sys
   11.18 -from argparse import ArgumentParser,RawDescriptionHelpFormatter
   11.19 -from matplotlib.pyplot import plot,figure,show,legend,xlabel,ylabel,axis,hist
   11.20 -from stats import Statistics
   11.21 -
   11.22 -parser = ArgumentParser(description='Compare Statistics.  \n\n\
   11.23 -Loads different statistics and displays a comparison. Requires the matplotlib module.\n\n\
   11.24 --------- Example Usage ---------------\n\
   11.25 -./compareStats.py --statFiles ../tmp/natISANB.stats ../tmp/natATPNB.stats -b 30\n\n\
   11.26 -Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
   11.27 -parser.add_argument('--statFiles', default=None, nargs='+',
   11.28 -                    help='The names of the saved statistic files.')
   11.29 -parser.add_argument('-b','--bins',default=50,help="Number of bins for the AUC histogram. Default=50.",type=int)
   11.30 -
   11.31 -def main(argv = sys.argv[1:]):
   11.32 -    args = parser.parse_args(argv)
   11.33 -    if args.statFiles == None:
   11.34 -        print 'Filenames missing.'
   11.35 -        sys.exit(-1)
   11.36 -
   11.37 -    aucData = []
   11.38 -    aucLabels = []
   11.39 -    for statFile in args.statFiles:
   11.40 -        s = Statistics()
   11.41 -        s.load(statFile)
   11.42 -        avgRecall = [float(x)/s.problems for x in s.recallData]
   11.43 -        figure('Recall')
   11.44 -        plot(range(s.cutOff),avgRecall,label=statFile)
   11.45 -        legend(loc='lower right')
   11.46 -        ylabel('Average Recall')
   11.47 -        xlabel('Highest ranked premises')
   11.48 -        axis([0,s.cutOff,0.0,1.0])
   11.49 -        figure('100%Recall')
   11.50 -        plot(range(s.cutOff),s.recall100Data,label=statFile)
   11.51 -        legend(loc='lower right')
   11.52 -        ylabel('100%Recall')
   11.53 -        xlabel('Highest ranked premises')
   11.54 -        axis([0,s.cutOff,0,s.problems])
   11.55 -        aucData.append(s.aucData)
   11.56 -        aucLabels.append(statFile)
   11.57 -    figure('AUC Histogram')
   11.58 -    hist(aucData,bins=args.bins,label=aucLabels,histtype='bar')
   11.59 -    legend(loc='upper left')
   11.60 -    ylabel('Problems')
   11.61 -    xlabel('AUC')
   11.62 -
   11.63 -    show()
   11.64 -
   11.65 -if __name__ == '__main__':
   11.66 -    #args = ['--statFiles','../tmp/natISANB.stats','../tmp/natATPNB.stats','-b','30']
   11.67 -    #sys.exit(main(args))
   11.68 -    sys.exit(main())
    12.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py	Sat Jun 28 22:13:23 2014 +0200
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,261 +0,0 @@
    12.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
    12.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    12.6 -#     Copyright   2012-2013
    12.7 -#
    12.8 -# Persistent dictionaries: accessibility, dependencies, and features.
    12.9 -
   12.10 -import sys
   12.11 -from os.path import join
   12.12 -from Queue import Queue
   12.13 -from readData import create_accessible_dict,create_dependencies_dict
   12.14 -from cPickle import load,dump
   12.15 -from exceptions import LookupError
   12.16 -
   12.17 -class Dictionaries(object):
   12.18 -    '''
   12.19 -    This class contains all info about name-> id mapping, etc.
   12.20 -    '''
   12.21 -    def __init__(self):
   12.22 -        '''
   12.23 -        Constructor
   12.24 -        '''
   12.25 -        self.nameIdDict = {}
   12.26 -        self.idNameDict = {}
   12.27 -        self.featureIdDict={}
   12.28 -        self.maxNameId = 1
   12.29 -        self.maxFeatureId = 0
   12.30 -        self.featureDict = {}
   12.31 -        self.dependenciesDict = {}
   12.32 -        self.accessibleDict = {}
   12.33 -        self.expandedAccessibles = {}
   12.34 -        self.accFile =  ''
   12.35 -        self.changed = True
   12.36 -        # Unnamed facts
   12.37 -        self.nameIdDict[''] = 0
   12.38 -        self.idNameDict[0] = 'Unnamed Fact'
   12.39 -
   12.40 -    """
   12.41 -    Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled!
   12.42 -    """
   12.43 -    def init_featureDict(self,featureFile):
   12.44 -        self.create_feature_dict(featureFile)
   12.45 -        #self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\
   12.46 -        # create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\
   12.47 -        #                     self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile)
   12.48 -    def init_dependenciesDict(self,depFile):
   12.49 -        self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile)
   12.50 -    def init_accessibleDict(self,accFile):
   12.51 -        self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile)
   12.52 -
   12.53 -    def init_all(self,args):
   12.54 -        self.featureFileName = 'mash_features'
   12.55 -        self.accFileName = 'mash_accessibility'
   12.56 -        featureFile = join(args.inputDir,self.featureFileName)
   12.57 -        depFile = join(args.inputDir,args.depFile)
   12.58 -        self.accFile = join(args.inputDir,self.accFileName)
   12.59 -        self.init_featureDict(featureFile)
   12.60 -        self.init_accessibleDict(self.accFile)
   12.61 -        self.init_dependenciesDict(depFile)
   12.62 -        self.expandedAccessibles = {}
   12.63 -        self.changed = True
   12.64 -
   12.65 -    def create_feature_dict(self,inputFile):
   12.66 -        self.featureDict = {}
   12.67 -        IS = open(inputFile,'r')
   12.68 -        for line in IS:
   12.69 -            line = line.split(':')
   12.70 -            name = line[0]
   12.71 -            # Name Id
   12.72 -            if self.nameIdDict.has_key(name):
   12.73 -                raise LookupError('%s appears twice in the feature file. Aborting.'% name)
   12.74 -                sys.exit(-1)
   12.75 -            else:
   12.76 -                self.nameIdDict[name] = self.maxNameId
   12.77 -                self.idNameDict[self.maxNameId] = name
   12.78 -                nameId = self.maxNameId
   12.79 -                self.maxNameId += 1
   12.80 -            features = self.get_features(line)
   12.81 -            # Store results
   12.82 -            self.featureDict[nameId] = features
   12.83 -        IS.close()
   12.84 -        return
   12.85 -
   12.86 -    def get_name_id(self,name):
   12.87 -        """
   12.88 -        Return the Id for a name.
   12.89 -        If it doesn't exist yet, a new entry is created.
   12.90 -        """
   12.91 -        if self.nameIdDict.has_key(name):
   12.92 -            nameId = self.nameIdDict[name]
   12.93 -        else:
   12.94 -            self.nameIdDict[name] = self.maxNameId
   12.95 -            self.idNameDict[self.maxNameId] = name
   12.96 -            nameId = self.maxNameId
   12.97 -            self.maxNameId += 1
   12.98 -            self.changed = True
   12.99 -        return nameId
  12.100 -
  12.101 -    def add_feature(self,featureName):
  12.102 -        fMul = featureName.split('|')        
  12.103 -        fIds = []
  12.104 -        for f in fMul:
  12.105 -            if not self.featureIdDict.has_key(f):
  12.106 -                self.featureIdDict[f] = self.maxFeatureId
  12.107 -                self.maxFeatureId += 1
  12.108 -                self.changed = True
  12.109 -            fId = self.featureIdDict[f]
  12.110 -            fIds.append(fId)
  12.111 -        return fIds
  12.112 -
  12.113 -    def get_features(self,line):
  12.114 -        featureNames = [f.strip() for f in line[1].split()]
  12.115 -        features = {}
  12.116 -        for fn in featureNames:
  12.117 -            tmp = fn.split('=')
  12.118 -            weight = 1.0 
  12.119 -            if len(tmp) == 2:
  12.120 -                fn = tmp[0]
  12.121 -                weight = float(tmp[1])
  12.122 -            fIds = self.add_feature(tmp[0])
  12.123 -            features[fIds[0]] = (weight,fIds[1:])
  12.124 -            #features[fId] = 1.0 ###
  12.125 -        return features
  12.126 -
  12.127 -    def expand_accessibles(self,acc):
  12.128 -        accessibles = set(acc)
  12.129 -        unexpandedQueue = Queue()
  12.130 -        for a in acc:
  12.131 -            if self.expandedAccessibles.has_key(a):
  12.132 -                accessibles = accessibles.union(self.expandedAccessibles[a])
  12.133 -            else:
  12.134 -                unexpandedQueue.put(a)
  12.135 -        while not unexpandedQueue.empty():
  12.136 -            nextUnExp = unexpandedQueue.get()
  12.137 -            nextUnExpAcc = self.accessibleDict[nextUnExp]
  12.138 -            for a in nextUnExpAcc:
  12.139 -                if not a in accessibles:
  12.140 -                    accessibles = accessibles.union([a])
  12.141 -                    if self.expandedAccessibles.has_key(a):
  12.142 -                        accessibles = accessibles.union(self.expandedAccessibles[a])
  12.143 -                    else:
  12.144 -                        unexpandedQueue.put(a)
  12.145 -        return list(accessibles)
  12.146 -
  12.147 -    def parse_unExpAcc(self,line):
  12.148 -        try:
  12.149 -            unExpAcc = [self.nameIdDict[a.strip()] for a in line.split()]            
  12.150 -        except:
  12.151 -            raise LookupError('Cannot find the accessibles:%s. Accessibles need to be introduced before referring to them.' % line)
  12.152 -        return unExpAcc
  12.153 -
  12.154 -    def parse_fact(self,line):
  12.155 -        """
  12.156 -        Parses a single line, extracting accessibles, features, and dependencies.
  12.157 -        """
  12.158 -        assert line.startswith('! ')
  12.159 -        line = line[2:]
  12.160 -
  12.161 -        # line = name:accessibles;features;dependencies
  12.162 -        line = line.split(':')
  12.163 -        name = line[0].strip()
  12.164 -        nameId = self.get_name_id(name)
  12.165 -        line = line[1].split(';')
  12.166 -        features = self.get_features(line)
  12.167 -        self.featureDict[nameId] = features
  12.168 -        try:
  12.169 -            self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()]        
  12.170 -        except:
  12.171 -            unknownDeps = []
  12.172 -            for d in line[2].split():
  12.173 -                if not self.nameIdDict.has_key(d):
  12.174 -                    unknownDeps.append(d)
  12.175 -            raise LookupError('Unknown fact used as dependency: %s. Facts need to be introduced before being used as depedency.' % ','.join(unknownDeps))
  12.176 -        self.accessibleDict[nameId] = self.parse_unExpAcc(line[0])
  12.177 -
  12.178 -        self.changed = True
  12.179 -        return nameId
  12.180 -
  12.181 -    def parse_overwrite(self,line):
  12.182 -        """
  12.183 -        Parses a single line, extracts the problemId and the Ids of the dependencies.
  12.184 -        """
  12.185 -        assert line.startswith('p ')
  12.186 -        line = line[2:]
  12.187 -
  12.188 -        # line = name:dependencies
  12.189 -        line = line.split(':')
  12.190 -        name = line[0].strip()
  12.191 -        try:
  12.192 -            nameId = self.nameIdDict[name]
  12.193 -        except:
  12.194 -            raise LookupError('Trying to overwrite dependencies for unknown fact: %s. Facts need to be introduced before overwriting them.' % name)
  12.195 -        try:
  12.196 -            dependencies = [self.nameIdDict[d.strip()] for d in line[1].split()]
  12.197 -        except:
  12.198 -            unknownDeps = []
  12.199 -            for d in line[1].split():
  12.200 -                if not self.nameIdDict.has_key(d):
  12.201 -                    unknownDeps.append(d)
  12.202 -            raise LookupError('Unknown fact used as dependency: %s. Facts need to be introduced before being used as depedency.' % ','.join(unknownDeps))
  12.203 -        self.changed = True
  12.204 -        return nameId,dependencies
  12.205 -
  12.206 -    def parse_problem(self,line):
  12.207 -        """
  12.208 -        Parses a problem and returns the features, the accessibles, and any hints.
  12.209 -        """
  12.210 -        assert line.startswith('? ')
  12.211 -        line = line[2:]
  12.212 -        name = None
  12.213 -        numberOfPredictions = None
  12.214 -
  12.215 -        # How many predictions should be returned:
  12.216 -        tmp = line.split('#')
  12.217 -        if len(tmp) == 2:
  12.218 -            numberOfPredictions = int(tmp[0].strip())
  12.219 -            line = tmp[1]
  12.220 -
  12.221 -        # Check whether there is a problem name:
  12.222 -        tmp = line.split(':')
  12.223 -        if len(tmp) == 2:
  12.224 -            name = tmp[0].strip()
  12.225 -            line = tmp[1]
  12.226 -
  12.227 -        # line = accessibles;features
  12.228 -        line = line.split(';')
  12.229 -        features = self.get_features(line)
  12.230 -        
  12.231 -        # Accessible Ids, expand and store the accessibles.
  12.232 -        #unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()]
  12.233 -        unExpAcc = self.parse_unExpAcc(line[0])        
  12.234 -        if len(self.expandedAccessibles.keys())>=100:
  12.235 -            self.expandedAccessibles = {}
  12.236 -            self.changed = True
  12.237 -        for accId in unExpAcc:
  12.238 -            if not self.expandedAccessibles.has_key(accId):
  12.239 -                accIdAcc = self.accessibleDict[accId]
  12.240 -                self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc)
  12.241 -                self.changed = True
  12.242 -        accessibles = self.expand_accessibles(unExpAcc)
  12.243 -        
  12.244 -        # Get hints:
  12.245 -        if len(line) == 3:
  12.246 -            hints = [self.nameIdDict[d.strip()] for d in line[2].split()]
  12.247 -        else:
  12.248 -            hints = []
  12.249 -
  12.250 -        return name,features,accessibles,hints,numberOfPredictions
  12.251 -
  12.252 -    def save(self,fileName):
  12.253 -        if self.changed:
  12.254 -            dictsStream = open(fileName, 'wb')
  12.255 -            dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
  12.256 -                self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream)
  12.257 -            self.changed = False
  12.258 -            dictsStream.close()
  12.259 -    def load(self,fileName):
  12.260 -        dictsStream = open(fileName, 'rb')
  12.261 -        self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
  12.262 -              self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream)
  12.263 -        self.changed = False
  12.264 -        dictsStream.close()
    13.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py	Sat Jun 28 22:13:23 2014 +0200
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,149 +0,0 @@
    13.4 -#!/usr/bin/env python
    13.5 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/mash
    13.6 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    13.7 -#     Copyright   2012 - 2013
    13.8 -#
    13.9 -# Entry point for MaSh (Machine Learning for Sledgehammer).
   13.10 -
   13.11 -'''
   13.12 -MaSh - Machine Learning for Sledgehammer
   13.13 -
   13.14 -MaSh allows to use different machine learning algorithms to predict relevant fact for Sledgehammer.
   13.15 -
   13.16 -Created on July 12, 2012
   13.17 -
   13.18 -@author: Daniel Kuehlwein
   13.19 -'''
   13.20 -
   13.21 -import socket,sys,time,logging,os
   13.22 -from os.path import realpath,dirname
   13.23 -
   13.24 -from spawnDaemon import spawnDaemon
   13.25 -from parameters import init_parser
   13.26 -
   13.27 -def communicate(data,host,port):
   13.28 -    logger = logging.getLogger('communicate')
   13.29 -    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)    
   13.30 -    try:
   13.31 -        sock.connect((host,port))
   13.32 -        sock.sendall(data+'\n')        
   13.33 -        received = ''
   13.34 -        cont = True
   13.35 -        counter = 0
   13.36 -        while cont and counter < 100000:
   13.37 -            rec = sock.recv(4096)
   13.38 -            if rec.endswith('stop'):
   13.39 -                cont = False
   13.40 -                received += rec[:-4]
   13.41 -            else:
   13.42 -                received += rec
   13.43 -            counter += 1
   13.44 -        if rec == '':
   13.45 -            logger.warning('No response from server. Check server log for details.')
   13.46 -    except:        
   13.47 -        logger.warning('Communication with server failed.')
   13.48 -        received = -1
   13.49 -    finally:
   13.50 -        sock.close()    
   13.51 -    return received
   13.52 -
   13.53 -def start_server(host,port):
   13.54 -    logger = logging.getLogger('start_server')
   13.55 -    logger.info('Starting Server.')
   13.56 -    path = dirname(realpath(__file__))
   13.57 -    spawnDaemon(os.path.join(path,'server.py'),os.path.join(path,'server.py'),host,str(port))
   13.58 -    serverIsUp=False
   13.59 -    for _i in range(20):
   13.60 -        # Test if server is up
   13.61 -        try:
   13.62 -            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   13.63 -            sock.connect((host,port))       
   13.64 -            sock.close()
   13.65 -            serverIsUp = True
   13.66 -            break
   13.67 -        except:
   13.68 -            time.sleep(0.5)
   13.69 -    if not serverIsUp:
   13.70 -        logger.error('Could not start server.')
   13.71 -        sys.exit(-1)
   13.72 -    return True
   13.73 -
   13.74 -def mash(argv = sys.argv[1:]):
   13.75 -    # Initializing command-line arguments
   13.76 -    args = init_parser(argv)
   13.77 -    # Set up logging
   13.78 -    logging.basicConfig(level=logging.DEBUG,
   13.79 -                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
   13.80 -                        datefmt='%d-%m %H:%M:%S',
   13.81 -                        filename=args.log,
   13.82 -                        filemode='w')    
   13.83 -    logger = logging.getLogger('mash')
   13.84 -
   13.85 -    if args.quiet:
   13.86 -        logger.setLevel(logging.WARNING)
   13.87 -    else:
   13.88 -        console = logging.StreamHandler(sys.stdout)
   13.89 -        console.setLevel(logging.INFO)
   13.90 -        formatter = logging.Formatter('# %(message)s')
   13.91 -        console.setFormatter(formatter)
   13.92 -        logging.getLogger('').addHandler(console)
   13.93 -        
   13.94 -    if not os.path.exists(args.outputDir):
   13.95 -        os.makedirs(args.outputDir)
   13.96 -
   13.97 -    # Shutdown commands need not start the server fist.
   13.98 -    if args.shutdownServer:
   13.99 -        logger.info('Sending shutdown command.')
  13.100 -        try:
  13.101 -            received = communicate('shutdown',args.host,args.port)
  13.102 -            logger.info(received)
  13.103 -        except:
  13.104 -            pass
  13.105 -        return
  13.106 -
  13.107 -    # If server is not running, start it.
  13.108 -    startedServer = False
  13.109 -    received = communicate(' '.join(('ping',args.modelFile,args.dictsFile)),args.host,args.port)
  13.110 -    if received == -1:
  13.111 -        startedServer = start_server(args.host,args.port)
  13.112 -    elif received.startswith('Files do not match'):
  13.113 -        logger.error('Filesnames do not match!')
  13.114 -        logger.error('Modelfile server: '+ received.split()[-2])
  13.115 -        logger.error('Modelfile argument: '+ args.modelFile)
  13.116 -        logger.error('Dictsfile server: '+ received.split()[-1])
  13.117 -        logger.error('Dictsfile argument: '+ args.dictsFile)
  13.118 -        return
  13.119 -    
  13.120 -    if args.init or startedServer:
  13.121 -        logger.info('Initializing Server.')
  13.122 -        data = "i "+";".join(argv)
  13.123 -        received = communicate(data,args.host,args.port)
  13.124 -        logger.info(received)     
  13.125 -    
  13.126 -    if not args.inputFile == None:
  13.127 -        logger.debug('Using the following settings: %s',args)
  13.128 -        # IO Streams
  13.129 -        OS = open(args.predictions,'w')
  13.130 -        IS = open(args.inputFile,'r')
  13.131 -        lineCount = 0
  13.132 -        for line in IS:
  13.133 -            lineCount += 1
  13.134 -            if lineCount % 100 == 0:
  13.135 -                logger.info('On line %s', lineCount)
  13.136 -            received = communicate(line,args.host,args.port)
  13.137 -            if not received == '':
  13.138 -                OS.write('%s\n' % received)
  13.139 -        OS.close()
  13.140 -        IS.close()
  13.141 -        
  13.142 -        # Statistics
  13.143 -        if args.statistics:
  13.144 -            received = communicate('avgStats',args.host,args.port)
  13.145 -            logger.info(received)
  13.146 -
  13.147 -    if args.saveModels:
  13.148 -        communicate('save',args.host,args.port)
  13.149 -
  13.150 -
  13.151 -if __name__ == "__main__":
  13.152 -    sys.exit(mash())
    14.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py	Sat Jun 28 22:13:23 2014 +0200
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,47 +0,0 @@
    14.4 -import datetime
    14.5 -from argparse import ArgumentParser,RawDescriptionHelpFormatter
    14.6 -
    14.7 -def init_parser(argv):
    14.8 -    # Set up command-line parser
    14.9 -    parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer.  \n\n\
   14.10 -    MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
   14.11 -    --------------- Example Usage ---------------\n\
   14.12 -    First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
   14.13 -    Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
   14.14 -    \n\n\
   14.15 -    Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
   14.16 -    parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
   14.17 -    parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
   14.18 -    parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
   14.19 -                        help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
   14.20 -    parser.add_argument('--numberOfPredictions',default=500,help="Default number of premises to write in the output. Default=500.",type=int)
   14.21 -    
   14.22 -    parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
   14.23 -    parser.add_argument('--inputDir',\
   14.24 -                        help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
   14.25 -    parser.add_argument('--depFile', default='mash_dependencies',
   14.26 -                        help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
   14.27 -    
   14.28 -    parser.add_argument('--algorithm',default='nb',help="Which learning algorithm is used. nb = Naive Bayes,KNN,predef=predefined. Default=nb.")
   14.29 -    parser.add_argument('--predef',help="File containing the predefined suggestions. Only used when algorithm = predef.")
   14.30 -    # NB Parameters
   14.31 -    parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
   14.32 -    parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
   14.33 -    parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
   14.34 -    parser.add_argument('--expandFeatures',default=False,action='store_true',help="Learning-based feature expansion. Default=False.")
   14.35 -    
   14.36 -    parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
   14.37 -                        WARNING: This will make the program a lot slower! Default=False.")
   14.38 -    parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
   14.39 -    parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
   14.40 -    parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
   14.41 -    parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
   14.42 -    parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
   14.43 -    parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
   14.44 -    
   14.45 -    parser.add_argument('--port', default='9255', help='Port of the Mash server. Default=9255',type=int)
   14.46 -    parser.add_argument('--host', default='localhost', help='Host of the Mash server. Default=localhost')
   14.47 -    parser.add_argument('--shutdownServer',default=False,action='store_true',help="Shutdown server without saving the models.")
   14.48 -    parser.add_argument('--saveModels',default=False,action='store_true',help="Server saves the models.")
   14.49 -    args = parser.parse_args(argv)    
   14.50 -    return args
    15.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py	Sat Jun 28 22:13:23 2014 +0200
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,68 +0,0 @@
    15.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/predefined.py
    15.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    15.6 -#     Copyright   2012
    15.7 -#
    15.8 -# A classifier that uses the Meng-Paulson predictions.
    15.9 -
   15.10 -'''
   15.11 -Created on Jul 11, 2012
   15.12 -
   15.13 -@author: Daniel Kuehlwein
   15.14 -'''
   15.15 -
   15.16 -from cPickle import dump,load
   15.17 -
   15.18 -class Predefined(object):
   15.19 -    '''
   15.20 -    A classifier that uses the Meng-Paulson predictions.
   15.21 -    Only used to easily compare statistics between the old Sledgehammer algorithm and the new machine learning ones.
   15.22 -    '''
   15.23 -
   15.24 -    def __init__(self,mpPredictionFile):
   15.25 -        '''
   15.26 -        Constructor
   15.27 -        '''
   15.28 -        self.predictionFile = mpPredictionFile
   15.29 -
   15.30 -    def initializeModel(self,_trainData,dicts):
   15.31 -        """
   15.32 -        Load predictions.
   15.33 -        """
   15.34 -        self.predictions = {}
   15.35 -        IS = open(self.predictionFile,'r')
   15.36 -        for line in IS:
   15.37 -            line = line.split(':')
   15.38 -            name = line[0].strip()
   15.39 -            predId = dicts.get_name_id(name)
   15.40 -            line = line[1].split()
   15.41 -            predsTmp = []
   15.42 -            for x in line:
   15.43 -                x = x.split('=')
   15.44 -                predsTmp.append(x[0])
   15.45 -            preds = [dicts.get_name_id(x.strip())for x in predsTmp]
   15.46 -            self.predictions[predId] = preds
   15.47 -        IS.close()        
   15.48 -
   15.49 -    def update(self,dataPoint,features,dependencies):
   15.50 -        """
   15.51 -        Updates the Model.
   15.52 -        """
   15.53 -        # No Update needed since we assume that we got all predictions
   15.54 -        pass
   15.55 -
   15.56 -
   15.57 -    def predict(self,problemId):
   15.58 -        """
   15.59 -        Return the saved predictions.
   15.60 -        """
   15.61 -        return self.predictions[problemId]
   15.62 -
   15.63 -    def save(self,fileName):
   15.64 -        OStream = open(fileName, 'wb')
   15.65 -        dump((self.predictionFile,self.predictions),OStream)
   15.66 -        OStream.close()
   15.67 -
   15.68 -    def load(self,fileName):
   15.69 -        OStream = open(fileName, 'rb')
   15.70 -        self.predictionFile,self.predictions = load(OStream)
   15.71 -        OStream.close()
    16.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/readData.py	Sat Jun 28 22:13:23 2014 +0200
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,58 +0,0 @@
    16.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/readData.py
    16.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    16.6 -#     Copyright   2012
    16.7 -#
    16.8 -# All functions to read the Isabelle output.
    16.9 -
   16.10 -'''
   16.11 -All functions to read the Isabelle output.
   16.12 -
   16.13 -Created on July 9, 2012
   16.14 -
   16.15 -@author: Daniel Kuehlwein
   16.16 -'''
   16.17 -
   16.18 -import sys,logging
   16.19 -
   16.20 -def create_dependencies_dict(nameIdDict,inputFile):
   16.21 -    logger = logging.getLogger('create_dependencies_dict')
   16.22 -    dependenciesDict = {}
   16.23 -    IS = open(inputFile,'r')
   16.24 -    for line in IS:
   16.25 -        line = line.split(':')
   16.26 -        name = line[0]
   16.27 -        # Name Id
   16.28 -        if not nameIdDict.has_key(name):
   16.29 -            logger.warning('%s is missing in nameIdDict. Aborting.',name)
   16.30 -            sys.exit(-1)
   16.31 -
   16.32 -        nameId = nameIdDict[name]
   16.33 -        dependenciesIds = [nameIdDict[f.strip()] for f in line[1].split()]
   16.34 -        # Store results, add p proves p
   16.35 -        if nameId == 0:
   16.36 -            dependenciesDict[nameId] = dependenciesIds
   16.37 -        else:
   16.38 -            dependenciesDict[nameId] = [nameId] + dependenciesIds
   16.39 -    IS.close()
   16.40 -    return dependenciesDict
   16.41 -
   16.42 -def create_accessible_dict(nameIdDict,idNameDict,maxNameId,inputFile):
   16.43 -    logger = logging.getLogger('create_accessible_dict')
   16.44 -    accessibleDict = {}
   16.45 -    IS = open(inputFile,'r')
   16.46 -    for line in IS:
   16.47 -        line = line.split(':')
   16.48 -        name = line[0]
   16.49 -        # Name Id
   16.50 -        if not nameIdDict.has_key(name):
   16.51 -            logger.warning('%s is missing in nameIdDict. Adding it as theory.',name)
   16.52 -            nameIdDict[name] = maxNameId
   16.53 -            idNameDict[maxNameId] = name
   16.54 -            nameId = maxNameId
   16.55 -            maxNameId += 1
   16.56 -        else:
   16.57 -            nameId = nameIdDict[name]
   16.58 -        accessibleStrings = line[1].split()
   16.59 -        accessibleDict[nameId] = [nameIdDict[a.strip()] for a in accessibleStrings]
   16.60 -    IS.close()
   16.61 -    return accessibleDict,maxNameId
    17.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/server.py	Sat Jun 28 22:13:23 2014 +0200
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,234 +0,0 @@
    17.4 -#!/usr/bin/env python
    17.5 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/server.py
    17.6 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    17.7 -#     Copyright   2013
    17.8 -#
    17.9 -# The MaSh Server.
   17.10 -
   17.11 -import SocketServer,os,string,logging,sys
   17.12 -from multiprocessing import Manager
   17.13 -from threading import Timer
   17.14 -from time import time
   17.15 -from dictionaries import Dictionaries
   17.16 -from parameters import init_parser
   17.17 -from sparseNaiveBayes import sparseNBClassifier
   17.18 -from KNN import KNN,euclidean
   17.19 -from KNNs import KNNAdaptPointFeatures,KNNUrban
   17.20 -#from bayesPlusMetric import sparseNBPlusClassifier
   17.21 -from predefined import Predefined
   17.22 -from ExpandFeatures import ExpandFeatures
   17.23 -from stats import Statistics
   17.24 -
   17.25 -
   17.26 -class ThreadingTCPServer(SocketServer.ThreadingTCPServer):
   17.27 -    
   17.28 -    def __init__(self, *args, **kwargs):
   17.29 -        SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs)
   17.30 -        self.manager = Manager()
   17.31 -        self.lock = Manager().Lock()
   17.32 -        self.idle_timeout = 28800.0 # 8 hours in seconds
   17.33 -        self.idle_timer = Timer(self.idle_timeout, self.shutdown)
   17.34 -        self.idle_timer.start()        
   17.35 -        self.model = None
   17.36 -        self.dicts = None
   17.37 -        self.callCounter = 0
   17.38 -        
   17.39 -    def save(self):
   17.40 -        if self.model == None or self.dicts == None:
   17.41 -            try:
   17.42 -                self.logger.warning('Cannot save nonexisting models.')
   17.43 -            except:
   17.44 -                pass
   17.45 -            return
   17.46 -        # Save Models
   17.47 -        self.model.save(self.args.modelFile)
   17.48 -        self.dicts.save(self.args.dictsFile)
   17.49 -        if not self.args.saveStats == None:
   17.50 -            statsFile = os.path.join(self.args.outputDir,self.args.saveStats)
   17.51 -            self.stats.save(statsFile)   
   17.52 -               
   17.53 -    def save_and_shutdown(self): 
   17.54 -        self.save()          
   17.55 -        self.shutdown()
   17.56 -
   17.57 -class MaShHandler(SocketServer.StreamRequestHandler):
   17.58 -
   17.59 -    def init(self,argv):
   17.60 -        if argv == '':
   17.61 -            self.server.args = init_parser([])
   17.62 -        else:
   17.63 -            argv = argv.split(';')
   17.64 -            self.server.args = init_parser(argv)
   17.65 -
   17.66 -        # Set up logging
   17.67 -        logging.basicConfig(level=logging.DEBUG,
   17.68 -                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
   17.69 -                            datefmt='%d-%m %H:%M:%S',
   17.70 -                            filename=self.server.args.log+'server',
   17.71 -                            filemode='w')    
   17.72 -        self.server.logger = logging.getLogger('server')
   17.73 -            
   17.74 -        # Load all data
   17.75 -        self.server.dicts = Dictionaries()
   17.76 -        if os.path.isfile(self.server.args.dictsFile):
   17.77 -            self.server.dicts.load(self.server.args.dictsFile)
   17.78 -        #elif not self.server.args.dictsFile == '../tmp/dict.pickle':
   17.79 -        #    raise IOError('Cannot find dictsFile at %s '% self.server.args.dictsFile)        
   17.80 -        elif self.server.args.init:
   17.81 -            self.server.dicts.init_all(self.server.args)
   17.82 -        # Pick model
   17.83 -        if self.server.args.algorithm == 'nb':
   17.84 -            ###TODO: !! 
   17.85 -            self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)            
   17.86 -            #self.server.model = sparseNBPlusClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
   17.87 -        elif self.server.args.algorithm == 'KNN':
   17.88 -            #self.server.model = KNN(self.server.dicts)
   17.89 -            self.server.model = KNNAdaptPointFeatures(self.server.dicts)
   17.90 -        elif self.server.args.algorithm == 'predef':
   17.91 -            self.server.model = Predefined(self.server.args.predef)
   17.92 -        else: # Default case
   17.93 -            self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
   17.94 -        if self.server.args.expandFeatures:
   17.95 -            self.server.expandFeatures = ExpandFeatures(self.server.dicts)
   17.96 -            self.server.expandFeatures.initialize(self.server.dicts)
   17.97 -        # Create Model
   17.98 -        if os.path.isfile(self.server.args.modelFile):
   17.99 -            self.server.model.load(self.server.args.modelFile)          
  17.100 -        #elif not self.server.args.modelFile == '../tmp/model.pickle':
  17.101 -        #    raise IOError('Cannot find modelFile at %s '% self.server.args.modelFile)        
  17.102 -        elif self.server.args.init:
  17.103 -            trainData = self.server.dicts.featureDict.keys()
  17.104 -            self.server.model.initializeModel(trainData,self.server.dicts)
  17.105 -           
  17.106 -        if self.server.args.statistics:
  17.107 -            self.server.stats = Statistics(self.server.args.cutOff)
  17.108 -            self.server.statementCounter = 1
  17.109 -            self.server.computeStats = False
  17.110 -
  17.111 -        self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.')
  17.112 -        self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.')
  17.113 -        self.server.callCounter = 1
  17.114 -
  17.115 -    def update(self):
  17.116 -        problemId = self.server.dicts.parse_fact(self.data)    
  17.117 -        # Statistics
  17.118 -        if self.server.args.statistics and self.server.computeStats:
  17.119 -            self.server.computeStats = False
  17.120 -            # Assume '!' comes after '?'
  17.121 -            if self.server.args.algorithm == 'predef':
  17.122 -                self.server.predictions = self.server.model.predict(problemId)
  17.123 -            self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter)
  17.124 -            if not self.server.stats.badPreds == []:
  17.125 -                bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',')
  17.126 -                self.server.logger.debug('Poor predictions: %s',bp)
  17.127 -            self.server.statementCounter += 1
  17.128 -
  17.129 -        if self.server.args.expandFeatures:
  17.130 -            self.server.expandFeatures.update(self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
  17.131 -        # Update Dependencies, p proves p
  17.132 -        if not problemId == 0:
  17.133 -            self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId]
  17.134 -        ###TODO: 
  17.135 -        self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
  17.136 -        #self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId],self.server.dicts)
  17.137 -
  17.138 -    def overwrite(self):
  17.139 -        # Overwrite old proof.
  17.140 -        problemId,newDependencies = self.server.dicts.parse_overwrite(self.data)
  17.141 -        newDependencies = [problemId]+newDependencies
  17.142 -        self.server.model.overwrite(problemId,newDependencies,self.server.dicts)
  17.143 -        self.server.dicts.dependenciesDict[problemId] = newDependencies
  17.144 -        
  17.145 -    def predict(self):
  17.146 -        self.server.computeStats = True
  17.147 -        if self.server.args.algorithm == 'predef':
  17.148 -            return
  17.149 -        name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data)
  17.150 -        if numberOfPredictions == None:
  17.151 -            numberOfPredictions = self.server.args.numberOfPredictions
  17.152 -        if not hints == []:
  17.153 -            self.server.model.update('hints',features,hints)
  17.154 -        if self.server.args.expandFeatures:
  17.155 -            features = self.server.expandFeatures.expand(features)
  17.156 -        # Create predictions
  17.157 -        self.server.logger.debug('Starting computation for line %s',self.server.callCounter)
  17.158 -                
  17.159 -        self.server.predictions,predictionValues = self.server.model.predict(features,accessibles,self.server.dicts)
  17.160 -        assert len(self.server.predictions) == len(predictionValues)
  17.161 -        self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2)))
  17.162 -
  17.163 -        # Output        
  17.164 -        predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]]
  17.165 -        predictionValues = [str(x) for x in predictionValues[:numberOfPredictions]]
  17.166 -        predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
  17.167 -        predictionsString = string.join(predictionsStringList,' ')
  17.168 -        #predictionsString = string.join(predictionNames,' ')        
  17.169 -        outString = '%s: %s' % (name,predictionsString)
  17.170 -        self.request.sendall(outString)
  17.171 -    
  17.172 -    def shutdown(self,saveModels=True):
  17.173 -        self.request.sendall('Shutting down server.')
  17.174 -        if saveModels:
  17.175 -            self.server.save()
  17.176 -        self.server.idle_timer.cancel()
  17.177 -        self.server.idle_timer = Timer(0.5, self.server.shutdown)
  17.178 -        self.server.idle_timer.start()    
  17.179 -
  17.180 -    def handle(self):
  17.181 -        # self.request is the TCP socket connected to the client
  17.182 -        self.server.lock.acquire()
  17.183 -        self.data = self.rfile.readline().strip()
  17.184 -        try:
  17.185 -            # Update idle shutdown timer
  17.186 -            self.server.idle_timer.cancel()
  17.187 -            self.server.idle_timer = Timer(self.server.idle_timeout, self.server.save_and_shutdown)
  17.188 -            self.server.idle_timer.start()        
  17.189 -
  17.190 -            self.startTime = time()
  17.191 -            if self.data == 'shutdown':
  17.192 -                self.shutdown()         
  17.193 -            elif self.data == 'save':
  17.194 -                self.server.save()       
  17.195 -            elif self.data.startswith('ping'):
  17.196 -                mFile, dFile = self.data.split()[1:]
  17.197 -                if mFile == self.server.args.modelFile and dFile == self.server.args.dictsFile:
  17.198 -                    self.request.sendall('All good.')
  17.199 -                else:
  17.200 -                    self.request.sendall('Files do not match '+' '.join((self.server.args.modelFile,self.server.args.dictsFile)))
  17.201 -            elif self.data.startswith('i'):            
  17.202 -                self.init(self.data[2:])
  17.203 -            elif self.data.startswith('!'):
  17.204 -                self.update()
  17.205 -            elif self.data.startswith('p'):
  17.206 -                self.overwrite()
  17.207 -            elif self.data.startswith('?'):               
  17.208 -                self.predict()
  17.209 -            elif self.data == '':
  17.210 -                # Empty Socket
  17.211 -                return
  17.212 -            elif self.data == 'avgStats':
  17.213 -                self.request.sendall(self.server.stats.printAvg())            
  17.214 -            else:
  17.215 -                self.request.sendall('Unspecified input format: \n%s',self.data)
  17.216 -            self.server.callCounter += 1
  17.217 -            self.request.sendall('stop')                       
  17.218 -        except: # catch exceptions
  17.219 -            #print 'Caught an error. Check %s for more details' % (self.server.args.log+'server')
  17.220 -            logging.exception('')
  17.221 -        finally:
  17.222 -            self.server.lock.release()
  17.223 -
  17.224 -if __name__ == "__main__":
  17.225 -    if not len(sys.argv[1:]) == 2:
  17.226 -        print 'No Arguments for HOST and PORT found. Using localhost and 9255'
  17.227 -        HOST, PORT = "localhost", 9255
  17.228 -    else:
  17.229 -        HOST, PORT = sys.argv[1:]
  17.230 -    SocketServer.TCPServer.allow_reuse_address = True
  17.231 -    server = ThreadingTCPServer((HOST, int(PORT)), MaShHandler)
  17.232 -    server.serve_forever()        
  17.233 -
  17.234 -
  17.235 -    
  17.236 -    
  17.237 -    
  17.238 \ No newline at end of file
    18.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py	Sat Jun 28 22:13:23 2014 +0200
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,176 +0,0 @@
    18.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py
    18.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    18.6 -#     Copyright   2012
    18.7 -#
    18.8 -# An updatable sparse naive Bayes classifier.
    18.9 -
   18.10 -'''
   18.11 -Created on Jul 11, 2012
   18.12 -
   18.13 -@author: Daniel Kuehlwein
   18.14 -'''
   18.15 -
   18.16 -from cPickle import dump,load
   18.17 -from math import log,exp
   18.18 -
   18.19 -
   18.20 -class singleNBClassifier(object):
   18.21 -    '''
   18.22 -    An updateable naive Bayes classifier.
   18.23 -    '''
   18.24 -
   18.25 -    def __init__(self,defValPos = -7.5,defValNeg = -15.0,posWeight = 10.0):
   18.26 -        '''
   18.27 -        Constructor
   18.28 -        '''
   18.29 -        self.neg = 0.0
   18.30 -        self.pos = 0.0
   18.31 -        self.counts = {} # Counts is the tuple poscounts,negcounts
   18.32 -        self.defValPos = defValPos       
   18.33 -        self.defValNeg = defValNeg
   18.34 -        self.posWeight = posWeight        
   18.35 -    
   18.36 -    def update(self,features,label):
   18.37 -        """
   18.38 -        Updates the Model.
   18.39 -        
   18.40 -        @param label: True or False, True if the features belong to a positive label, false else.
   18.41 -        """
   18.42 -        #print label,self.pos,self.neg,self.counts
   18.43 -        if label:
   18.44 -            self.pos += 1
   18.45 -        else:
   18.46 -            self.neg += 1
   18.47 -        
   18.48 -        for f,_w in features:
   18.49 -            if not self.counts.has_key(f):
   18.50 -                if label:
   18.51 -                    fPosCount = 0.0
   18.52 -                    fNegCount = 0.0
   18.53 -                    self.counts[f] = [fPosCount,fNegCount]
   18.54 -                else:
   18.55 -                    continue
   18.56 -            posCount,negCount = self.counts[f]
   18.57 -            if label:
   18.58 -                posCount += 1
   18.59 -            else:
   18.60 -                negCount += 1
   18.61 -            self.counts[f] = [posCount,negCount]
   18.62 -        #print label,self.pos,self.neg,self.counts
   18.63 -                
   18.64 - 
   18.65 -    def delete(self,features,label):
   18.66 -        """
   18.67 -        Deletes a single datapoint from the model.
   18.68 -        """
   18.69 -        if label:
   18.70 -            self.pos -= 1
   18.71 -        else:
   18.72 -            self.neg -= 1
   18.73 -        for f,_w in features:
   18.74 -            posCount,negCount = self.counts[f]
   18.75 -            if label:
   18.76 -                posCount -= 1
   18.77 -            else:
   18.78 -                negCount -= 1
   18.79 -            self.counts[f] = [posCount,negCount]
   18.80 -
   18.81 -            
   18.82 -    def overwrite(self,features,labelOld,labelNew):
   18.83 -        """
   18.84 -        Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly.
   18.85 -        """
   18.86 -        self.delete(features,labelOld)
   18.87 -        self.update(features,labelNew)
   18.88 -    
   18.89 -    def predict_sparse(self,features):
   18.90 -        """
   18.91 -        Returns 1 if the probability of + being the correct label is greater than the probability that - is the correct label.
   18.92 -        """
   18.93 -        if self.neg == 0:
   18.94 -            return 1
   18.95 -        elif self.pos ==0:
   18.96 -            return 0
   18.97 -        logneg = log(self.neg)
   18.98 -        lognegprior=log(float(self.neg)/5)
   18.99 -        logpos = log(self.pos)
  18.100 -        prob = logpos - lognegprior
  18.101 -        
  18.102 -        for f,_w in features:
  18.103 -            if self.counts.has_key(f):
  18.104 -                posCount,negCount = self.counts[f]
  18.105 -                if posCount > 0:
  18.106 -                    prob += (log(self.posWeight * posCount) - logpos)
  18.107 -                else:
  18.108 -                    prob += self.defValPos
  18.109 -                if negCount > 0:
  18.110 -                    prob -= (log(negCount) - logneg)
  18.111 -                else:
  18.112 -                    prob -= self.defValNeg 
  18.113 -        if prob >= 0 : 
  18.114 -            return 1
  18.115 -        else:
  18.116 -            return 0
  18.117 -    
  18.118 -    def predict(self,features):    
  18.119 -        """
  18.120 -        Returns 1 if the probability is greater than 50%.
  18.121 -        """
  18.122 -        if self.neg == 0:
  18.123 -            return 1
  18.124 -        elif self.pos ==0:
  18.125 -            return 0
  18.126 -        defVal = -15.0       
  18.127 -        expDefVal = exp(defVal)
  18.128 -        
  18.129 -        logneg = log(self.neg)
  18.130 -        logpos = log(self.pos)
  18.131 -        prob = logpos - logneg
  18.132 -        
  18.133 -        for f in self.counts.keys():
  18.134 -            posCount,negCount = self.counts[f]
  18.135 -            if f in features:
  18.136 -                if posCount == 0:
  18.137 -                    prob += defVal
  18.138 -                else:
  18.139 -                    prob += log(float(posCount)/self.pos)
  18.140 -                if negCount == 0:
  18.141 -                    prob -= defVal
  18.142 -                else:
  18.143 -                    prob -= log(float(negCount)/self.neg)
  18.144 -            else:
  18.145 -                if posCount == self.pos:
  18.146 -                    prob += log(1-expDefVal)
  18.147 -                else:
  18.148 -                    prob += log(1-float(posCount)/self.pos)
  18.149 -                if negCount == self.neg:
  18.150 -                    prob -= log(1-expDefVal)
  18.151 -                else:
  18.152 -                    prob -= log(1-float(negCount)/self.neg)
  18.153 -
  18.154 -        if prob >= 0 : 
  18.155 -            return 1
  18.156 -        else:
  18.157 -            return 0        
  18.158 -        
  18.159 -    def save(self,fileName):
  18.160 -        OStream = open(fileName, 'wb')
  18.161 -        dump(self.counts,OStream)        
  18.162 -        OStream.close()
  18.163 -        
  18.164 -    def load(self,fileName):
  18.165 -        OStream = open(fileName, 'rb')
  18.166 -        self.counts = load(OStream)      
  18.167 -        OStream.close()
  18.168 -
  18.169 -if __name__ == '__main__':
  18.170 -    x = singleNBClassifier()
  18.171 -    x.update([0], True)
  18.172 -    assert x.predict([0]) == 1
  18.173 -    x = singleNBClassifier()
  18.174 -    x.update([0], False)
  18.175 -    assert x.predict([0]) == 0    
  18.176 -    
  18.177 -    x.update([0], True)
  18.178 -    x.update([1], True)
  18.179 -    print x.pos,x.neg,x.predict([0,1])
  18.180 \ No newline at end of file
    19.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py	Sat Jun 28 22:13:23 2014 +0200
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,135 +0,0 @@
    19.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/snow.py
    19.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    19.6 -#     Copyright   2012
    19.7 -#
    19.8 -# Wrapper for SNoW.
    19.9 -
   19.10 -'''
   19.11 -
   19.12 -Created on Jul 12, 2012
   19.13 -
   19.14 -@author: daniel
   19.15 -'''
   19.16 -
   19.17 -import logging,shlex,subprocess,string,shutil
   19.18 -#from cPickle import load,dump
   19.19 -
   19.20 -class SNoW(object):
   19.21 -    '''
   19.22 -    Calls the SNoW framework.
   19.23 -    '''
   19.24 -
   19.25 -    def __init__(self):
   19.26 -        '''
   19.27 -        Constructor
   19.28 -        '''
   19.29 -        self.logger = logging.getLogger('SNoW')
   19.30 -        self.SNoWTrainFile = '../tmp/snow.train'
   19.31 -        self.SNoWTestFile = '../snow.test'
   19.32 -        self.SNoWNetFile = '../tmp/snow.net'
   19.33 -        self.defMaxNameId = 100000
   19.34 -
   19.35 -    def initializeModel(self,trainData,dicts):
   19.36 -        """
   19.37 -        Build basic model from training data.
   19.38 -        """
   19.39 -        # Prepare input files
   19.40 -        self.logger.debug('Creating IO Files')
   19.41 -        OS = open(self.SNoWTrainFile,'w')
   19.42 -        for nameId in trainData:
   19.43 -            features = [f+dicts.maxNameId for f,_w in dicts.featureDict[nameId]]
   19.44 -            #features = [f+self.defMaxNameId for f,_w in dicts.featureDict[nameId]]
   19.45 -            features = map(str,features)
   19.46 -            featureString = string.join(features,',')
   19.47 -            dependencies = dicts.dependenciesDict[nameId]
   19.48 -            dependencies = map(str,dependencies)
   19.49 -            dependenciesString = string.join(dependencies,',')
   19.50 -            snowString = string.join([featureString,dependenciesString],',')+':\n'
   19.51 -            OS.write(snowString)
   19.52 -        OS.close()
   19.53 -
   19.54 -        # Build Model
   19.55 -        self.logger.debug('Building Model START.')
   19.56 -        snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1)
   19.57 -        #print snowTrainCommand
   19.58 -        #snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1)
   19.59 -        args = shlex.split(snowTrainCommand)
   19.60 -        p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
   19.61 -        p.wait()
   19.62 -        self.logger.debug('Building Model END.')
   19.63 -
   19.64 -    def update(self,dataPoint,features,dependencies,dicts):
   19.65 -        """
   19.66 -        Updates the Model.
   19.67 -        """
   19.68 -        """
   19.69 -        self.logger.debug('Updating Model START')
   19.70 -        # Ignore Feature weights        
   19.71 -        features = [f+self.defMaxNameId for f,_w in features]
   19.72 -        
   19.73 -        OS = open(self.SNoWTestFile,'w')
   19.74 -        features = map(str,features)
   19.75 -        featureString = string.join(features, ',')
   19.76 -        dependencies = map(str,dependencies)
   19.77 -        dependenciesString = string.join(dependencies,',')
   19.78 -        snowString = string.join([featureString,dependenciesString],',')+':\n'
   19.79 -        OS.write(snowString)
   19.80 -        OS.close()
   19.81 -        snowTestCommand = '../bin/snow -test -I %s -F %s -o allboth -i+' % (self.SNoWTestFile,self.SNoWNetFile) 
   19.82 -        args = shlex.split(snowTestCommand)
   19.83 -        p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
   19.84 -        (_lines, _stderrdata) = p.communicate()
   19.85 -        # Move new net file        
   19.86 -        src = self.SNoWNetFile+'.new'
   19.87 -        dst = self.SNoWNetFile
   19.88 -        shutil.move(src, dst)        
   19.89 -        self.logger.debug('Updating Model END')
   19.90 -        """
   19.91 -        # Do nothing, only update at evaluation. Is a lot faster.
   19.92 -        pass
   19.93 -
   19.94 -
   19.95 -    def predict(self,features,accessibles,dicts):
   19.96 -        trainData = dicts.featureDict.keys()
   19.97 -        self.initializeModel(trainData, dicts)        
   19.98 -        
   19.99 -        logger = logging.getLogger('predict_SNoW')        
  19.100 -        # Ignore Feature weights
  19.101 -        #features = [f+self.defMaxNameId for f,_w in features]
  19.102 -        features = [f+dicts.maxNameId for f,_w in features]
  19.103 -
  19.104 -        OS = open(self.SNoWTestFile,'w')
  19.105 -        features = map(str,features)
  19.106 -        featureString = string.join(features, ',')
  19.107 -        snowString = featureString+':'
  19.108 -        OS.write(snowString)
  19.109 -        OS.close()
  19.110 -
  19.111 -        snowTestCommand = '../bin/snow -test -I %s -F %s -o allboth' % (self.SNoWTestFile,self.SNoWNetFile)
  19.112 -        args = shlex.split(snowTestCommand)
  19.113 -        p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
  19.114 -        (lines, _stderrdata) = p.communicate()
  19.115 -        logger.debug('SNoW finished.')
  19.116 -        lines = lines.split('\n')
  19.117 -        assert lines[9].startswith('Example ')
  19.118 -        assert lines[-4] == ''
  19.119 -        predictionsCon = []
  19.120 -        predictionsValues = []
  19.121 -        for line in lines[10:-4]:
  19.122 -            premiseId = int(line.split()[0][:-1])
  19.123 -            predictionsCon.append(premiseId)
  19.124 -            val = line.split()[4]
  19.125 -            if val.endswith('*'):
  19.126 -                val = float(val[:-1])
  19.127 -            else:
  19.128 -                val = float(val)
  19.129 -            predictionsValues.append(val)
  19.130 -        return predictionsCon,predictionsValues
  19.131 -
  19.132 -    def save(self,fileName):
  19.133 -        # Nothing to do since we don't update
  19.134 -        pass
  19.135 -    
  19.136 -    def load(self,fileName):
  19.137 -        # Nothing to do since we don't update
  19.138 -        pass
    20.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py	Sat Jun 28 22:13:23 2014 +0200
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,179 +0,0 @@
    20.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py
    20.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    20.6 -#     Copyright   2012
    20.7 -#
    20.8 -# An updatable sparse naive Bayes classifier.
    20.9 -
   20.10 -'''
   20.11 -Created on Jul 11, 2012
   20.12 -
   20.13 -@author: Daniel Kuehlwein
   20.14 -'''
   20.15 -from cPickle import dump,load
   20.16 -from numpy import array
   20.17 -from math import log
   20.18 -
   20.19 -class sparseNBClassifier(object):
   20.20 -    '''
   20.21 -    An updateable naive Bayes classifier.
   20.22 -    '''
   20.23 -
   20.24 -    def __init__(self,defaultPriorWeight = 20.0,posWeight = 20.0,defVal = -15.0):
   20.25 -        '''
   20.26 -        Constructor
   20.27 -        '''
   20.28 -        self.counts = {}
   20.29 -        self.defaultPriorWeight = defaultPriorWeight
   20.30 -        self.posWeight = posWeight
   20.31 -        self.defVal = defVal
   20.32 -
   20.33 -    def initializeModel(self,trainData,dicts):
   20.34 -        """
   20.35 -        Build basic model from training data.
   20.36 -        """
   20.37 -        for d in trainData:            
   20.38 -            dFeatureCounts = {}
   20.39 -            # Add p proves p with weight self.defaultPriorWeight
   20.40 -            if not self.defaultPriorWeight == 0:            
   20.41 -                for f in dicts.featureDict[d].iterkeys():
   20.42 -                    dFeatureCounts[f] = self.defaultPriorWeight
   20.43 -            self.counts[d] = [self.defaultPriorWeight,dFeatureCounts]
   20.44 -
   20.45 -        for key,keyDeps in dicts.dependenciesDict.iteritems():
   20.46 -            keyFeatures = dicts.featureDict[key]
   20.47 -            for dep in keyDeps:
   20.48 -                self.counts[dep][0] += 1
   20.49 -                #depFeatures = dicts.featureDict[key]
   20.50 -                for f in keyFeatures.iterkeys():
   20.51 -                    if self.counts[dep][1].has_key(f):
   20.52 -                        self.counts[dep][1][f] += 1
   20.53 -                    else:
   20.54 -                        self.counts[dep][1][f] = 1
   20.55 -
   20.56 -
   20.57 -    def update(self,dataPoint,features,dependencies):
   20.58 -        """
   20.59 -        Updates the Model.
   20.60 -        """
   20.61 -        if (not self.counts.has_key(dataPoint)) and (not dataPoint == 0):
   20.62 -            dFeatureCounts = {}            
   20.63 -            # Give p |- p a higher weight
   20.64 -            if not self.defaultPriorWeight == 0:               
   20.65 -                for f in features.iterkeys():
   20.66 -                    dFeatureCounts[f] = self.defaultPriorWeight
   20.67 -            self.counts[dataPoint] = [self.defaultPriorWeight,dFeatureCounts]            
   20.68 -        for dep in dependencies:
   20.69 -            self.counts[dep][0] += 1
   20.70 -            for f in features.iterkeys():
   20.71 -                if self.counts[dep][1].has_key(f):
   20.72 -                    self.counts[dep][1][f] += 1
   20.73 -                else:
   20.74 -                    self.counts[dep][1][f] = 1
   20.75 -
   20.76 -    def delete(self,dataPoint,features,dependencies):
   20.77 -        """
   20.78 -        Deletes a single datapoint from the model.
   20.79 -        """
   20.80 -        for dep in dependencies:
   20.81 -            self.counts[dep][0] -= 1
   20.82 -            for f,_w in features.items():
   20.83 -                self.counts[dep][1][f] -= 1
   20.84 -                if self.counts[dep][1][f] == 0:
   20.85 -                    del self.counts[dep][1][f]
   20.86 -
   20.87 -
   20.88 -    def overwrite(self,problemId,newDependencies,dicts):
   20.89 -        """
   20.90 -        Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly.
   20.91 -        """
   20.92 -        try:
   20.93 -            assert self.counts.has_key(problemId)
   20.94 -        except:
   20.95 -            raise LookupError('Trying to overwrite dependencies for unknown fact: %s. Facts need to be introduced before overwriting them.' % dicts.idNameDict[problemId])
   20.96 -        oldDeps = dicts.dependenciesDict[problemId]
   20.97 -        features = dicts.featureDict[problemId]
   20.98 -        self.delete(problemId,features,oldDeps)
   20.99 -        self.update(problemId,features,newDependencies)
  20.100 -
  20.101 -    def predict(self,features,accessibles,dicts):
  20.102 -        """
  20.103 -        For each accessible, predicts the probability of it being useful given the features.
  20.104 -        Returns a ranking of the accessibles.
  20.105 -        """
  20.106 -        tau = 0.05 # Jasmin, change value here
  20.107 -        predictions = []
  20.108 -        observedFeatures = features.keys()
  20.109 -        for fVal in features.itervalues():
  20.110 -            _w,alternateF = fVal
  20.111 -            observedFeatures += alternateF
  20.112 -            
  20.113 -        for a in accessibles:
  20.114 -            posA = self.counts[a][0]
  20.115 -            fA = set(self.counts[a][1].keys())
  20.116 -            fWeightsA = self.counts[a][1]
  20.117 -            resultA = log(posA)
  20.118 -            for f,fVal in features.iteritems():
  20.119 -                w,alternateF = fVal
  20.120 -                # DEBUG
  20.121 -                #w = 1.0
  20.122 -                # Test for multiple features
  20.123 -                isMatch = False
  20.124 -                matchF = None
  20.125 -                if f in fA:
  20.126 -                    isMatch = True
  20.127 -                    matchF = f
  20.128 -                elif len(alternateF) > 0:
  20.129 -                    inter = set(alternateF).intersection(fA)
  20.130 -                    if len(inter) > 0:
  20.131 -                        isMatch = True
  20.132 -                        for mF in inter:
  20.133 -                            ### TODO: matchF is randomly selected
  20.134 -                            matchF = mF
  20.135 -                            break
  20.136 -                 
  20.137 -                if isMatch:
  20.138 -                #if f in fA:
  20.139 -                    if fWeightsA[matchF] == 0:
  20.140 -                        resultA += w*self.defVal
  20.141 -                    else:
  20.142 -                        assert fWeightsA[matchF] <= posA
  20.143 -                        resultA += w*log(float(self.posWeight*fWeightsA[matchF])/posA)
  20.144 -                else:
  20.145 -                    resultA += w*self.defVal
  20.146 -            if not tau == 0.0:
  20.147 -                missingFeatures = list(fA.difference(observedFeatures))
  20.148 -                #sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures])  # slower
  20.149 -                sumOfWeights = sum([log(float(fWeightsA[x])) for x in missingFeatures]) - log(posA) * len(missingFeatures) #DEFAULT
  20.150 -                #sumOfWeights = sum([log(float(fWeightsA[x])/self.totalFeatureCounts[x]) for x in missingFeatures]) - log(posA) * len(missingFeatures)
  20.151 -                resultA -= tau * sumOfWeights
  20.152 -            predictions.append(resultA)
  20.153 -        predictions = array(predictions)
  20.154 -        perm = (-predictions).argsort()
  20.155 -        return array(accessibles)[perm],predictions[perm]
  20.156 -
  20.157 -    def save(self,fileName):
  20.158 -        OStream = open(fileName, 'wb')
  20.159 -        dump((self.counts,self.defaultPriorWeight,self.posWeight,self.defVal),OStream)
  20.160 -        OStream.close()
  20.161 -
  20.162 -    def load(self,fileName):
  20.163 -        OStream = open(fileName, 'rb')
  20.164 -        self.counts,self.defaultPriorWeight,self.posWeight,self.defVal = load(OStream)
  20.165 -        OStream.close()
  20.166 -
  20.167 -
  20.168 -if __name__ == '__main__':
  20.169 -    featureDict = {0:[0,1,2],1:[3,2,1]}
  20.170 -    dependenciesDict = {0:[0],1:[0,1]}
  20.171 -    libDicts = (featureDict,dependenciesDict,{})
  20.172 -    c = sparseNBClassifier()
  20.173 -    c.initializeModel([0,1],libDicts)
  20.174 -    c.update(2,[14,1,3],[0,2])
  20.175 -    print c.counts
  20.176 -    print c.predict([0,14],[0,1,2])
  20.177 -    c.storeModel('x')
  20.178 -    d = sparseNBClassifier()
  20.179 -    d.loadModel('x')
  20.180 -    print c.counts
  20.181 -    print d.counts
  20.182 -    print 'Done'
    21.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py	Sat Jun 28 22:13:23 2014 +0200
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,36 +0,0 @@
    21.4 -# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383
    21.5 -import os
    21.6 -
    21.7 -def spawnDaemon(path_to_executable, *args):
    21.8 -    """Spawn a completely detached subprocess (i.e., a daemon).
    21.9 -
   21.10 -    E.g. for mark:
   21.11 -    spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx")
   21.12 -    """
   21.13 -    # fork the first time (to make a non-session-leader child process)
   21.14 -    try:
   21.15 -        pid = os.fork()
   21.16 -    except OSError, e:
   21.17 -        raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno))
   21.18 -    if pid != 0:
   21.19 -        # parent (calling) process is all done
   21.20 -        return
   21.21 -
   21.22 -    # detach from controlling terminal (to make child a session-leader)
   21.23 -    os.setsid()
   21.24 -    try:
   21.25 -        pid = os.fork()
   21.26 -    except OSError, e:
   21.27 -        raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno))
   21.28 -        raise Exception, "%s [%d]" % (e.strerror, e.errno)
   21.29 -    if pid != 0:
   21.30 -        # child process is all done
   21.31 -        os._exit(0)
   21.32 -
   21.33 -    # and finally let's execute the executable for the daemon!
   21.34 -    try:        
   21.35 -        #os.execv(path_to_executable, [path_to_executable])
   21.36 -        os.execv(path_to_executable, args)
   21.37 -    except Exception, e:
   21.38 -        # oops, we're cut off from the world, let's just give up
   21.39 -        os._exit(255)
    22.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py	Sat Jun 28 22:13:23 2014 +0200
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,155 +0,0 @@
    22.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/stats.py
    22.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    22.6 -#     Copyright   2012
    22.7 -#
    22.8 -# Statistics collector.
    22.9 -
   22.10 -'''
   22.11 -Created on Jul 9, 2012
   22.12 -
   22.13 -@author: Daniel Kuehlwein
   22.14 -'''
   22.15 -
   22.16 -import logging,string
   22.17 -from cPickle import load,dump
   22.18 -
   22.19 -class Statistics(object):
   22.20 -    '''
   22.21 -    Class for all the statistics
   22.22 -    '''
   22.23 -
   22.24 -    def __init__(self,cutOff=500):
   22.25 -        '''
   22.26 -        Constructor
   22.27 -        '''
   22.28 -        self.logger = logging.getLogger('Statistics')
   22.29 -        self.avgAUC = 0.0
   22.30 -        self.avgRecall100 = 0.0
   22.31 -        self.avgAvailable = 0.0
   22.32 -        self.avgDepNr = 0.0
   22.33 -        self.problems = 0.0
   22.34 -        self.cutOff = cutOff
   22.35 -        self.recallData = [0]*cutOff
   22.36 -        self.recall100Median = []
   22.37 -        self.recall100Data = [0]*cutOff
   22.38 -        self.aucData = []
   22.39 -        self.premiseOccurenceCounter = {}
   22.40 -        self.firstDepAppearance = {}
   22.41 -        self.depAppearances = []
   22.42 -
   22.43 -    def update(self,predictions,dependencies,statementCounter):
   22.44 -        """
   22.45 -        Evaluates AUC, dependencies, recall100 and number of available premises of a prediction.
   22.46 -        """
   22.47 -        available = len(predictions)
   22.48 -        predictions = predictions[:self.cutOff]
   22.49 -        dependencies = set(dependencies)
   22.50 -        # No Stats for if no dependencies
   22.51 -        if len(dependencies) == 0:
   22.52 -            self.logger.debug('No Dependencies for statement %s' % statementCounter )
   22.53 -            self.badPreds = []
   22.54 -            return
   22.55 -        if len(predictions) < self.cutOff:
   22.56 -            for i in range(len(predictions),self.cutOff):
   22.57 -                self.recall100Data[i] += 1
   22.58 -                self.recallData[i] += 1
   22.59 -        for d in dependencies:
   22.60 -            if self.premiseOccurenceCounter.has_key(d):
   22.61 -                self.premiseOccurenceCounter[d] += 1
   22.62 -            else:
   22.63 -                self.premiseOccurenceCounter[d] = 1
   22.64 -            if self.firstDepAppearance.has_key(d):
   22.65 -                self.depAppearances.append(statementCounter-self.firstDepAppearance[d])
   22.66 -            else:
   22.67 -                self.firstDepAppearance[d] = statementCounter
   22.68 -        depNr = len(dependencies)
   22.69 -        aucSum = 0.
   22.70 -        posResults = 0.
   22.71 -        positives, negatives = 0, 0
   22.72 -        recall100 = 0.0
   22.73 -        badPreds = []
   22.74 -        depsFound = []
   22.75 -        for index,pId in enumerate(predictions):
   22.76 -            if pId in dependencies:        #positive
   22.77 -                posResults+=1
   22.78 -                positives+=1
   22.79 -                recall100 = index+1
   22.80 -                depsFound.append(pId)
   22.81 -                if index > 200:
   22.82 -                    badPreds.append(pId)
   22.83 -            else:
   22.84 -                aucSum += posResults
   22.85 -                negatives+=1
   22.86 -            # Update Recall and Recall100 stats
   22.87 -            if depNr == positives:
   22.88 -                self.recall100Data[index] += 1
   22.89 -            if depNr == 0:
   22.90 -                self.recallData[index] += 1
   22.91 -            else:
   22.92 -                self.recallData[index] += float(positives)/depNr
   22.93 -
   22.94 -        if not depNr == positives:
   22.95 -            depsFound = set(depsFound)
   22.96 -            missing = []
   22.97 -            for dep in dependencies:
   22.98 -                if not dep in depsFound:
   22.99 -                    missing.append(dep)
  22.100 -                    badPreds.append(dep)
  22.101 -                    recall100 = len(predictions)+1
  22.102 -                    positives+=1
  22.103 -            self.logger.debug('Dependencies missing for %s in cutoff predictions! Estimating Statistics.',\
  22.104 -                              string.join([str(dep) for dep in missing],','))
  22.105 -
  22.106 -        if positives == 0 or negatives == 0:
  22.107 -            auc = 1.0
  22.108 -        else:
  22.109 -            auc = aucSum/(negatives*positives)
  22.110 -
  22.111 -        self.aucData.append(auc)
  22.112 -        self.avgAUC += auc
  22.113 -        self.avgRecall100 += recall100
  22.114 -        self.recall100Median.append(recall100)
  22.115 -        self.problems += 1
  22.116 -        self.badPreds = badPreds
  22.117 -        self.avgAvailable += available
  22.118 -        self.avgDepNr += depNr
  22.119 -        self.logger.info('Statement: %s: AUC: %s \t Needed: %s \t Recall100: %s \t Available: %s \t cutOff: %s',\
  22.120 -                          statementCounter,round(100*auc,2),depNr,recall100,available,self.cutOff)
  22.121 -
  22.122 -    def printAvg(self):
  22.123 -        self.logger.info('Average results:')
  22.124 -        #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
  22.125 -        #                 round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
  22.126 -        # HACK FOR PAPER
  22.127 -        assert len(self.aucData) == len(self.recall100Median)
  22.128 -        nrDataPoints = len(self.aucData)
  22.129 -        if nrDataPoints == 0:
  22.130 -            return "No data points"
  22.131 -        if nrDataPoints % 2 == 1:
  22.132 -            medianAUC = sorted(self.aucData)[nrDataPoints/2 + 1]
  22.133 -        else:
  22.134 -            medianAUC = float(sorted(self.aucData)[nrDataPoints/2] + sorted(self.aucData)[nrDataPoints/2 + 1])/2
  22.135 -        #nrDataPoints = len(self.recall100Median)
  22.136 -        if nrDataPoints % 2 == 1:
  22.137 -            medianrecall100 = sorted(self.recall100Median)[nrDataPoints/2 + 1]
  22.138 -        else:
  22.139 -            medianrecall100 = float(sorted(self.recall100Median)[nrDataPoints/2] + sorted(self.recall100Median)[nrDataPoints/2 + 1])/2
  22.140 -
  22.141 -        returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff: %s' %\
  22.142 -                         (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
  22.143 -        self.logger.info(returnString)
  22.144 -        return returnString
  22.145 -    
  22.146 -        """
  22.147 -        self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \
  22.148 -                         round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
  22.149 -        """
  22.150 -
  22.151 -    def save(self,fileName):
  22.152 -        oStream = open(fileName, 'wb')
  22.153 -        dump((self.avgAUC,self.avgRecall100,self.avgAvailable,self.avgDepNr,self.problems,self.cutOff,self.recallData,self.recall100Data,self.aucData,self.premiseOccurenceCounter),oStream)
  22.154 -        oStream.close()
  22.155 -    def load(self,fileName):
  22.156 -        iStream = open(fileName, 'rb')
  22.157 -        self.avgAUC,self.avgRecall100,self.avgAvailable,self.avgDepNr,self.problems,self.cutOff,self.recallData,self.recall100Data,self.aucData,self.premiseOccurenceCounter = load(iStream)
  22.158 -        iStream.close()
    23.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py	Sat Jun 28 22:13:23 2014 +0200
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,281 +0,0 @@
    23.4 -'''
    23.5 -Created on Jan 11, 2013
    23.6 -
    23.7 -Searches for the best parameters.
    23.8 -
    23.9 -@author: Daniel Kuehlwein
   23.10 -'''
   23.11 -
   23.12 -import logging,sys,os
   23.13 -from multiprocessing import Process,Queue,current_process,cpu_count
   23.14 -from mash import mash
   23.15 -
   23.16 -def worker(inQueue, outQueue):
   23.17 -    for func, args in iter(inQueue.get, 'STOP'):        
   23.18 -        result = func(*args)
   23.19 -        #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
   23.20 -        outQueue.put(result)
   23.21 -
   23.22 -def run_mash(runId,inputDir,logFile,predictionFile,predef,\
   23.23 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
   23.24 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
   23.25 -             sineFeatures,sineWeight,quiet=True):
   23.26 -    # Init
   23.27 -    runId = str(runId)
   23.28 -    predictionFile = predictionFile + runId
   23.29 -    args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
   23.30 -            '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
   23.31 -            '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]    
   23.32 -    if learnTheories:
   23.33 -        args += ['--learnTheories']    
   23.34 -    if sineFeatures:
   23.35 -        args += ['--sineFeatures','--sineWeight',str(sineWeight)]
   23.36 -    if not predef == '':
   23.37 -        args += ['--predef',predef]
   23.38 -    if quit:
   23.39 -        args += ['-q']
   23.40 -    #print args
   23.41 -    mash(args)
   23.42 -    # Run
   23.43 -    args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
   23.44 -            '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
   23.45 -            '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
   23.46 -    if learnTheories:
   23.47 -        args += ['--learnTheories']    
   23.48 -    if sineFeatures:
   23.49 -        args += ['--sineFeatures','--sineWeight',str(sineWeight)]
   23.50 -    if not predef == '':
   23.51 -        args += ['--predef',predef]
   23.52 -    if quit:
   23.53 -        args += ['-q']
   23.54 -    #print args
   23.55 -    mash(args)
   23.56 -
   23.57 -    # Get Results
   23.58 -    IS = open(logFile,'r')
   23.59 -    lines =  IS.readlines()
   23.60 -    tmpRes = lines[-1].split()
   23.61 -    avgAuc = tmpRes[5]
   23.62 -    medianAuc = tmpRes[7]
   23.63 -    avgRecall100 = tmpRes[11]
   23.64 -    medianRecall100 = tmpRes[13]
   23.65 -    tmpTheoryRes = lines[-3].split()
   23.66 -    if learnTheories:
   23.67 -        avgTheoryPrecision = tmpTheoryRes[5] 
   23.68 -        avgTheoryRecall100 = tmpTheoryRes[7]
   23.69 -        avgTheoryRecall = tmpTheoryRes[9]
   23.70 -        avgTheoryPredictedPercent = tmpTheoryRes[11]
   23.71 -    else:
   23.72 -        avgTheoryPrecision = 'NA' 
   23.73 -        avgTheoryRecall100 = 'NA'
   23.74 -        avgTheoryRecall = 'NA'
   23.75 -        avgTheoryPredictedPercent = 'NA'    
   23.76 -    IS.close()
   23.77 -    
   23.78 -    # Delete old models
   23.79 -    os.remove(logFile)
   23.80 -    os.remove(predictionFile)
   23.81 -    if learnTheories:
   23.82 -        os.remove('../tmp/t'+runId)
   23.83 -    os.remove('../tmp/m'+runId)
   23.84 -    os.remove('../tmp/d'+runId)
   23.85 -    
   23.86 -    outFile = open('tester','a')
   23.87 -    #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
   23.88 -    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\
   23.89 -                             str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\
   23.90 -                             str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\
   23.91 -                             str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
   23.92 -    outFile.close()
   23.93 -    print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
   23.94 -        NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
   23.95 -        sineFeatures,'\t',sineWeight,'\t',\
   23.96 -        avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\
   23.97 -        avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent    
   23.98 -    return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
   23.99 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.100 -             sineFeatures,sineWeight,\
  23.101 -             avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent 
  23.102 -
  23.103 -def update_best_params(avgRecall100,bestAvgRecall100,\
  23.104 -                       bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\
  23.105 -                       bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\
  23.106 -                       NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
  23.107 -                       learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight):
  23.108 -                        if avgRecall100 > bestAvgRecall100:
  23.109 -                            bestAvgRecall100 = avgRecall100
  23.110 -                            bestNBDefaultPriorWeight = NBDefaultPriorWeight
  23.111 -                            bestNBDefVal = NBDefVal
  23.112 -                            bestNBPosWeight = NBPosWeight
  23.113 -                            bestSineFeatures = sineFeatures
  23.114 -                            bestSineWeight = sineWeight  
  23.115 -                        return bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
  23.116 -
  23.117 -if __name__ == '__main__':
  23.118 -    cores = cpu_count()
  23.119 -    #cores = 1
  23.120 -    # Options
  23.121 -    depFile = 'mash_dependencies'
  23.122 -    predef = ''
  23.123 -    outputDir = '../tmp/'
  23.124 -    numberOfPredictions = 1024
  23.125 -    
  23.126 -    learnTheoriesRange = [True,False]
  23.127 -    theoryDefValPosRange = [-x for x in range(1,20)]
  23.128 -    theoryDefValNegRange = [-x for x in range(1,20)]
  23.129 -    theoryPosWeightRange = [x for x in range(1,10)]
  23.130 -    
  23.131 -    NBDefaultPriorWeightRange = [10*x for x in range(10)]
  23.132 -    NBDefValRange = [-x for x in range(1,20)]
  23.133 -    NBPosWeightRange = [10*x for x in range(1,10)]
  23.134 -    sineFeaturesRange = [True,False]    
  23.135 -    sineWeightRange = [0.1,0.25,0.5,0.75,1.0]
  23.136 -    
  23.137 -    """
  23.138 -    # Test 1
  23.139 -    inputFile = '../data/20121227b/Auth/mash_commands'
  23.140 -    inputDir = '../data/20121227b/Auth/'
  23.141 -    predictionFile = '../tmp/auth.pred'
  23.142 -    logFile = '../tmp/auth.log'
  23.143 -    learnTheories = True
  23.144 -    theoryDefValPos = -7.5
  23.145 -    theoryDefValNeg = -15.0
  23.146 -    theoryPosWeight = 10.0
  23.147 -    NBDefaultPriorWeight = 20.0
  23.148 -    NBDefVal =- 15.0
  23.149 -    NBPosWeight = 10.0
  23.150 -    sineFeatures = True
  23.151 -    sineWeight =  0.5
  23.152 -
  23.153 -    task_queue = Queue()
  23.154 -    done_queue = Queue()
  23.155 -
  23.156 -    runs = 0
  23.157 -    for inputDir in ['../data/20121227b/Auth/']:
  23.158 -        problemId = inputDir.split('/')[-2]
  23.159 -        inputFile = os.path.join(inputDir,'mash_commands')
  23.160 -        predictionFile = os.path.join('../tmp/',problemId+'.pred')
  23.161 -        logFile = os.path.join('../tmp/',problemId+'.log')
  23.162 -        learnTheories = True
  23.163 -        theoryDefValPos = -7.5
  23.164 -        theoryDefValNeg = -15.0
  23.165 -        theoryPosWeight = 10.0
  23.166 -        
  23.167 -        bestAvgRecall100 = 0.0
  23.168 -        bestNBDefaultPriorWeight = 1.0
  23.169 -        bestNBDefVal = 1.0
  23.170 -        bestNBPosWeight = 1.0
  23.171 -        bestSineFeatures = False
  23.172 -        bestSineWeight = 0.0
  23.173 -        bestlearnTheories = True
  23.174 -        besttheoryDefValPos = 1.0 
  23.175 -        besttheoryDefValNeg = -15.0
  23.176 -        besttheoryPosWeight = 5.0
  23.177 -        for theoryPosWeight in theoryPosWeightRange:
  23.178 -            for theoryDefValNeg in theoryDefValNegRange:
  23.179 -                for NBDefaultPriorWeight in NBDefaultPriorWeightRange:
  23.180 -                    for NBDefVal in NBDefValRange:
  23.181 -                        for NBPosWeight in NBPosWeightRange:
  23.182 -                            for sineFeatures in sineFeaturesRange:
  23.183 -                                if sineFeatures:
  23.184 -                                    for sineWeight in sineWeightRange:  
  23.185 -                                        localLogFile = logFile+str(runs)                           
  23.186 -                                        task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight)))
  23.187 -                                        runs += 1
  23.188 -                                else:
  23.189 -                                    localLogFile = logFile+str(runs)
  23.190 -                                    task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight)))
  23.191 -                                    runs += 1
  23.192 -        # Start worker processes
  23.193 -        processes = []
  23.194 -        for _i in range(cores):
  23.195 -            process = Process(target=worker, args=(task_queue, done_queue))
  23.196 -            process.start()
  23.197 -            processes.append(process)
  23.198 -    
  23.199 -        for _i in range(runs):      
  23.200 -            learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.201 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.202 -             sineFeatures,sineWeight,\
  23.203 -             avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent  = done_queue.get()
  23.204 -            bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight = update_best_params(avgRecall100,bestAvgRecall100,\
  23.205 -                       bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\
  23.206 -                       bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\
  23.207 -                       NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
  23.208 -                       learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)              
  23.209 -        print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
  23.210 -    
  23.211 -    """
  23.212 -    # Test 2
  23.213 -    #inputDir = '../data/20130118/Jinja'
  23.214 -    inputDir = '../data/notheory/Prob'
  23.215 -    inputFile = inputDir+'/mash_commands'
  23.216 -    #inputFile = inputDir+'/mash_prover_commands'
  23.217 -    
  23.218 -    #depFile = 'mash_prover_dependencies'
  23.219 -    depFile = 'mash_dependencies'    
  23.220 -    outputDir = '../tmp/'
  23.221 -    numberOfPredictions = 1024
  23.222 -    predictionFile = '../tmp/auth.pred'
  23.223 -    logFile = '../tmp/auth.log'
  23.224 -    learnTheories = False
  23.225 -    theoryDefValPos = -7.5
  23.226 -    theoryDefValNeg = -10.0
  23.227 -    theoryPosWeight = 2.0
  23.228 -    NBDefaultPriorWeight = 20.0
  23.229 -    NBDefVal =- 15.0
  23.230 -    NBPosWeight = 10.0
  23.231 -    sineFeatures = False
  23.232 -    sineWeight =  0.5    
  23.233 -    quiet = False
  23.234 -    print inputDir
  23.235 -    
  23.236 -    #predef = inputDir+'/mash_prover_suggestions'
  23.237 -    predef = inputDir+'/mash_suggestions'    
  23.238 -    print 'Mash Isar'
  23.239 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.240 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.241 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.242 -             sineFeatures,sineWeight,quiet=quiet)
  23.243 -
  23.244 -    #"""
  23.245 -    predef = inputDir+'/mesh_suggestions'
  23.246 -    #predef = inputDir+'/mesh_prover_suggestions'
  23.247 -    print 'Mesh Isar'
  23.248 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.249 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.250 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.251 -             sineFeatures,sineWeight,quiet=quiet)
  23.252 -    #"""
  23.253 -    predef = inputDir+'/mepo_suggestions'
  23.254 -    print 'Mepo Isar'
  23.255 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.256 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.257 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.258 -             sineFeatures,sineWeight,quiet=quiet)    
  23.259 -    
  23.260 -    """
  23.261 -    inputFile = inputDir+'/mash_prover_commands'
  23.262 -    depFile = 'mash_prover_dependencies'
  23.263 -    
  23.264 -    predef = inputDir+'/mash_prover_suggestions'    
  23.265 -    print 'Mash Prover Isar'
  23.266 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.267 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.268 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.269 -             sineFeatures,sineWeight,quiet=quiet)
  23.270 -
  23.271 -    predef = inputDir+'/mesh_prover_suggestions'
  23.272 -    print 'Mesh Prover Isar'
  23.273 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.274 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.275 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.276 -             sineFeatures,sineWeight,quiet=quiet)
  23.277 -
  23.278 -    predef = inputDir+'/mepo_suggestions'
  23.279 -    print 'Mepo Prover Isar'
  23.280 -    run_mash(0,inputDir,logFile,predictionFile,predef,\
  23.281 -             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
  23.282 -             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
  23.283 -             sineFeatures,sineWeight,quiet=quiet)
  23.284 -    #"""
  23.285 \ No newline at end of file
    24.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/theoryModels.py	Sat Jun 28 22:13:23 2014 +0200
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,151 +0,0 @@
    24.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/theoryModels.py
    24.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    24.6 -#     Copyright   2012
    24.7 -#
    24.8 -# An updatable sparse naive Bayes classifier.
    24.9 -
   24.10 -'''
   24.11 -Created on Dec 26, 2012
   24.12 -
   24.13 -@author: Daniel Kuehlwein
   24.14 -'''
   24.15 -
   24.16 -from singleNaiveBayes import singleNBClassifier
   24.17 -from cPickle import load,dump
   24.18 -import sys,logging
   24.19 -
   24.20 -class TheoryModels(object):
   24.21 -    '''
   24.22 -    MetaClass for all the theory models.
   24.23 -    '''
   24.24 -
   24.25 -
   24.26 -    def __init__(self,defValPos = -7.5,defValNeg = -15.0,posWeight = 10.0):
   24.27 -        '''
   24.28 -        Constructor
   24.29 -        '''
   24.30 -        self.theoryModels = {}
   24.31 -        # Model Params
   24.32 -        self.defValPos = defValPos       
   24.33 -        self.defValNeg = defValNeg
   24.34 -        self.posWeight = posWeight
   24.35 -        self.theoryDict = {}
   24.36 -        self.accessibleTheories = set([])
   24.37 -        self.currentTheory = None
   24.38 -  
   24.39 -    def init(self,depFile,dicts):      
   24.40 -        logger = logging.getLogger('TheoryModels')
   24.41 -        IS = open(depFile,'r')
   24.42 -        for line in IS:
   24.43 -            line = line.split(':')
   24.44 -            name = line[0]
   24.45 -            theory = name.split('.')[0]
   24.46 -            # Name Id
   24.47 -            if not dicts.nameIdDict.has_key(name):
   24.48 -                logger.warning('%s is missing in nameIdDict. Aborting.',name)
   24.49 -                sys.exit(-1)
   24.50 -    
   24.51 -            nameId = dicts.nameIdDict[name]
   24.52 -            features = dicts.featureDict[nameId]
   24.53 -            if not self.theoryDict.has_key(theory):
   24.54 -                assert not theory == self.currentTheory
   24.55 -                if not self.currentTheory == None:
   24.56 -                    self.accessibleTheories.add(self.currentTheory)
   24.57 -                self.currentTheory = theory
   24.58 -                self.theoryDict[theory] = set([nameId])
   24.59 -                theoryModel = singleNBClassifier(self.defValPos,self.defValNeg,self.posWeight)
   24.60 -                self.theoryModels[theory] = theoryModel 
   24.61 -            else:
   24.62 -                self.theoryDict[theory] = self.theoryDict[theory].union([nameId])               
   24.63 -            
   24.64 -            # Find the actually used theories
   24.65 -            usedtheories = []    
   24.66 -            dependencies = line[1].split()
   24.67 -            if len(dependencies) == 0:
   24.68 -                continue
   24.69 -            for dep in dependencies:
   24.70 -                depId = dicts.nameIdDict[dep.strip()]
   24.71 -                deptheory = dep.split('.')[0]
   24.72 -                usedtheories.append(deptheory)
   24.73 -                if not self.theoryDict.has_key(deptheory):
   24.74 -                    self.theoryDict[deptheory] = set([depId])
   24.75 -                else:
   24.76 -                    self.theoryDict[deptheory] = self.theoryDict[deptheory].union([depId])                   
   24.77 -                        
   24.78 -            # Update theoryModels
   24.79 -            self.theoryModels[self.currentTheory].update(features,self.currentTheory in usedtheories)
   24.80 -            for a in self.accessibleTheories:                
   24.81 -                self.theoryModels[a].update(dicts.featureDict[nameId],a in usedtheories)
   24.82 -        IS.close()
   24.83 -    
   24.84 -    def overwrite(self,problemId,newDependencies,dicts):
   24.85 -        features = dicts.featureDict[problemId]
   24.86 -        unExpAccessibles = dicts.accessibleDict[problemId]
   24.87 -        accessibles = dicts.expand_accessibles(unExpAccessibles)
   24.88 -        accTheories = []
   24.89 -        for x in accessibles:
   24.90 -            xArt = (dicts.idNameDict[x]).split('.')[0]
   24.91 -            accTheories.append(xArt)    
   24.92 -        oldTheories = set([x.split('.')[0] for x in dicts.dependenciesDict[problemId]])
   24.93 -        newTheories = set([x.split('.')[0] for x in newDependencies])    
   24.94 -        for a in self.accTheories:                
   24.95 -            self.theoryModels[a].overwrite(features,a in oldTheories,a in newTheories) 
   24.96 -    
   24.97 -    def delete(self,problemId,features,dependencies,dicts):
   24.98 -        tmp = [dicts.idNameDict[x] for x in dependencies]
   24.99 -        usedTheories = set([x.split('.')[0] for x in tmp])   
  24.100 -        for a in self.accessibleTheories:        
  24.101 -            self.theoryModels[a].delete(features,a in usedTheories)  
  24.102 -    
  24.103 -    def update(self,problemId,features,dependencies,dicts): 
  24.104 -        # TODO: Implicit assumption that self.accessibleTheories contains all accessible theories!
  24.105 -        currentTheory = (dicts.idNameDict[problemId]).split('.')[0]       
  24.106 -        # Create new theory model, if there is a new theory 
  24.107 -        if not self.theoryDict.has_key(currentTheory):
  24.108 -            assert not currentTheory == self.currentTheory            
  24.109 -            if not currentTheory == None:
  24.110 -                self.theoryDict[currentTheory] = []
  24.111 -                self.currentTheory = currentTheory
  24.112 -                theoryModel = singleNBClassifier(self.defValPos,self.defValNeg,self.posWeight)
  24.113 -                self.theoryModels[currentTheory] = theoryModel
  24.114 -                self.accessibleTheories.add(self.currentTheory) 
  24.115 -        self.update_with_acc(problemId,features,dependencies,dicts,self.accessibleTheories)  
  24.116 -    
  24.117 -    def update_with_acc(self,problemId,features,dependencies,dicts,accessibleTheories):        
  24.118 -        # Find the actually used theories
  24.119 -        tmp = [dicts.idNameDict[x] for x in dependencies]
  24.120 -        usedTheories = set([x.split('.')[0] for x in tmp]) 
  24.121 -        if not len(usedTheories) == 0: 
  24.122 -            for a in accessibleTheories:                
  24.123 -                self.theoryModels[a].update(features,a in usedTheories)   
  24.124 -    
  24.125 -    def predict(self,features,accessibles,dicts):
  24.126 -        """
  24.127 -        Predicts the relevant theories. Returns the predicted theories and a list of all accessible premises in these theories.
  24.128 -        """         
  24.129 -        self.accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
  24.130 -        
  24.131 -        # Predict Theories
  24.132 -        predictedTheories = [self.currentTheory]
  24.133 -        for a in self.accessibleTheories:
  24.134 -            if self.theoryModels[a].predict_sparse(features):
  24.135 -            #if theoryModels[a].predict(dicts.featureDict[nameId]):
  24.136 -                predictedTheories.append(a)
  24.137 -        predictedTheories = set(predictedTheories)
  24.138 -
  24.139 -        # Delete accessibles in unpredicted theories
  24.140 -        newAcc = []
  24.141 -        for x in accessibles:
  24.142 -            xArt = (dicts.idNameDict[x]).split('.')[0]
  24.143 -            if xArt in predictedTheories:
  24.144 -                newAcc.append(x)
  24.145 -        return predictedTheories,newAcc
  24.146 -        
  24.147 -    def save(self,fileName):
  24.148 -        outStream = open(fileName, 'wb')
  24.149 -        dump((self.currentTheory,self.accessibleTheories,self.theoryModels,self.theoryDict,self.defValPos,self.defValNeg,self.posWeight),outStream)
  24.150 -        outStream.close()
  24.151 -    def load(self,fileName):
  24.152 -        inStream = open(fileName, 'rb')
  24.153 -        self.currentTheory,self.accessibleTheories,self.theoryModels,self.theoryDict,self.defValPos,self.defValNeg,self.posWeight = load(inStream)
  24.154 -        inStream.close()
  24.155 \ No newline at end of file
    25.1 --- a/src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py	Sat Jun 28 22:13:23 2014 +0200
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,70 +0,0 @@
    25.4 -#     Title:      HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py
    25.5 -#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
    25.6 -#     Copyright   2012
    25.7 -#
    25.8 -# An updatable sparse naive Bayes classifier.
    25.9 -
   25.10 -'''
   25.11 -Created on Dec 26, 2012
   25.12 -
   25.13 -@author: Daniel Kuehlwein
   25.14 -'''
   25.15 -
   25.16 -from cPickle import load,dump
   25.17 -import logging,string
   25.18 -
   25.19 -class TheoryStatistics(object):
   25.20 -    '''
   25.21 -    Stores statistics for theory lvl predictions
   25.22 -    '''
   25.23 -
   25.24 -
   25.25 -    def __init__(self):
   25.26 -        '''
   25.27 -        Constructor
   25.28 -        '''
   25.29 -        self.logger = logging.getLogger('TheoryStatistics')
   25.30 -        self.count = 0
   25.31 -        self.precision = 0.0
   25.32 -        self.recall100 = 0
   25.33 -        self.recall = 0.0
   25.34 -        self.predicted = 0.0
   25.35 -        self.predictedPercent = 0.0
   25.36 -    
   25.37 -    def update(self,currentTheory,predictedTheories,usedTheories,nrAvailableTheories):
   25.38 -        self.count += 1
   25.39 -        allPredTheories = predictedTheories.union([currentTheory])
   25.40 -        if set(usedTheories).issubset(allPredTheories):
   25.41 -            self.recall100 += 1
   25.42 -        localPredicted = len(allPredTheories)
   25.43 -        self.predicted += localPredicted 
   25.44 -        localPredictedPercent = float(localPredicted)/nrAvailableTheories
   25.45 -        self.predictedPercent += localPredictedPercent 
   25.46 -        localPrec = float(len(set(usedTheories).intersection(allPredTheories))) / localPredicted
   25.47 -        self.precision += localPrec
   25.48 -        if len(set(usedTheories)) == 0:
   25.49 -            localRecall = 1.0
   25.50 -        else:
   25.51 -            localRecall = float(len(set(usedTheories).intersection(allPredTheories))) / len(set(usedTheories))
   25.52 -        self.recall += localRecall
   25.53 -        self.logger.info('Theory prediction results:')
   25.54 -        self.logger.info('Problem: %s \t Recall100: %s \t Precision: %s \t Recall: %s \t PredictedTeoriesPercent: %s PredictedTeories: %s',\
   25.55 -                         self.count,self.recall100,round(localPrec,2),round(localRecall,2),round(localPredictedPercent,2),localPredicted)
   25.56 -        
   25.57 -    def printAvg(self):
   25.58 -        self.logger.info('Average theory results:')
   25.59 -        self.logger.info('avgPrecision: %s \t avgRecall100: %s \t avgRecall: %s \t avgPredictedPercent: %s \t avgPredicted: %s', \
   25.60 -                         round(self.precision/self.count,2),\
   25.61 -                         round(float(self.recall100)/self.count,2),\
   25.62 -                         round(self.recall/self.count,2),\
   25.63 -                         round(self.predictedPercent /self.count,2),\
   25.64 -                         round(self.predicted /self.count,2))
   25.65 -        
   25.66 -    def save(self,fileName):
   25.67 -        oStream = open(fileName, 'wb')
   25.68 -        dump((self.count,self.precision,self.recall100,self.recall,self.predicted),oStream)
   25.69 -        oStream.close()
   25.70 -    def load(self,fileName):
   25.71 -        iStream = open(fileName, 'rb')
   25.72 -        self.count,self.precision,self.recall100,self.recall,self.predicted = load(iStream)
   25.73 -        iStream.close()
   25.74 \ No newline at end of file
    26.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Sat Jun 28 22:13:23 2014 +0200
    26.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Sun Jun 29 18:28:27 2014 +0200
    26.3 @@ -33,19 +33,16 @@
    26.4    val decode_str : string -> string
    26.5    val decode_strs : string -> string list
    26.6    val encode_features : (string * real) list -> string
    26.7 -  val extract_suggestions : string -> string * (string * real) list
    26.8  
    26.9    datatype mash_engine =
   26.10 -    MaSh_Py
   26.11 -  | MaSh_SML_kNN
   26.12 -  | MaSh_SML_kNN_Ext
   26.13 -  | MaSh_SML_NB
   26.14 -  | MaSh_SML_NB_Ext
   26.15 +    MaSh_kNN
   26.16 +  | MaSh_kNN_Ext
   26.17 +  | MaSh_NB
   26.18 +  | MaSh_NB_Ext
   26.19  
   26.20    val is_mash_enabled : unit -> bool
   26.21    val the_mash_engine : unit -> mash_engine
   26.22  
   26.23 -  val mash_unlearn : Proof.context -> params -> unit
   26.24    val nickname_of_thm : thm -> string
   26.25    val find_suggested_facts : Proof.context -> ('b * thm) list -> string list -> ('b * thm) list
   26.26    val mesh_facts : ('a * 'a -> bool) -> int -> (real * (('a * real) list * 'a list)) list -> 'a list
   26.27 @@ -69,18 +66,20 @@
   26.28      ('b * thm) list -> ('b * thm) list -> ('b * thm) list * ('b * thm) list
   26.29    val mash_suggested_facts : Proof.context -> theory -> params -> int -> term list -> term ->
   26.30      raw_fact list -> fact list * fact list
   26.31 +
   26.32 +  val mash_unlearn : unit -> unit
   26.33    val mash_learn_proof : Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit
   26.34 -  val mash_learn_facts : Proof.context -> params -> string -> bool -> int -> bool -> Time.time ->
   26.35 +  val mash_learn_facts : Proof.context -> params -> string -> int -> bool -> Time.time ->
   26.36      raw_fact list -> string
   26.37    val mash_learn : Proof.context -> params -> fact_override -> thm list -> bool -> unit
   26.38  
   26.39 -  val mash_can_suggest_facts : Proof.context -> bool -> bool
   26.40 +  val mash_can_suggest_facts : Proof.context -> bool
   26.41    val generous_max_suggestions : int -> int
   26.42    val mepo_weight : real
   26.43    val mash_weight : real
   26.44    val relevant_facts : Proof.context -> params -> string -> int -> fact_override -> term list ->
   26.45      term -> raw_fact list -> (string * fact list) list
   26.46 -  val kill_learners : Proof.context -> params -> unit
   26.47 +  val kill_learners : unit -> unit
   26.48    val running_learners : unit -> unit
   26.49  end;
   26.50  
   26.51 @@ -140,84 +139,28 @@
   26.52    end
   26.53  
   26.54  datatype mash_engine =
   26.55 -  MaSh_Py
   26.56 -| MaSh_SML_kNN
   26.57 -| MaSh_SML_kNN_Ext
   26.58 -| MaSh_SML_NB
   26.59 -| MaSh_SML_NB_Ext
   26.60 +  MaSh_kNN
   26.61 +| MaSh_kNN_Ext
   26.62 +| MaSh_NB
   26.63 +| MaSh_NB_Ext
   26.64  
   26.65  fun mash_engine () =
   26.66    let val flag1 = Options.default_string @{system_option MaSh} in
   26.67      (case if flag1 <> "none" (* default *) then flag1 else getenv "MASH" of
   26.68 -      "yes" => SOME MaSh_SML_NB
   26.69 -    | "py" => SOME MaSh_Py
   26.70 -    | "sml" => SOME MaSh_SML_NB
   26.71 -    | "sml_knn" => SOME MaSh_SML_kNN
   26.72 -    | "sml_knn_ext" => SOME MaSh_SML_kNN_Ext
   26.73 -    | "sml_nb" => SOME MaSh_SML_NB
   26.74 -    | "sml_nb_ext" => SOME MaSh_SML_NB_Ext
   26.75 +      "yes" => SOME MaSh_NB
   26.76 +    | "sml" => SOME MaSh_NB
   26.77 +    | "knn" => SOME MaSh_kNN
   26.78 +    | "knn_ext" => SOME MaSh_kNN_Ext
   26.79 +    | "nb" => SOME MaSh_NB
   26.80 +    | "nb_ext" => SOME MaSh_NB_Ext
   26.81      | _ => NONE)
   26.82    end
   26.83  
   26.84  val is_mash_enabled = is_some o mash_engine
   26.85 -val the_mash_engine = the_default MaSh_SML_NB o mash_engine
   26.86 +val the_mash_engine = the_default MaSh_NB o mash_engine
   26.87  
   26.88  
   26.89 -(*** Low-level communication with the Python version of MaSh ***)
   26.90 -
   26.91 -val save_models_arg = "--saveModels"
   26.92 -val shutdown_server_arg = "--shutdownServer"
   26.93 -
   26.94 -fun wipe_out_file file = ignore (try (File.rm o Path.explode) file)
   26.95 -
   26.96 -fun write_file banner (xs, f) path =
   26.97 -  (case banner of SOME s => File.write path s | NONE => ();
   26.98 -   xs |> chunk_list 500 |> List.app (File.append path o implode o map f))
   26.99 -  handle IO.Io _ => ()
  26.100 -
  26.101 -fun run_mash_tool ctxt overlord extra_args background write_cmds read_suggs =
  26.102 -  let
  26.103 -    val (temp_dir, serial) =
  26.104 -      if overlord then (getenv "ISABELLE_HOME_USER", "")
  26.105 -      else (getenv "ISABELLE_TMP", serial_string ())
  26.106 -    val log_file = temp_dir ^ "/mash_log" ^ serial
  26.107 -    val err_file = temp_dir ^ "/mash_err" ^ serial
  26.108 -    val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
  26.109 -    val sugg_path = Path.explode sugg_file
  26.110 -    val cmd_file = temp_dir ^ "/mash_commands" ^ serial
  26.111 -    val cmd_path = Path.explode cmd_file
  26.112 -    val model_dir = File.shell_path (mash_state_dir ())
  26.113 -
  26.114 -    val command =
  26.115 -      "cd \"$ISABELLE_SLEDGEHAMMER_MASH\"/src; \
  26.116 -      \PYTHONDONTWRITEBYTECODE=y ./mash.py\
  26.117 -      \ --quiet\
  26.118 -      \ --port=$MASH_PORT\
  26.119 -      \ --outputDir " ^ model_dir ^
  26.120 -      " --modelFile=" ^ model_dir ^ "/model.pickle\
  26.121 -      \ --dictsFile=" ^ model_dir ^ "/dict.pickle\
  26.122 -      \ --log " ^ log_file ^
  26.123 -      " --inputFile " ^ cmd_file ^
  26.124 -      " --predictions " ^ sugg_file ^
  26.125 -      (if extra_args = [] then "" else " " ^ space_implode " " extra_args) ^ " >& " ^ err_file ^
  26.126 -      (if background then " &" else "")
  26.127 -
  26.128 -    fun run_on () =
  26.129 -      (Isabelle_System.bash command
  26.130 -       |> tap (fn _ =>
  26.131 -         (case try File.read (Path.explode err_file) |> the_default "" of
  26.132 -           "" => trace_msg ctxt (K "Done")
  26.133 -         | s => warning ("MaSh error: " ^ elide_string 1000 s)));
  26.134 -       read_suggs (fn () => try File.read_lines sugg_path |> these))
  26.135 -
  26.136 -    fun clean_up () =
  26.137 -      if overlord then () else List.app wipe_out_file [err_file, sugg_file, cmd_file]
  26.138 -  in
  26.139 -    write_file (SOME "") ([], K "") sugg_path;
  26.140 -    write_file (SOME "") write_cmds cmd_path;
  26.141 -    trace_msg ctxt (fn () => "Running " ^ command);
  26.142 -    with_cleanup clean_up run_on ()
  26.143 -  end
  26.144 +(*** Maintenance of the persistent, string-based state ***)
  26.145  
  26.146  fun meta_char c =
  26.147    if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse c = #")" orelse
  26.148 @@ -246,70 +189,10 @@
  26.149  
  26.150  val encode_features = map encode_feature #> space_implode " "
  26.151  
  26.152 -fun str_of_learn (name, parents, feats, deps) =
  26.153 -  "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^ encode_strs feats ^ "; " ^
  26.154 -  encode_strs deps ^ "\n"
  26.155 -
  26.156 -fun str_of_relearn (name, deps) = "p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n"
  26.157 -
  26.158 -fun str_of_query max_suggs (learns, parents, feats) =
  26.159 -  implode (map str_of_learn learns) ^
  26.160 -  "? " ^ string_of_int max_suggs ^ " # " ^ encode_strs parents ^ "; " ^ encode_features feats ^ "\n"
  26.161 -
  26.162 -(* The suggested weights do not make much sense. *)
  26.163 -fun extract_suggestion sugg =
  26.164 -  (case space_explode "=" sugg of
  26.165 -    [name, weight] => SOME (decode_str name, Real.fromString weight |> the_default 1.0)
  26.166 -  | [name] => SOME (decode_str name, 1.0)
  26.167 -  | _ => NONE)
  26.168 -
  26.169 -fun extract_suggestions line =
  26.170 -  (case space_explode ":" line of
  26.171 -    [goal, suggs] => (decode_str goal, map_filter extract_suggestion (space_explode " " suggs))
  26.172 -  | _ => ("", []))
  26.173 -
  26.174 -structure MaSh_Py =
  26.175 -struct
  26.176 -
  26.177 -fun shutdown ctxt overlord =
  26.178 -  (trace_msg ctxt (K "MaSh_Py shutdown");
  26.179 -   run_mash_tool ctxt overlord [shutdown_server_arg] false ([], K "") (K ()))
  26.180  
  26.181 -fun save ctxt overlord =
  26.182 -  (trace_msg ctxt (K "MaSh_Py save");
  26.183 -   run_mash_tool ctxt overlord [save_models_arg] true ([], K "") (K ()))
  26.184 -
  26.185 -fun unlearn ctxt overlord =
  26.186 -  (trace_msg ctxt (K "MaSh_Py unlearn");
  26.187 -   shutdown ctxt overlord;
  26.188 -   wipe_out_mash_state_dir ())
  26.189 -
  26.190 -fun learn _ _ _ [] = ()
  26.191 -  | learn ctxt overlord save learns =
  26.192 -    (trace_msg ctxt (fn () =>
  26.193 -       "MaSh_Py learn {" ^ elide_string 1000 (space_implode " " (map #1 learns)) ^ "}");
  26.194 -     run_mash_tool ctxt overlord ([] |> save ? cons save_models_arg) false (learns, str_of_learn)
  26.195 -       (K ()))
  26.196 +(*** Isabelle-agnostic machine learning ***)
  26.197  
  26.198 -fun relearn _ _ _ [] = ()
  26.199 -  | relearn ctxt overlord save relearns =
  26.200 -    (trace_msg ctxt (fn () => "MaSh_Py relearn " ^
  26.201 -       elide_string 1000 (space_implode " " (map #1 relearns)));
  26.202 -     run_mash_tool ctxt overlord ([] |> save ? cons save_models_arg) false
  26.203 -       (relearns, str_of_relearn) (K ()))
  26.204 -
  26.205 -fun query ctxt overlord max_suggs (query as (_, _, feats)) =
  26.206 -  (trace_msg ctxt (fn () => "MaSh_Py query " ^ encode_features feats);
  26.207 -   run_mash_tool ctxt overlord [] false ([query], str_of_query max_suggs) (fn suggs =>
  26.208 -     (case suggs () of [] => [] | suggs => snd (extract_suggestions (List.last suggs))))
  26.209 -   handle List.Empty => [])
  26.210 -
  26.211 -end;
  26.212 -
  26.213 -
  26.214 -(*** Standard ML version of MaSh ***)
  26.215 -
  26.216 -structure MaSh_SML =
  26.217 +structure MaSh =
  26.218  struct
  26.219  
  26.220  fun heap cmp bnd al a =
  26.221 @@ -542,25 +425,6 @@
  26.222    end
  26.223  
  26.224  (* experimental *)
  26.225 -fun naive_bayes_py ctxt overlord num_facts depss featss max_suggs goal_feats =
  26.226 -  let
  26.227 -    fun name_of_fact j = "f" ^ string_of_int j
  26.228 -    fun fact_of_name s = the (Int.fromString (unprefix "f" s))
  26.229 -    fun name_of_feature j = "F" ^ string_of_int j
  26.230 -    fun parents_of j = if j = 0 then [] else [name_of_fact (j - 1)]
  26.231 -
  26.232 -    val learns = map (fn j => (name_of_fact j, parents_of j,
  26.233 -      map name_of_feature (Vector.sub (featss, j)),
  26.234 -      map name_of_fact (Vector.sub (depss, j)))) (0 upto num_facts - 1)
  26.235 -    val parents' = parents_of num_facts
  26.236 -  in
  26.237 -    MaSh_Py.unlearn ctxt overlord;
  26.238 -    OS.Process.sleep (seconds 2.0); (* hack *)
  26.239 -    MaSh_Py.query ctxt overlord max_suggs (learns, parents', goal_feats)
  26.240 -    |> map (apfst fact_of_name)
  26.241 -  end
  26.242 -
  26.243 -(* experimental *)
  26.244  fun external_tool tool max_suggs learns goal_feats =
  26.245    let
  26.246      val ser = string_of_int (serial ()) (* poor person's attempt at thread-safety *)
  26.247 @@ -600,17 +464,17 @@
  26.248  val naive_bayes_ext = external_tool "predict/nbayes"
  26.249  
  26.250  fun query_external ctxt engine max_suggs learns goal_feats =
  26.251 -  (trace_msg ctxt (fn () => "MaSh_SML query external " ^ encode_features goal_feats);
  26.252 +  (trace_msg ctxt (fn () => "MaSh query external " ^ encode_features goal_feats);
  26.253     (case engine of
  26.254 -     MaSh_SML_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats
  26.255 -   | MaSh_SML_NB_Ext => naive_bayes_ext max_suggs learns goal_feats))
  26.256 +     MaSh_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats
  26.257 +   | MaSh_NB_Ext => naive_bayes_ext max_suggs learns goal_feats))
  26.258  
  26.259  fun query_internal ctxt engine num_facts num_feats (fact_names, featss, depss)
  26.260      (freqs as (_, _, dffreq)) visible_facts max_suggs goal_feats int_goal_feats =
  26.261 -  (trace_msg ctxt (fn () => "MaSh_SML query internal " ^ encode_features goal_feats ^ " from {" ^
  26.262 +  (trace_msg ctxt (fn () => "MaSh query internal " ^ encode_features goal_feats ^ " from {" ^
  26.263       elide_string 1000 (space_implode " " (Vector.foldr (op ::) [] fact_names)) ^ "}");
  26.264     (case engine of
  26.265 -     MaSh_SML_kNN =>
  26.266 +     MaSh_kNN =>
  26.267       let
  26.268         val feat_facts = Array.array (num_feats, [])
  26.269         val _ =
  26.270 @@ -620,7 +484,7 @@
  26.271       in
  26.272         k_nearest_neighbors dffreq num_facts depss feat_facts max_suggs visible_facts int_goal_feats
  26.273       end
  26.274 -   | MaSh_SML_NB => naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats)
  26.275 +   | MaSh_NB => naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats)
  26.276     |> map (curry Vector.sub fact_names o fst))
  26.277  
  26.278  end;
  26.279 @@ -698,7 +562,7 @@
  26.280        Vector.fromList (map (map_filter (Symtab.lookup fact_tab) o #3) learns)]
  26.281    in
  26.282      ((fact_names, featss, depss),
  26.283 -     MaSh_SML.learn_facts freqs0 num_facts0 num_facts num_feats depss featss)
  26.284 +     MaSh.learn_facts freqs0 num_facts0 num_facts num_feats depss featss)
  26.285    end
  26.286  
  26.287  fun reorder_learns (num_facts, fact_tab) learns =
  26.288 @@ -734,7 +598,7 @@
  26.289      | _ => NONE)
  26.290    | _ => NONE)
  26.291  
  26.292 -fun load_state ctxt overlord (time_state as (memory_time, _)) =
  26.293 +fun load_state ctxt (time_state as (memory_time, _)) =
  26.294    let val path = mash_state_file () in
  26.295      (case try OS.FileSys.modTime (Path.implode path) of
  26.296        NONE => time_state
  26.297 @@ -758,11 +622,7 @@
  26.298                   EQUAL =>
  26.299                   try_graph ctxt "loading state" empty_G_etc
  26.300                     (fn () => fold extract_line_and_add_node node_lines empty_G_etc)
  26.301 -               | LESS =>
  26.302 -                 (* cannot parse old file *)
  26.303 -                 (if the_mash_engine () = MaSh_Py then MaSh_Py.unlearn ctxt overlord
  26.304 -                  else wipe_out_mash_state_dir ();
  26.305 -                  empty_G_etc)
  26.306 +               | LESS => (wipe_out_mash_state_dir (); empty_G_etc) (* cannot parse old file *)
  26.307                 | GREATER => raise FILE_VERSION_TOO_NEW ())
  26.308  
  26.309               val (ffds, freqs) =
  26.310 @@ -794,7 +654,9 @@
  26.311            SOME names => (NONE, fold (append_entry o Graph.get_entry access_G) names [])
  26.312          | NONE => (SOME (version ^ "\n"), Graph.fold append_entry access_G []))
  26.313      in
  26.314 -      write_file banner (entries, str_of_entry) path;
  26.315 +      (case banner of SOME s => File.write path s | NONE => ();
  26.316 +       entries |> chunk_list 500 |> List.app (File.append path o implode o map str_of_entry))
  26.317 +      handle IO.Io _ => ();
  26.318        trace_msg ctxt (fn () =>
  26.319          "Saved fact graph (" ^ graph_info access_G ^
  26.320          (case dirty_facts of
  26.321 @@ -808,25 +670,19 @@
  26.322  
  26.323  in
  26.324  
  26.325 -fun map_state ctxt overlord f =
  26.326 -  Synchronized.change global_state (load_state ctxt overlord ##> f #> save_state ctxt)
  26.327 +fun map_state ctxt f =
  26.328 +  Synchronized.change global_state (load_state ctxt ##> f #> save_state ctxt)
  26.329    handle FILE_VERSION_TOO_NEW () => ()
  26.330  
  26.331 -fun peek_state ctxt overlord f =
  26.332 -  Synchronized.change_result global_state (perhaps (try (load_state ctxt overlord)) #> `snd #>> f)
  26.333 +fun peek_state ctxt =
  26.334 +  Synchronized.change_result global_state (perhaps (try (load_state ctxt)) #> `snd)
  26.335  
  26.336 -fun clear_state ctxt overlord =
  26.337 -  (* "MaSh_Py.unlearn" also removes the state file *)
  26.338 +fun clear_state () =
  26.339    Synchronized.change global_state (fn _ =>
  26.340 -    (if the_mash_engine () = MaSh_Py then MaSh_Py.unlearn ctxt overlord
  26.341 -     else wipe_out_mash_state_dir ();
  26.342 -     (Time.zeroTime, empty_state)))
  26.343 +    (wipe_out_mash_state_dir (); (Time.zeroTime, empty_state)))
  26.344  
  26.345  end
  26.346  
  26.347 -fun mash_unlearn ctxt ({overlord, ...} : params) =
  26.348 -  (clear_state ctxt overlord; Output.urgent_message "Reset MaSh.")
  26.349 -
  26.350  
  26.351  (*** Isabelle helpers ***)
  26.352  
  26.353 @@ -1284,7 +1140,7 @@
  26.354      (mesh_facts (eq_snd (gen_eq_thm ctxt)) max_facts mess, unknown)
  26.355    end
  26.356  
  26.357 -fun mash_suggested_facts ctxt thy ({debug, overlord, ...} : params) max_suggs hyp_ts concl_t facts =
  26.358 +fun mash_suggested_facts ctxt thy ({debug, ...} : params) max_suggs hyp_ts concl_t facts =
  26.359    let
  26.360      val thy_name = Context.theory_name thy
  26.361      val engine = the_mash_engine ()
  26.362 @@ -1323,50 +1179,40 @@
  26.363          (parents, feats)
  26.364        end
  26.365  
  26.366 -    val ((access_G, ((num_facts, fact_tab), (num_feats, feat_tab)), ffds, freqs), py_suggs) =
  26.367 -      peek_state ctxt overlord (fn {access_G, xtabs, ffds, freqs, ...} =>
  26.368 -        ((access_G, xtabs, ffds, freqs),
  26.369 -         if Graph.is_empty access_G then
  26.370 -           (trace_msg ctxt (K "Nothing has been learned yet"); [])
  26.371 -         else if engine = MaSh_Py then
  26.372 -           let val (parents, feats) = query_args access_G in
  26.373 -             MaSh_Py.query ctxt overlord max_suggs ([], parents, feats)
  26.374 -             |> map fst
  26.375 -           end
  26.376 -         else
  26.377 -           []))
  26.378 +    val {access_G, xtabs = ((num_facts, fact_tab), (num_feats, feat_tab)), ffds, freqs, ...} =
  26.379 +      peek_state ctxt
  26.380  
  26.381 -    val sml_suggs =
  26.382 -      if engine = MaSh_Py then
  26.383 -        []
  26.384 -      else
  26.385 -        let
  26.386 -          val (parents, goal_feats) = query_args access_G
  26.387 -          val visible_facts = map_filter (Symtab.lookup fact_tab) (Graph.all_preds access_G parents)
  26.388 -        in
  26.389 -          if engine = MaSh_SML_kNN_Ext orelse engine = MaSh_SML_NB_Ext then
  26.390 -            let
  26.391 -              val learns =
  26.392 -                Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G
  26.393 -            in
  26.394 -              MaSh_SML.query_external ctxt engine max_suggs learns goal_feats
  26.395 -            end
  26.396 -          else
  26.397 -            let
  26.398 -              val int_goal_feats =
  26.399 -                map_filter (fn (s, w) => Option.map (rpair w) (Symtab.lookup feat_tab s)) goal_feats
  26.400 -            in
  26.401 -              MaSh_SML.query_internal ctxt engine num_facts num_feats ffds freqs visible_facts
  26.402 -                max_suggs goal_feats int_goal_feats
  26.403 -            end
  26.404 -        end
  26.405 +    val suggs =
  26.406 +      let
  26.407 +        val (parents, goal_feats) = query_args access_G
  26.408 +        val visible_facts = map_filter (Symtab.lookup fact_tab) (Graph.all_preds access_G parents)
  26.409 +      in
  26.410 +        if engine = MaSh_kNN_Ext orelse engine = MaSh_NB_Ext then
  26.411 +          let
  26.412 +            val learns =
  26.413 +              Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G
  26.414 +          in
  26.415 +            MaSh.query_external ctxt engine max_suggs learns goal_feats
  26.416 +          end
  26.417 +        else
  26.418 +          let
  26.419 +            val int_goal_feats =
  26.420 +              map_filter (fn (s, w) => Option.map (rpair w) (Symtab.lookup feat_tab s)) goal_feats
  26.421 +          in
  26.422 +            MaSh.query_internal ctxt engine num_facts num_feats ffds freqs visible_facts max_suggs
  26.423 +              goal_feats int_goal_feats
  26.424 +          end
  26.425 +      end
  26.426  
  26.427      val unknown = filter_out (is_fact_in_graph access_G o snd) facts
  26.428    in
  26.429 -    find_mash_suggestions ctxt max_suggs (py_suggs @ sml_suggs) facts chained unknown
  26.430 +    find_mash_suggestions ctxt max_suggs suggs facts chained unknown
  26.431      |> pairself (map fact_of_raw_fact)
  26.432    end
  26.433  
  26.434 +fun mash_unlearn () =
  26.435 +  (clear_state (); Output.urgent_message "Reset MaSh.")
  26.436 +
  26.437  fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (access_G, (fact_xtab, feat_xtab)) =
  26.438    let
  26.439      fun maybe_learn_from from (accum as (parents, access_G)) =
  26.440 @@ -1413,35 +1259,30 @@
  26.441  fun learned_proof_name () =
  26.442    Date.fmt ".%Y%m%d.%H%M%S." (Date.fromTimeLocal (Time.now ())) ^ serial_string ()
  26.443  
  26.444 -fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) t facts used_ths =
  26.445 +fun mash_learn_proof ctxt ({timeout, ...} : params) t facts used_ths =
  26.446    if not (null used_ths) andalso is_mash_enabled () then
  26.447      launch_thread timeout (fn () =>
  26.448        let
  26.449          val thy = Proof_Context.theory_of ctxt
  26.450          val feats = features_of ctxt thy (Local, General) [t]
  26.451        in
  26.452 -        map_state ctxt overlord
  26.453 -          (fn state as {access_G, xtabs as ((num_facts0, _), _), ffds, freqs, dirty_facts} =>
  26.454 +        map_state ctxt
  26.455 +          (fn {access_G, xtabs as ((num_facts0, _), _), ffds, freqs, dirty_facts} =>
  26.456               let
  26.457                 val parents = maximal_wrt_access_graph access_G facts
  26.458                 val deps = used_ths
  26.459                   |> filter (is_fact_in_graph access_G)
  26.460                   |> map nickname_of_thm
  26.461 +
  26.462 +               val name = learned_proof_name ()
  26.463 +               val (access_G', xtabs', rev_learns) =
  26.464 +                 add_node Automatic_Proof name parents feats deps (access_G, xtabs, [])
  26.465 +
  26.466 +               val (ffds', freqs') =
  26.467 +                 recompute_ffds_freqs_from_learns (rev rev_learns) xtabs' num_facts0 ffds freqs
  26.468               in
  26.469 -               if the_mash_engine () = MaSh_Py then
  26.470 -                 (MaSh_Py.learn ctxt overlord true [("", parents, feats, deps)]; state)
  26.471 -               else
  26.472 -                 let
  26.473 -                   val name = learned_proof_name ()
  26.474 -                   val (access_G', xtabs', rev_learns) =
  26.475 -                     add_node Automatic_Proof name parents feats deps (access_G, xtabs, [])
  26.476 -
  26.477 -                   val (ffds', freqs') =
  26.478 -                     recompute_ffds_freqs_from_learns (rev rev_learns) xtabs' num_facts0 ffds freqs
  26.479 -                 in
  26.480 -                   {access_G = access_G', xtabs = xtabs', ffds = ffds', freqs = freqs',
  26.481 -                    dirty_facts = Option.map (cons name) dirty_facts}
  26.482 -                 end
  26.483 +               {access_G = access_G', xtabs = xtabs', ffds = ffds', freqs = freqs',
  26.484 +                dirty_facts = Option.map (cons name) dirty_facts}
  26.485               end);
  26.486          (true, "")
  26.487        end)
  26.488 @@ -1453,14 +1294,13 @@
  26.489  val commit_timeout = seconds 30.0
  26.490  
  26.491  (* The timeout is understood in a very relaxed fashion. *)
  26.492 -fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover save auto_level
  26.493 -    run_prover learn_timeout facts =
  26.494 +fun mash_learn_facts ctxt (params as {debug, verbose, ...}) prover auto_level run_prover
  26.495 +    learn_timeout facts =
  26.496    let
  26.497      val timer = Timer.startRealTimer ()
  26.498      fun next_commit_time () = Time.+ (Timer.checkRealTimer timer, commit_timeout)
  26.499  
  26.500 -    val engine = the_mash_engine ()
  26.501 -    val {access_G, ...} = peek_state ctxt overlord I
  26.502 +    val {access_G, ...} = peek_state ctxt
  26.503      val is_in_access_G = is_fact_in_graph access_G o snd
  26.504      val no_new_facts = forall is_in_access_G facts
  26.505    in
  26.506 @@ -1511,18 +1351,13 @@
  26.507                  else
  26.508                    recompute_ffds_freqs_from_access_G access_G xtabs
  26.509              in
  26.510 -              if engine = MaSh_Py then
  26.511 -                (MaSh_Py.learn ctxt overlord (save andalso null relearns) learns;
  26.512 -                 MaSh_Py.relearn ctxt overlord save relearns)
  26.513 -              else
  26.514 -                ();
  26.515                {access_G = access_G, xtabs = xtabs, ffds = ffds', freqs = freqs',
  26.516                 dirty_facts = dirty_facts}
  26.517              end
  26.518  
  26.519          fun commit last learns relearns flops =
  26.520            (if debug andalso auto_level = 0 then Output.urgent_message "Committing..." else ();
  26.521 -           map_state ctxt overlord (do_commit (rev learns) relearns flops);
  26.522 +           map_state ctxt (do_commit (rev learns) relearns flops);
  26.523             if not last andalso auto_level = 0 then
  26.524               let val num_proofs = length learns + length relearns in
  26.525                 Output.urgent_message ("Learned " ^ string_of_int num_proofs ^ " " ^
  26.526 @@ -1633,7 +1468,7 @@
  26.527      val prover = hd provers
  26.528  
  26.529      fun learn auto_level run_prover =
  26.530 -      mash_learn_facts ctxt params prover true auto_level run_prover one_year facts
  26.531 +      mash_learn_facts ctxt params prover auto_level run_prover one_year facts
  26.532        |> Output.urgent_message
  26.533    in
  26.534      if run_prover then
  26.535 @@ -1650,8 +1485,8 @@
  26.536         learn 0 false)
  26.537    end
  26.538  
  26.539 -fun mash_can_suggest_facts ctxt overlord =
  26.540 -  not (Graph.is_empty (#access_G (peek_state ctxt overlord I)))
  26.541 +fun mash_can_suggest_facts ctxt =
  26.542 +  not (Graph.is_empty (#access_G (peek_state ctxt)))
  26.543  
  26.544  (* Generate more suggestions than requested, because some might be thrown out later for various
  26.545     reasons (e.g., duplicates). *)
  26.546 @@ -1666,7 +1501,7 @@
  26.547     and Try. *)
  26.548  val min_secs_for_learning = 15
  26.549  
  26.550 -fun relevant_facts ctxt (params as {verbose, overlord, learn, fact_filter, timeout, ...}) prover
  26.551 +fun relevant_facts ctxt (params as {verbose, learn, fact_filter, timeout, ...}) prover
  26.552      max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
  26.553    if not (subset (op =) (the_list fact_filter, fact_filters)) then
  26.554      error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
  26.555 @@ -1689,42 +1524,41 @@
  26.556               else
  26.557                 ());
  26.558              launch_thread timeout
  26.559 -              (fn () => (true, mash_learn_facts ctxt params prover true 2 false timeout facts))
  26.560 +              (fn () => (true, mash_learn_facts ctxt params prover 2 false timeout facts))
  26.561            end
  26.562          else
  26.563            ()
  26.564  
  26.565        fun maybe_learn () =
  26.566 -        if is_mash_enabled () andalso learn then
  26.567 +        if learn then
  26.568            let
  26.569 -            val {access_G, xtabs = ((num_facts0, _), _), ...} = peek_state ctxt overlord I
  26.570 +            val {access_G, xtabs = ((num_facts0, _), _), ...} = peek_state ctxt
  26.571              val is_in_access_G = is_fact_in_graph access_G o snd
  26.572              val min_num_facts_to_learn = length facts - num_facts0
  26.573            in
  26.574              if min_num_facts_to_learn <= max_facts_to_learn_before_query then
  26.575                (case length (filter_out is_in_access_G facts) of
  26.576 -                0 => false
  26.577 +                0 => ()
  26.578                | num_facts_to_learn =>
  26.579                  if num_facts_to_learn <= max_facts_to_learn_before_query then
  26.580 -                  (mash_learn_facts ctxt params prover false 2 false timeout facts
  26.581 -                   |> (fn "" => () | s => Output.urgent_message (MaShN ^ ": " ^ s));
  26.582 -                   true)
  26.583 +                  mash_learn_facts ctxt params prover 2 false timeout facts
  26.584 +                  |> (fn "" => () | s => Output.urgent_message (MaShN ^ ": " ^ s))
  26.585                  else
  26.586 -                  (maybe_launch_thread num_facts_to_learn; false))
  26.587 +                  maybe_launch_thread num_facts_to_learn)
  26.588              else
  26.589 -              (maybe_launch_thread min_num_facts_to_learn; false)
  26.590 +              maybe_launch_thread min_num_facts_to_learn
  26.591            end
  26.592          else
  26.593 -          false
  26.594 +          ()
  26.595  
  26.596 -      val (save, effective_fact_filter) =
  26.597 +      val effective_fact_filter =
  26.598          (case fact_filter of
  26.599 -          SOME ff => (ff <> mepoN andalso maybe_learn (), ff)
  26.600 +          SOME ff => ff
  26.601          | NONE =>
  26.602            if is_mash_enabled () then
  26.603 -            (maybe_learn (), if mash_can_suggest_facts ctxt overlord then meshN else mepoN)
  26.604 +            (maybe_learn (); if mash_can_suggest_facts ctxt then meshN else mepoN)
  26.605            else
  26.606 -            (false, mepoN))
  26.607 +            mepoN)
  26.608  
  26.609        val unique_facts = drop_duplicate_facts facts
  26.610        val add_ths = Attrib.eval_thms ctxt add
  26.611 @@ -1754,7 +1588,6 @@
  26.612             |> Par_List.map (apsnd (fn f => f ()))
  26.613        val mesh = mesh_facts (eq_snd (gen_eq_thm ctxt)) max_facts mess |> add_and_take
  26.614      in
  26.615 -      if the_mash_engine () = MaSh_Py andalso save then MaSh_Py.save ctxt overlord else ();
  26.616        (case (fact_filter, mess) of
  26.617          (NONE, [(_, (mepo, _)), (_, (mash, _))]) =>
  26.618          [(meshN, mesh), (mepoN, mepo |> map fst |> add_and_take),
  26.619 @@ -1762,9 +1595,7 @@
  26.620        | _ => [(effective_fact_filter, mesh)])
  26.621      end
  26.622  
  26.623 -fun kill_learners ctxt ({overlord, ...} : params) =
  26.624 -  (Async_Manager.kill_threads MaShN "learner";
  26.625 -   if the_mash_engine () = MaSh_Py then MaSh_Py.shutdown ctxt overlord else ())
  26.626 +fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
  26.627  
  26.628  fun running_learners () = Async_Manager.running_threads MaShN "learner"
  26.629  
    27.1 --- a/src/HOL/Tools/etc/options	Sat Jun 28 22:13:23 2014 +0200
    27.2 +++ b/src/HOL/Tools/etc/options	Sun Jun 29 18:28:27 2014 +0200
    27.3 @@ -36,4 +36,4 @@
    27.4    -- "status of Z3 activation for non-commercial use (yes, no, unknown)"
    27.5  
    27.6  public option MaSh : string = "sml"
    27.7 -  -- "machine learning engine to use by Sledgehammer (sml, sml_knn, sml_nb, py, none)"
    27.8 +  -- "machine learning engine to use by Sledgehammer (sml, nb, knn, none)"