src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
changeset 53555 12251bc889f1
parent 53100 1133b9e83f09
child 54056 8298976acb54
equal deleted inserted replaced
53554:78fe0002024d 53555:12251bc889f1
     1 #     Title:      HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
     1 #     Title:      HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
     2 #     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
     2 #     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
     3 #     Copyright   2012
     3 #     Copyright   2012-2013
     4 #
     4 #
     5 # Persistent dictionaries: accessibility, dependencies, and features.
     5 # Persistent dictionaries: accessibility, dependencies, and features.
     6 
     6 
     7 '''
     7 import logging,sys
     8 Created on Jul 12, 2012
       
     9 
       
    10 @author: daniel
       
    11 '''
       
    12 
       
    13 from os.path import join
     8 from os.path import join
    14 from Queue import Queue
     9 from Queue import Queue
    15 from readData import create_accessible_dict,create_dependencies_dict,create_feature_dict
    10 from readData import create_accessible_dict,create_dependencies_dict
    16 from cPickle import load,dump
    11 from cPickle import load,dump
    17 
    12 
    18 class Dictionaries(object):
    13 class Dictionaries(object):
    19     '''
    14     '''
    20     This class contains all info about name-> id mapping, etc.
    15     This class contains all info about name-> id mapping, etc.
    30         self.maxFeatureId = 0
    25         self.maxFeatureId = 0
    31         self.featureDict = {}
    26         self.featureDict = {}
    32         self.dependenciesDict = {}
    27         self.dependenciesDict = {}
    33         self.accessibleDict = {}
    28         self.accessibleDict = {}
    34         self.expandedAccessibles = {}
    29         self.expandedAccessibles = {}
    35         # For SInE features
    30         self.accFile =  ''
    36         self.useSine = False
       
    37         self.featureCountDict = {} 
       
    38         self.triggerFeaturesDict = {} 
       
    39         self.featureTriggeredFormulasDict = {}
       
    40         self.changed = True
    31         self.changed = True
    41 
    32 
    42     """
    33     """
    43     Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled!
    34     Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled!
    44     """
    35     """
    45     def init_featureDict(self,featureFile,sineFeatures):
    36     def init_featureDict(self,featureFile):
    46         self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\
    37         self.create_feature_dict(featureFile)
    47          create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\
    38         #self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\
    48                              self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile)
    39         # create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\
       
    40         #                     self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile)
    49     def init_dependenciesDict(self,depFile):
    41     def init_dependenciesDict(self,depFile):
    50         self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile)
    42         self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile)
    51     def init_accessibleDict(self,accFile):
    43     def init_accessibleDict(self,accFile):
    52         self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile)
    44         self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile)
    53 
    45 
    54     def init_all(self,args):
    46     def init_all(self,args):
    55         self.featureFileName = 'mash_features'
    47         self.featureFileName = 'mash_features'
    56         self.accFileName = 'mash_accessibility'
    48         self.accFileName = 'mash_accessibility'
    57         self.useSine = args.sineFeatures
       
    58         featureFile = join(args.inputDir,self.featureFileName)
    49         featureFile = join(args.inputDir,self.featureFileName)
    59         depFile = join(args.inputDir,args.depFile)
    50         depFile = join(args.inputDir,args.depFile)
    60         accFile = join(args.inputDir,self.accFileName)
    51         self.accFile = join(args.inputDir,self.accFileName)
    61         self.init_featureDict(featureFile,self.useSine)
    52         self.init_featureDict(featureFile)
    62         self.init_accessibleDict(accFile)
    53         self.init_accessibleDict(self.accFile)
    63         self.init_dependenciesDict(depFile)
    54         self.init_dependenciesDict(depFile)
    64         self.expandedAccessibles = {}
    55         self.expandedAccessibles = {}
    65         self.changed = True
    56         self.changed = True
       
    57 
       
    58     def create_feature_dict(self,inputFile):
       
    59         logger = logging.getLogger('create_feature_dict')
       
    60         self.featureDict = {}
       
    61         IS = open(inputFile,'r')
       
    62         for line in IS:
       
    63             line = line.split(':')
       
    64             name = line[0]
       
    65             # Name Id
       
    66             if self.nameIdDict.has_key(name):
       
    67                 logger.warning('%s appears twice in the feature file. Aborting.',name)
       
    68                 sys.exit(-1)
       
    69             else:
       
    70                 self.nameIdDict[name] = self.maxNameId
       
    71                 self.idNameDict[self.maxNameId] = name
       
    72                 nameId = self.maxNameId
       
    73                 self.maxNameId += 1
       
    74             features = self.get_features(line)
       
    75             # Store results
       
    76             self.featureDict[nameId] = features
       
    77         IS.close()
       
    78         return
    66 
    79 
    67     def get_name_id(self,name):
    80     def get_name_id(self,name):
    68         """
    81         """
    69         Return the Id for a name.
    82         Return the Id for a name.
    70         If it doesn't exist yet, a new entry is created.
    83         If it doesn't exist yet, a new entry is created.
    80         return nameId
    93         return nameId
    81 
    94 
    82     def add_feature(self,featureName):
    95     def add_feature(self,featureName):
    83         if not self.featureIdDict.has_key(featureName):
    96         if not self.featureIdDict.has_key(featureName):
    84             self.featureIdDict[featureName] = self.maxFeatureId
    97             self.featureIdDict[featureName] = self.maxFeatureId
    85             if self.useSine:
       
    86                 self.featureCountDict[self.maxFeatureId] = 0
       
    87             self.maxFeatureId += 1
    98             self.maxFeatureId += 1
    88             self.changed = True
    99             self.changed = True
    89         fId = self.featureIdDict[featureName]
   100         fId = self.featureIdDict[featureName]
    90         if self.useSine:
       
    91             self.featureCountDict[fId] += 1
       
    92         return fId
   101         return fId
    93 
   102 
    94     def get_features(self,line):
   103     def get_features(self,line):
    95         # Feature Ids
       
    96         featureNames = [f.strip() for f in line[1].split()]
   104         featureNames = [f.strip() for f in line[1].split()]
    97         features = []
   105         features = {}
    98         for fn in featureNames:
   106         for fn in featureNames:
    99             tmp = fn.split('=')
   107             tmp = fn.split('=')
   100             weight = 1.0
   108             weight = 1.0 
   101             if len(tmp) == 2:
   109             if len(tmp) == 2:
   102                 fn = tmp[0]
   110                 fn = tmp[0]
   103                 weight = float(tmp[1])
   111                 weight = float(tmp[1])
   104             fId = self.add_feature(tmp[0])
   112             fId = self.add_feature(tmp[0])
   105             features.append((fId,weight))
   113             features[fId] = weight
       
   114             #features[fId] = 1.0 ###
   106         return features
   115         return features
   107 
   116 
   108     def expand_accessibles(self,acc):
   117     def expand_accessibles(self,acc):
   109         accessibles = set(acc)
   118         accessibles = set(acc)
   110         unexpandedQueue = Queue()
   119         unexpandedQueue = Queue()
   140         # Accessible Ids
   149         # Accessible Ids
   141         unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()]
   150         unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()]
   142         self.accessibleDict[nameId] = unExpAcc
   151         self.accessibleDict[nameId] = unExpAcc
   143         features = self.get_features(line)
   152         features = self.get_features(line)
   144         self.featureDict[nameId] = features
   153         self.featureDict[nameId] = features
   145         if self.useSine:
       
   146             # SInE Features
       
   147             minFeatureCount = min([self.featureCountDict[f] for f,_w in features])
       
   148             triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount]
       
   149             self.triggerFeaturesDict[nameId] = triggerFeatures
       
   150             for f in triggerFeatures:
       
   151                 if self.featureTriggeredFormulasDict.has_key(f): 
       
   152                     self.featureTriggeredFormulasDict[f].append(nameId)
       
   153                 else:
       
   154                     self.featureTriggeredFormulasDict[f] = [nameId]        
       
   155         self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()]        
   154         self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()]        
   156         self.changed = True
   155         self.changed = True
   157         return nameId
   156         return nameId
   158 
   157 
   159     def parse_overwrite(self,line):
   158     def parse_overwrite(self,line):
   217 
   216 
   218     def save(self,fileName):
   217     def save(self,fileName):
   219         if self.changed:
   218         if self.changed:
   220             dictsStream = open(fileName, 'wb')
   219             dictsStream = open(fileName, 'wb')
   221             dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
   220             dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
   222                 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\
   221                 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream)
   223                 self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream)
       
   224             self.changed = False
   222             self.changed = False
   225             dictsStream.close()
   223             dictsStream.close()
   226     def load(self,fileName):
   224     def load(self,fileName):
   227         dictsStream = open(fileName, 'rb')
   225         dictsStream = open(fileName, 'rb')
   228         self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
   226         self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\
   229               self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\
   227               self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream)
   230               self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream)
       
   231         self.changed = False
   228         self.changed = False
   232         dictsStream.close()
   229         dictsStream.close()