1 # Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
1 # Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
2 # Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
2 # Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
3 # Copyright 2012 |
3 # Copyright 2012-2013 |
4 # |
4 # |
5 # Persistent dictionaries: accessibility, dependencies, and features. |
5 # Persistent dictionaries: accessibility, dependencies, and features. |
6 |
6 |
7 ''' |
7 import logging,sys |
8 Created on Jul 12, 2012 |
|
9 |
|
10 @author: daniel |
|
11 ''' |
|
12 |
|
13 from os.path import join |
8 from os.path import join |
14 from Queue import Queue |
9 from Queue import Queue |
15 from readData import create_accessible_dict,create_dependencies_dict,create_feature_dict |
10 from readData import create_accessible_dict,create_dependencies_dict |
16 from cPickle import load,dump |
11 from cPickle import load,dump |
17 |
12 |
18 class Dictionaries(object): |
13 class Dictionaries(object): |
19 ''' |
14 ''' |
20 This class contains all info about name-> id mapping, etc. |
15 This class contains all info about name-> id mapping, etc. |
30 self.maxFeatureId = 0 |
25 self.maxFeatureId = 0 |
31 self.featureDict = {} |
26 self.featureDict = {} |
32 self.dependenciesDict = {} |
27 self.dependenciesDict = {} |
33 self.accessibleDict = {} |
28 self.accessibleDict = {} |
34 self.expandedAccessibles = {} |
29 self.expandedAccessibles = {} |
35 # For SInE features |
30 self.accFile = '' |
36 self.useSine = False |
|
37 self.featureCountDict = {} |
|
38 self.triggerFeaturesDict = {} |
|
39 self.featureTriggeredFormulasDict = {} |
|
40 self.changed = True |
31 self.changed = True |
41 |
32 |
42 """ |
33 """ |
43 Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
34 Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
44 """ |
35 """ |
45 def init_featureDict(self,featureFile,sineFeatures): |
36 def init_featureDict(self,featureFile): |
46 self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
37 self.create_feature_dict(featureFile) |
47 create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
38 #self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
48 self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
39 # create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
|
40 # self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
49 def init_dependenciesDict(self,depFile): |
41 def init_dependenciesDict(self,depFile): |
50 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
42 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
51 def init_accessibleDict(self,accFile): |
43 def init_accessibleDict(self,accFile): |
52 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
44 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
53 |
45 |
54 def init_all(self,args): |
46 def init_all(self,args): |
55 self.featureFileName = 'mash_features' |
47 self.featureFileName = 'mash_features' |
56 self.accFileName = 'mash_accessibility' |
48 self.accFileName = 'mash_accessibility' |
57 self.useSine = args.sineFeatures |
|
58 featureFile = join(args.inputDir,self.featureFileName) |
49 featureFile = join(args.inputDir,self.featureFileName) |
59 depFile = join(args.inputDir,args.depFile) |
50 depFile = join(args.inputDir,args.depFile) |
60 accFile = join(args.inputDir,self.accFileName) |
51 self.accFile = join(args.inputDir,self.accFileName) |
61 self.init_featureDict(featureFile,self.useSine) |
52 self.init_featureDict(featureFile) |
62 self.init_accessibleDict(accFile) |
53 self.init_accessibleDict(self.accFile) |
63 self.init_dependenciesDict(depFile) |
54 self.init_dependenciesDict(depFile) |
64 self.expandedAccessibles = {} |
55 self.expandedAccessibles = {} |
65 self.changed = True |
56 self.changed = True |
|
57 |
|
58 def create_feature_dict(self,inputFile): |
|
59 logger = logging.getLogger('create_feature_dict') |
|
60 self.featureDict = {} |
|
61 IS = open(inputFile,'r') |
|
62 for line in IS: |
|
63 line = line.split(':') |
|
64 name = line[0] |
|
65 # Name Id |
|
66 if self.nameIdDict.has_key(name): |
|
67 logger.warning('%s appears twice in the feature file. Aborting.',name) |
|
68 sys.exit(-1) |
|
69 else: |
|
70 self.nameIdDict[name] = self.maxNameId |
|
71 self.idNameDict[self.maxNameId] = name |
|
72 nameId = self.maxNameId |
|
73 self.maxNameId += 1 |
|
74 features = self.get_features(line) |
|
75 # Store results |
|
76 self.featureDict[nameId] = features |
|
77 IS.close() |
|
78 return |
66 |
79 |
67 def get_name_id(self,name): |
80 def get_name_id(self,name): |
68 """ |
81 """ |
69 Return the Id for a name. |
82 Return the Id for a name. |
70 If it doesn't exist yet, a new entry is created. |
83 If it doesn't exist yet, a new entry is created. |
80 return nameId |
93 return nameId |
81 |
94 |
82 def add_feature(self,featureName): |
95 def add_feature(self,featureName): |
83 if not self.featureIdDict.has_key(featureName): |
96 if not self.featureIdDict.has_key(featureName): |
84 self.featureIdDict[featureName] = self.maxFeatureId |
97 self.featureIdDict[featureName] = self.maxFeatureId |
85 if self.useSine: |
|
86 self.featureCountDict[self.maxFeatureId] = 0 |
|
87 self.maxFeatureId += 1 |
98 self.maxFeatureId += 1 |
88 self.changed = True |
99 self.changed = True |
89 fId = self.featureIdDict[featureName] |
100 fId = self.featureIdDict[featureName] |
90 if self.useSine: |
|
91 self.featureCountDict[fId] += 1 |
|
92 return fId |
101 return fId |
93 |
102 |
94 def get_features(self,line): |
103 def get_features(self,line): |
95 # Feature Ids |
|
96 featureNames = [f.strip() for f in line[1].split()] |
104 featureNames = [f.strip() for f in line[1].split()] |
97 features = [] |
105 features = {} |
98 for fn in featureNames: |
106 for fn in featureNames: |
99 tmp = fn.split('=') |
107 tmp = fn.split('=') |
100 weight = 1.0 |
108 weight = 1.0 |
101 if len(tmp) == 2: |
109 if len(tmp) == 2: |
102 fn = tmp[0] |
110 fn = tmp[0] |
103 weight = float(tmp[1]) |
111 weight = float(tmp[1]) |
104 fId = self.add_feature(tmp[0]) |
112 fId = self.add_feature(tmp[0]) |
105 features.append((fId,weight)) |
113 features[fId] = weight |
|
114 #features[fId] = 1.0 ### |
106 return features |
115 return features |
107 |
116 |
108 def expand_accessibles(self,acc): |
117 def expand_accessibles(self,acc): |
109 accessibles = set(acc) |
118 accessibles = set(acc) |
110 unexpandedQueue = Queue() |
119 unexpandedQueue = Queue() |
140 # Accessible Ids |
149 # Accessible Ids |
141 unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
150 unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
142 self.accessibleDict[nameId] = unExpAcc |
151 self.accessibleDict[nameId] = unExpAcc |
143 features = self.get_features(line) |
152 features = self.get_features(line) |
144 self.featureDict[nameId] = features |
153 self.featureDict[nameId] = features |
145 if self.useSine: |
|
146 # SInE Features |
|
147 minFeatureCount = min([self.featureCountDict[f] for f,_w in features]) |
|
148 triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount] |
|
149 self.triggerFeaturesDict[nameId] = triggerFeatures |
|
150 for f in triggerFeatures: |
|
151 if self.featureTriggeredFormulasDict.has_key(f): |
|
152 self.featureTriggeredFormulasDict[f].append(nameId) |
|
153 else: |
|
154 self.featureTriggeredFormulasDict[f] = [nameId] |
|
155 self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
154 self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
156 self.changed = True |
155 self.changed = True |
157 return nameId |
156 return nameId |
158 |
157 |
159 def parse_overwrite(self,line): |
158 def parse_overwrite(self,line): |
217 |
216 |
218 def save(self,fileName): |
217 def save(self,fileName): |
219 if self.changed: |
218 if self.changed: |
220 dictsStream = open(fileName, 'wb') |
219 dictsStream = open(fileName, 'wb') |
221 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
220 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
222 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
221 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream) |
223 self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream) |
|
224 self.changed = False |
222 self.changed = False |
225 dictsStream.close() |
223 dictsStream.close() |
226 def load(self,fileName): |
224 def load(self,fileName): |
227 dictsStream = open(fileName, 'rb') |
225 dictsStream = open(fileName, 'rb') |
228 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
226 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
229 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
227 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream) |
230 self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream) |
|
231 self.changed = False |
228 self.changed = False |
232 dictsStream.close() |
229 dictsStream.close() |