30 self.maxFeatureId = 0 |
30 self.maxFeatureId = 0 |
31 self.featureDict = {} |
31 self.featureDict = {} |
32 self.dependenciesDict = {} |
32 self.dependenciesDict = {} |
33 self.accessibleDict = {} |
33 self.accessibleDict = {} |
34 self.expandedAccessibles = {} |
34 self.expandedAccessibles = {} |
35 # For SInE-like prior |
35 # For SInE features |
36 self.featureCountDict = {} |
36 self.useSine = False |
37 self.triggerFeatures = {} |
37 self.featureCountDict = {} |
|
38 self.triggerFeaturesDict = {} |
|
39 self.featureTriggeredFormulasDict = {} |
38 self.changed = True |
40 self.changed = True |
39 |
41 |
40 """ |
42 """ |
41 Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
43 Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
42 """ |
44 """ |
43 def init_featureDict(self,featureFile): |
45 def init_featureDict(self,featureFile,sineFeatures): |
44 self.featureDict,self.maxNameId,self.maxFeatureId, self.featureCountDict,self.triggerFeatures = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\ |
46 self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
45 self.maxFeatureId,self.featureCountDict,self.triggerFeatures,featureFile) |
47 create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
|
48 self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
46 def init_dependenciesDict(self,depFile): |
49 def init_dependenciesDict(self,depFile): |
47 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
50 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
48 def init_accessibleDict(self,accFile): |
51 def init_accessibleDict(self,accFile): |
49 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
52 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
50 |
53 |
51 def init_all(self,inputFolder,featureFileName = 'mash_features',depFileName='mash_dependencies',accFileName = 'mash_accessibility'): |
54 def init_all(self,args): |
52 featureFile = join(inputFolder,featureFileName) |
55 self.featureFileName = 'mash_features' |
53 depFile = join(inputFolder,depFileName) |
56 self.accFileName = 'mash_accessibility' |
54 accFile = join(inputFolder,accFileName) |
57 self.useSine = args.sineFeatures |
55 self.init_featureDict(featureFile) |
58 featureFile = join(args.inputDir,self.featureFileName) |
|
59 depFile = join(args.inputDir,args.depFile) |
|
60 accFile = join(args.inputDir,self.accFileName) |
|
61 self.init_featureDict(featureFile,self.useSine) |
56 self.init_accessibleDict(accFile) |
62 self.init_accessibleDict(accFile) |
57 self.init_dependenciesDict(depFile) |
63 self.init_dependenciesDict(depFile) |
58 self.expandedAccessibles = {} |
64 self.expandedAccessibles = {} |
59 self.changed = True |
65 self.changed = True |
60 |
66 |
74 return nameId |
80 return nameId |
75 |
81 |
76 def add_feature(self,featureName): |
82 def add_feature(self,featureName): |
77 if not self.featureIdDict.has_key(featureName): |
83 if not self.featureIdDict.has_key(featureName): |
78 self.featureIdDict[featureName] = self.maxFeatureId |
84 self.featureIdDict[featureName] = self.maxFeatureId |
79 self.featureCountDict[self.maxFeatureId] = 0 |
85 if self.useSine: |
|
86 self.featureCountDict[self.maxFeatureId] = 0 |
80 self.maxFeatureId += 1 |
87 self.maxFeatureId += 1 |
81 self.changed = True |
88 self.changed = True |
82 fId = self.featureIdDict[featureName] |
89 fId = self.featureIdDict[featureName] |
83 self.featureCountDict[fId] += 1 |
90 if self.useSine: |
|
91 self.featureCountDict[fId] += 1 |
84 return fId |
92 return fId |
85 |
93 |
86 def get_features(self,line): |
94 def get_features(self,line): |
87 # Feature Ids |
95 # Feature Ids |
88 featureNames = [f.strip() for f in line[1].split()] |
96 featureNames = [f.strip() for f in line[1].split()] |
126 |
134 |
127 # line = name:accessibles;features;dependencies |
135 # line = name:accessibles;features;dependencies |
128 line = line.split(':') |
136 line = line.split(':') |
129 name = line[0].strip() |
137 name = line[0].strip() |
130 nameId = self.get_name_id(name) |
138 nameId = self.get_name_id(name) |
131 |
|
132 line = line[1].split(';') |
139 line = line[1].split(';') |
133 # Accessible Ids |
140 # Accessible Ids |
134 unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
141 unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
135 self.accessibleDict[nameId] = unExpAcc |
142 self.accessibleDict[nameId] = unExpAcc |
136 features = self.get_features(line) |
143 features = self.get_features(line) |
137 self.featureDict[nameId] = features |
144 self.featureDict[nameId] = features |
138 minVal = min([self.featureCountDict[f] for f,_w in features]) |
145 if self.useSine: |
139 self.triggerFeatures[nameId] = [f for f,_w in features if self.featureCountDict[f] == minVal] |
146 # SInE Features |
|
147 minFeatureCount = min([self.featureCountDict[f] for f,_w in features]) |
|
148 triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount] |
|
149 self.triggerFeaturesDict[nameId] = triggerFeatures |
|
150 for f in triggerFeatures: |
|
151 if self.featureTriggeredFormulasDict.has_key(f): |
|
152 self.featureTriggeredFormulasDict[f].append(nameId) |
|
153 else: |
|
154 self.featureTriggeredFormulasDict[f] = [nameId] |
140 self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
155 self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
141 self.changed = True |
156 self.changed = True |
142 return nameId |
157 return nameId |
143 |
158 |
144 def parse_overwrite(self,line): |
159 def parse_overwrite(self,line): |
195 |
210 |
196 def save(self,fileName): |
211 def save(self,fileName): |
197 if self.changed: |
212 if self.changed: |
198 dictsStream = open(fileName, 'wb') |
213 dictsStream = open(fileName, 'wb') |
199 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
214 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
200 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures),dictsStream) |
215 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
|
216 self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream) |
201 self.changed = False |
217 self.changed = False |
202 dictsStream.close() |
218 dictsStream.close() |
203 def load(self,fileName): |
219 def load(self,fileName): |
204 dictsStream = open(fileName, 'rb') |
220 dictsStream = open(fileName, 'rb') |
205 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
221 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
206 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures = load(dictsStream) |
222 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
|
223 self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream) |
207 self.changed = False |
224 self.changed = False |
208 dictsStream.close() |
225 dictsStream.close() |