29 self.maxNameId = 0 |
29 self.maxNameId = 0 |
30 self.maxFeatureId = 0 |
30 self.maxFeatureId = 0 |
31 self.featureDict = {} |
31 self.featureDict = {} |
32 self.dependenciesDict = {} |
32 self.dependenciesDict = {} |
33 self.accessibleDict = {} |
33 self.accessibleDict = {} |
|
34 self.featureCountDict = {} |
34 self.expandedAccessibles = {} |
35 self.expandedAccessibles = {} |
35 self.changed = True |
36 self.changed = True |
36 |
37 |
37 """ |
38 """ |
38 Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
39 Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
39 """ |
40 """ |
40 def init_featureDict(self,featureFile): |
41 def init_featureDict(self,featureFile): |
41 self.featureDict,self.maxNameId,self.maxFeatureId = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\ |
42 self.featureDict,self.maxNameId,self.maxFeatureId, self.featureCountDict = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\ |
42 self.maxFeatureId,featureFile) |
43 self.maxFeatureId,self.featureCountDict,featureFile) |
43 def init_dependenciesDict(self,depFile): |
44 def init_dependenciesDict(self,depFile): |
44 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
45 self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
45 def init_accessibleDict(self,accFile): |
46 def init_accessibleDict(self,accFile): |
46 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
47 self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
47 |
48 |
71 return nameId |
72 return nameId |
72 |
73 |
73 def add_feature(self,featureName): |
74 def add_feature(self,featureName): |
74 if not self.featureIdDict.has_key(featureName): |
75 if not self.featureIdDict.has_key(featureName): |
75 self.featureIdDict[featureName] = self.maxFeatureId |
76 self.featureIdDict[featureName] = self.maxFeatureId |
|
77 self.featureCountDict[self.maxFeatureId] = 0 |
76 self.maxFeatureId += 1 |
78 self.maxFeatureId += 1 |
77 self.changed = True |
79 self.changed = True |
78 return self.featureIdDict[featureName] |
80 fId = self.featureIdDict[featureName] |
|
81 self.featureCountDict[fId] += 1 |
|
82 return fId |
79 |
83 |
80 def get_features(self,line): |
84 def get_features(self,line): |
81 # Feature Ids |
85 # Feature Ids |
82 featureNames = [f.strip() for f in line[1].split()] |
86 featureNames = [f.strip() for f in line[1].split()] |
83 features = [] |
87 features = [] |
84 for fn in featureNames: |
88 for fn in featureNames: |
85 tmp = fn.split('=') |
89 tmp = fn.split('=') |
|
90 weight = 1.0 |
86 if len(tmp) == 2: |
91 if len(tmp) == 2: |
87 fId = self.add_feature(tmp[0]) |
92 fn = tmp[0] |
88 features.append((fId,float(tmp[1]))) |
93 weight = float(tmp[1]) |
89 else: |
94 fId = self.add_feature(tmp[0]) |
90 fId = self.add_feature(fn) |
95 features.append((fId,weight)) |
91 features.append((fId,1.0)) |
|
92 return features |
96 return features |
93 |
97 |
94 def expand_accessibles(self,acc): |
98 def expand_accessibles(self,acc): |
95 accessibles = set(acc) |
99 accessibles = set(acc) |
96 unexpandedQueue = Queue() |
100 unexpandedQueue = Queue() |
174 accIdAcc = self.accessibleDict[accId] |
178 accIdAcc = self.accessibleDict[accId] |
175 self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
179 self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
176 self.changed = True |
180 self.changed = True |
177 accessibles = self.expand_accessibles(unExpAcc) |
181 accessibles = self.expand_accessibles(unExpAcc) |
178 features = self.get_features(line) |
182 features = self.get_features(line) |
179 |
183 # Get hints: |
180 return name,features,accessibles |
184 if len(line) == 3: |
|
185 hints = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
186 else: |
|
187 hints = [] |
|
188 |
|
189 return name,features,accessibles,hints |
181 |
190 |
182 def save(self,fileName): |
191 def save(self,fileName): |
183 if self.changed: |
192 if self.changed: |
184 dictsStream = open(fileName, 'wb') |
193 dictsStream = open(fileName, 'wb') |
185 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
194 dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
186 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream) |
195 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict),dictsStream) |
187 self.changed = False |
196 self.changed = False |
188 dictsStream.close() |
197 dictsStream.close() |
189 def load(self,fileName): |
198 def load(self,fileName): |
190 dictsStream = open(fileName, 'rb') |
199 dictsStream = open(fileName, 'rb') |
191 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
200 self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
192 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream) |
201 self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict = load(dictsStream) |
193 self.changed = False |
202 self.changed = False |
194 dictsStream.close() |
203 dictsStream.close() |