11 |
11 |
12 @author: Daniel Kuehlwein |
12 @author: Daniel Kuehlwein |
13 ''' |
13 ''' |
14 |
14 |
15 import sys,logging |
15 import sys,logging |
16 |
|
17 def create_feature_dict(nameIdDict,idNameDict,maxNameId,featureIdDict,maxFeatureId,featureCountDict,\ |
|
18 triggerFeaturesDict,featureTriggeredFormulasDict,sineFeatures,inputFile): |
|
19 logger = logging.getLogger('create_feature_dict') |
|
20 featureDict = {} |
|
21 IS = open(inputFile,'r') |
|
22 for line in IS: |
|
23 line = line.split(':') |
|
24 name = line[0] |
|
25 # Name Id |
|
26 if nameIdDict.has_key(name): |
|
27 logger.warning('%s appears twice in the feature file. Aborting.',name) |
|
28 sys.exit(-1) |
|
29 else: |
|
30 nameIdDict[name] = maxNameId |
|
31 idNameDict[maxNameId] = name |
|
32 nameId = maxNameId |
|
33 maxNameId += 1 |
|
34 # Feature Ids |
|
35 featureNames = [f.strip() for f in line[1].split()] |
|
36 features = [] |
|
37 minFeatureCount = 9999999 |
|
38 for fn in featureNames: |
|
39 weight = 1.0 |
|
40 tmp = fn.split('=') |
|
41 if len(tmp) == 2: |
|
42 fn = tmp[0] |
|
43 weight = float(tmp[1]) |
|
44 if not featureIdDict.has_key(fn): |
|
45 featureIdDict[fn] = maxFeatureId |
|
46 featureCountDict[maxFeatureId] = 0 |
|
47 maxFeatureId += 1 |
|
48 fId = featureIdDict[fn] |
|
49 features.append((fId,weight)) |
|
50 if sineFeatures: |
|
51 featureCountDict[fId] += 1 |
|
52 minFeatureCount = min(minFeatureCount,featureCountDict[fId]) |
|
53 # Store results |
|
54 featureDict[nameId] = features |
|
55 if sineFeatures: |
|
56 triggerFeatures = [f for f,_w in features if featureCountDict[f] == minFeatureCount] |
|
57 triggerFeaturesDict[nameId] = triggerFeatures |
|
58 for f in triggerFeatures: |
|
59 if featureTriggeredFormulasDict.has_key(f): |
|
60 featureTriggeredFormulasDict[f].append(nameId) |
|
61 else: |
|
62 featureTriggeredFormulasDict[f] = [nameId] |
|
63 IS.close() |
|
64 return featureDict,maxNameId,maxFeatureId,featureCountDict,triggerFeaturesDict,featureTriggeredFormulasDict |
|
65 |
16 |
66 def create_dependencies_dict(nameIdDict,inputFile): |
17 def create_dependencies_dict(nameIdDict,inputFile): |
67 logger = logging.getLogger('create_dependencies_dict') |
18 logger = logging.getLogger('create_dependencies_dict') |
68 dependenciesDict = {} |
19 dependenciesDict = {} |
69 IS = open(inputFile,'r') |
20 IS = open(inputFile,'r') |