author | blanchet |
Tue, 20 Aug 2013 14:36:22 +0200 | |
changeset 53100 | 1133b9e83f09 |
parent 50951 | e1cbaa7d5536 |
child 53555 | 12251bc889f1 |
permissions | -rw-r--r-- |
50222 | 1 |
# Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
2 |
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
|
3 |
# Copyright 2012 |
|
4 |
# |
|
5 |
# Persistent dictionaries: accessibility, dependencies, and features. |
|
6 |
||
50220 | 7 |
''' |
8 |
Created on Jul 12, 2012 |
|
9 |
||
10 |
@author: daniel |
|
11 |
''' |
|
12 |
||
13 |
from os.path import join |
|
14 |
from Queue import Queue |
|
15 |
from readData import create_accessible_dict,create_dependencies_dict,create_feature_dict |
|
16 |
from cPickle import load,dump |
|
17 |
||
18 |
class Dictionaries(object): |
|
19 |
''' |
|
20 |
This class contains all info about name-> id mapping, etc. |
|
21 |
''' |
|
22 |
def __init__(self): |
|
23 |
''' |
|
24 |
Constructor |
|
25 |
''' |
|
26 |
self.nameIdDict = {} |
|
27 |
self.idNameDict = {} |
|
28 |
self.featureIdDict={} |
|
29 |
self.maxNameId = 0 |
|
30 |
self.maxFeatureId = 0 |
|
31 |
self.featureDict = {} |
|
32 |
self.dependenciesDict = {} |
|
33 |
self.accessibleDict = {} |
|
50840 | 34 |
self.expandedAccessibles = {} |
50951 | 35 |
# For SInE features |
36 |
self.useSine = False |
|
37 |
self.featureCountDict = {} |
|
38 |
self.triggerFeaturesDict = {} |
|
39 |
self.featureTriggeredFormulasDict = {} |
|
50220 | 40 |
self.changed = True |
50388 | 41 |
|
50220 | 42 |
""" |
50951 | 43 |
Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
50388 | 44 |
""" |
50951 | 45 |
def init_featureDict(self,featureFile,sineFeatures): |
46 |
self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
|
47 |
create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
|
48 |
self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
|
50220 | 49 |
def init_dependenciesDict(self,depFile): |
50 |
self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
|
51 |
def init_accessibleDict(self,accFile): |
|
52 |
self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
|
50388 | 53 |
|
50951 | 54 |
def init_all(self,args): |
55 |
self.featureFileName = 'mash_features' |
|
56 |
self.accFileName = 'mash_accessibility' |
|
57 |
self.useSine = args.sineFeatures |
|
58 |
featureFile = join(args.inputDir,self.featureFileName) |
|
59 |
depFile = join(args.inputDir,args.depFile) |
|
60 |
accFile = join(args.inputDir,self.accFileName) |
|
61 |
self.init_featureDict(featureFile,self.useSine) |
|
50220 | 62 |
self.init_accessibleDict(accFile) |
63 |
self.init_dependenciesDict(depFile) |
|
64 |
self.expandedAccessibles = {} |
|
65 |
self.changed = True |
|
50388 | 66 |
|
50220 | 67 |
def get_name_id(self,name): |
68 |
""" |
|
69 |
Return the Id for a name. |
|
70 |
If it doesn't exist yet, a new entry is created. |
|
71 |
""" |
|
72 |
if self.nameIdDict.has_key(name): |
|
73 |
nameId = self.nameIdDict[name] |
|
74 |
else: |
|
75 |
self.nameIdDict[name] = self.maxNameId |
|
76 |
self.idNameDict[self.maxNameId] = name |
|
77 |
nameId = self.maxNameId |
|
50388 | 78 |
self.maxNameId += 1 |
50220 | 79 |
self.changed = True |
80 |
return nameId |
|
81 |
||
82 |
def add_feature(self,featureName): |
|
83 |
if not self.featureIdDict.has_key(featureName): |
|
84 |
self.featureIdDict[featureName] = self.maxFeatureId |
|
50951 | 85 |
if self.useSine: |
86 |
self.featureCountDict[self.maxFeatureId] = 0 |
|
50220 | 87 |
self.maxFeatureId += 1 |
50388 | 88 |
self.changed = True |
50827 | 89 |
fId = self.featureIdDict[featureName] |
50951 | 90 |
if self.useSine: |
91 |
self.featureCountDict[fId] += 1 |
|
50827 | 92 |
return fId |
50388 | 93 |
|
94 |
def get_features(self,line): |
|
95 |
# Feature Ids |
|
96 |
featureNames = [f.strip() for f in line[1].split()] |
|
97 |
features = [] |
|
98 |
for fn in featureNames: |
|
99 |
tmp = fn.split('=') |
|
50827 | 100 |
weight = 1.0 |
50388 | 101 |
if len(tmp) == 2: |
50827 | 102 |
fn = tmp[0] |
103 |
weight = float(tmp[1]) |
|
104 |
fId = self.add_feature(tmp[0]) |
|
105 |
features.append((fId,weight)) |
|
50388 | 106 |
return features |
107 |
||
50220 | 108 |
def expand_accessibles(self,acc): |
109 |
accessibles = set(acc) |
|
110 |
unexpandedQueue = Queue() |
|
111 |
for a in acc: |
|
112 |
if self.expandedAccessibles.has_key(a): |
|
113 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
114 |
else: |
|
115 |
unexpandedQueue.put(a) |
|
116 |
while not unexpandedQueue.empty(): |
|
117 |
nextUnExp = unexpandedQueue.get() |
|
50388 | 118 |
nextUnExpAcc = self.accessibleDict[nextUnExp] |
50220 | 119 |
for a in nextUnExpAcc: |
120 |
if not a in accessibles: |
|
121 |
accessibles = accessibles.union([a]) |
|
122 |
if self.expandedAccessibles.has_key(a): |
|
123 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
124 |
else: |
|
50388 | 125 |
unexpandedQueue.put(a) |
50220 | 126 |
return list(accessibles) |
50388 | 127 |
|
50220 | 128 |
def parse_fact(self,line): |
129 |
""" |
|
130 |
Parses a single line, extracting accessibles, features, and dependencies. |
|
131 |
""" |
|
132 |
assert line.startswith('! ') |
|
133 |
line = line[2:] |
|
50388 | 134 |
|
50220 | 135 |
# line = name:accessibles;features;dependencies |
136 |
line = line.split(':') |
|
137 |
name = line[0].strip() |
|
138 |
nameId = self.get_name_id(name) |
|
50388 | 139 |
line = line[1].split(';') |
50220 | 140 |
# Accessible Ids |
141 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
142 |
self.accessibleDict[nameId] = unExpAcc |
|
50840 | 143 |
features = self.get_features(line) |
144 |
self.featureDict[nameId] = features |
|
50951 | 145 |
if self.useSine: |
146 |
# SInE Features |
|
147 |
minFeatureCount = min([self.featureCountDict[f] for f,_w in features]) |
|
148 |
triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount] |
|
149 |
self.triggerFeaturesDict[nameId] = triggerFeatures |
|
150 |
for f in triggerFeatures: |
|
151 |
if self.featureTriggeredFormulasDict.has_key(f): |
|
152 |
self.featureTriggeredFormulasDict[f].append(nameId) |
|
153 |
else: |
|
154 |
self.featureTriggeredFormulasDict[f] = [nameId] |
|
50840 | 155 |
self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
50220 | 156 |
self.changed = True |
157 |
return nameId |
|
50388 | 158 |
|
50220 | 159 |
def parse_overwrite(self,line): |
160 |
""" |
|
161 |
Parses a single line, extracts the problemId and the Ids of the dependencies. |
|
162 |
""" |
|
163 |
assert line.startswith('p ') |
|
164 |
line = line[2:] |
|
50388 | 165 |
|
50220 | 166 |
# line = name:dependencies |
167 |
line = line.split(':') |
|
168 |
name = line[0].strip() |
|
169 |
nameId = self.get_name_id(name) |
|
50388 | 170 |
|
50220 | 171 |
dependencies = [self.nameIdDict[d.strip()] for d in line[1].split()] |
172 |
self.changed = True |
|
173 |
return nameId,dependencies |
|
50388 | 174 |
|
50220 | 175 |
def parse_problem(self,line): |
176 |
""" |
|
50827 | 177 |
Parses a problem and returns the features, the accessibles, and any hints. |
50220 | 178 |
""" |
179 |
assert line.startswith('? ') |
|
180 |
line = line[2:] |
|
181 |
name = None |
|
53100 | 182 |
numberOfPredictions = None |
183 |
||
184 |
# Check whether there is a problem name: |
|
185 |
tmp = line.split('#') |
|
186 |
if len(tmp) == 2: |
|
187 |
numberOfPredictions = int(tmp[0].strip()) |
|
188 |
line = tmp[1] |
|
50388 | 189 |
|
50220 | 190 |
# Check whether there is a problem name: |
191 |
tmp = line.split(':') |
|
192 |
if len(tmp) == 2: |
|
193 |
name = tmp[0].strip() |
|
194 |
line = tmp[1] |
|
50388 | 195 |
|
50220 | 196 |
# line = accessibles;features |
197 |
line = line.split(';') |
|
198 |
# Accessible Ids, expand and store the accessibles. |
|
199 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
200 |
if len(self.expandedAccessibles.keys())>=100: |
|
201 |
self.expandedAccessibles = {} |
|
202 |
self.changed = True |
|
203 |
for accId in unExpAcc: |
|
204 |
if not self.expandedAccessibles.has_key(accId): |
|
205 |
accIdAcc = self.accessibleDict[accId] |
|
206 |
self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
|
207 |
self.changed = True |
|
208 |
accessibles = self.expand_accessibles(unExpAcc) |
|
50388 | 209 |
features = self.get_features(line) |
50827 | 210 |
# Get hints: |
211 |
if len(line) == 3: |
|
212 |
hints = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
213 |
else: |
|
214 |
hints = [] |
|
50619
b958a94cf811
new version of MaSh, with theory-level reasoning
blanchet
parents:
50388
diff
changeset
|
215 |
|
53100 | 216 |
return name,features,accessibles,hints,numberOfPredictions |
50388 | 217 |
|
50220 | 218 |
def save(self,fileName): |
219 |
if self.changed: |
|
220 |
dictsStream = open(fileName, 'wb') |
|
221 |
dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
|
50951 | 222 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
223 |
self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream) |
|
50220 | 224 |
self.changed = False |
225 |
dictsStream.close() |
|
226 |
def load(self,fileName): |
|
50388 | 227 |
dictsStream = open(fileName, 'rb') |
50220 | 228 |
self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
50951 | 229 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ |
230 |
self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream) |
|
50220 | 231 |
self.changed = False |
232 |
dictsStream.close() |