author | blanchet |
Thu, 14 Nov 2013 15:57:48 +0100 | |
changeset 54432 | 68f8bd1641da |
parent 54056 | 8298976acb54 |
child 54692 | 5ce1b9613705 |
permissions | -rw-r--r-- |
50222 | 1 |
# Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
2 |
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
|
53555 | 3 |
# Copyright 2012-2013 |
50222 | 4 |
# |
5 |
# Persistent dictionaries: accessibility, dependencies, and features. |
|
6 |
||
54056 | 7 |
import sys |
50220 | 8 |
from os.path import join |
9 |
from Queue import Queue |
|
53555 | 10 |
from readData import create_accessible_dict,create_dependencies_dict |
50220 | 11 |
from cPickle import load,dump |
54056 | 12 |
from exceptions import LookupError |
50220 | 13 |
|
14 |
class Dictionaries(object): |
|
15 |
''' |
|
16 |
This class contains all info about name-> id mapping, etc. |
|
17 |
''' |
|
18 |
def __init__(self): |
|
19 |
''' |
|
20 |
Constructor |
|
21 |
''' |
|
22 |
self.nameIdDict = {} |
|
23 |
self.idNameDict = {} |
|
24 |
self.featureIdDict={} |
|
54432
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
25 |
self.maxNameId = 1 |
50220 | 26 |
self.maxFeatureId = 0 |
27 |
self.featureDict = {} |
|
28 |
self.dependenciesDict = {} |
|
29 |
self.accessibleDict = {} |
|
50840 | 30 |
self.expandedAccessibles = {} |
53555 | 31 |
self.accFile = '' |
50220 | 32 |
self.changed = True |
54432
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
33 |
# Unnamed facts |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
34 |
self.nameIdDict[''] = 0 |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
35 |
self.idNameDict[0] = 'Unnamed Fact' |
50388 | 36 |
|
50220 | 37 |
""" |
50951 | 38 |
Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
50388 | 39 |
""" |
53555 | 40 |
def init_featureDict(self,featureFile): |
41 |
self.create_feature_dict(featureFile) |
|
42 |
#self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
|
43 |
# create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
|
44 |
# self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
|
50220 | 45 |
def init_dependenciesDict(self,depFile): |
46 |
self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
|
47 |
def init_accessibleDict(self,accFile): |
|
48 |
self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
|
50388 | 49 |
|
50951 | 50 |
def init_all(self,args): |
51 |
self.featureFileName = 'mash_features' |
|
52 |
self.accFileName = 'mash_accessibility' |
|
53 |
featureFile = join(args.inputDir,self.featureFileName) |
|
54 |
depFile = join(args.inputDir,args.depFile) |
|
53555 | 55 |
self.accFile = join(args.inputDir,self.accFileName) |
56 |
self.init_featureDict(featureFile) |
|
57 |
self.init_accessibleDict(self.accFile) |
|
50220 | 58 |
self.init_dependenciesDict(depFile) |
59 |
self.expandedAccessibles = {} |
|
60 |
self.changed = True |
|
50388 | 61 |
|
53555 | 62 |
def create_feature_dict(self,inputFile): |
63 |
self.featureDict = {} |
|
64 |
IS = open(inputFile,'r') |
|
65 |
for line in IS: |
|
66 |
line = line.split(':') |
|
67 |
name = line[0] |
|
68 |
# Name Id |
|
69 |
if self.nameIdDict.has_key(name): |
|
54056 | 70 |
raise LookupError('%s appears twice in the feature file. Aborting.'% name) |
53555 | 71 |
sys.exit(-1) |
72 |
else: |
|
73 |
self.nameIdDict[name] = self.maxNameId |
|
74 |
self.idNameDict[self.maxNameId] = name |
|
75 |
nameId = self.maxNameId |
|
76 |
self.maxNameId += 1 |
|
77 |
features = self.get_features(line) |
|
78 |
# Store results |
|
79 |
self.featureDict[nameId] = features |
|
80 |
IS.close() |
|
81 |
return |
|
82 |
||
50220 | 83 |
def get_name_id(self,name): |
84 |
""" |
|
85 |
Return the Id for a name. |
|
86 |
If it doesn't exist yet, a new entry is created. |
|
87 |
""" |
|
88 |
if self.nameIdDict.has_key(name): |
|
89 |
nameId = self.nameIdDict[name] |
|
90 |
else: |
|
91 |
self.nameIdDict[name] = self.maxNameId |
|
92 |
self.idNameDict[self.maxNameId] = name |
|
93 |
nameId = self.maxNameId |
|
50388 | 94 |
self.maxNameId += 1 |
50220 | 95 |
self.changed = True |
96 |
return nameId |
|
97 |
||
98 |
def add_feature(self,featureName): |
|
99 |
if not self.featureIdDict.has_key(featureName): |
|
100 |
self.featureIdDict[featureName] = self.maxFeatureId |
|
101 |
self.maxFeatureId += 1 |
|
50388 | 102 |
self.changed = True |
50827 | 103 |
fId = self.featureIdDict[featureName] |
104 |
return fId |
|
50388 | 105 |
|
106 |
def get_features(self,line): |
|
107 |
featureNames = [f.strip() for f in line[1].split()] |
|
53555 | 108 |
features = {} |
50388 | 109 |
for fn in featureNames: |
110 |
tmp = fn.split('=') |
|
53555 | 111 |
weight = 1.0 |
50388 | 112 |
if len(tmp) == 2: |
50827 | 113 |
fn = tmp[0] |
114 |
weight = float(tmp[1]) |
|
115 |
fId = self.add_feature(tmp[0]) |
|
53555 | 116 |
features[fId] = weight |
117 |
#features[fId] = 1.0 ### |
|
50388 | 118 |
return features |
119 |
||
50220 | 120 |
def expand_accessibles(self,acc): |
121 |
accessibles = set(acc) |
|
122 |
unexpandedQueue = Queue() |
|
123 |
for a in acc: |
|
124 |
if self.expandedAccessibles.has_key(a): |
|
125 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
126 |
else: |
|
127 |
unexpandedQueue.put(a) |
|
128 |
while not unexpandedQueue.empty(): |
|
129 |
nextUnExp = unexpandedQueue.get() |
|
50388 | 130 |
nextUnExpAcc = self.accessibleDict[nextUnExp] |
50220 | 131 |
for a in nextUnExpAcc: |
132 |
if not a in accessibles: |
|
133 |
accessibles = accessibles.union([a]) |
|
134 |
if self.expandedAccessibles.has_key(a): |
|
135 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
136 |
else: |
|
50388 | 137 |
unexpandedQueue.put(a) |
50220 | 138 |
return list(accessibles) |
50388 | 139 |
|
54056 | 140 |
def parse_unExpAcc(self,line): |
141 |
try: |
|
142 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line.split()] |
|
143 |
except: |
|
144 |
raise LookupError('Cannot find the accessibles:%s. Accessibles need to be introduced before referring to them.' % line) |
|
145 |
return unExpAcc |
|
146 |
||
50220 | 147 |
def parse_fact(self,line): |
148 |
""" |
|
149 |
Parses a single line, extracting accessibles, features, and dependencies. |
|
150 |
""" |
|
151 |
assert line.startswith('! ') |
|
152 |
line = line[2:] |
|
50388 | 153 |
|
50220 | 154 |
# line = name:accessibles;features;dependencies |
155 |
line = line.split(':') |
|
156 |
name = line[0].strip() |
|
157 |
nameId = self.get_name_id(name) |
|
50388 | 158 |
line = line[1].split(';') |
50840 | 159 |
features = self.get_features(line) |
160 |
self.featureDict[nameId] = features |
|
54432
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
161 |
try: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
162 |
self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
163 |
except: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
164 |
unknownDeps = [] |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
165 |
for d in line[2].split(): |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
166 |
if not self.nameIdDict.has_key(d): |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
167 |
unknownDeps.append(d) |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
168 |
raise LookupError('Unknown fact used as dependency: %s. Facts need to be introduced before being used as depedency.' % ','.join(unknownDeps)) |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
169 |
self.accessibleDict[nameId] = self.parse_unExpAcc(line[0]) |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
170 |
|
50220 | 171 |
self.changed = True |
172 |
return nameId |
|
50388 | 173 |
|
50220 | 174 |
def parse_overwrite(self,line): |
175 |
""" |
|
176 |
Parses a single line, extracts the problemId and the Ids of the dependencies. |
|
177 |
""" |
|
178 |
assert line.startswith('p ') |
|
179 |
line = line[2:] |
|
50388 | 180 |
|
50220 | 181 |
# line = name:dependencies |
182 |
line = line.split(':') |
|
183 |
name = line[0].strip() |
|
54432
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
184 |
try: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
185 |
nameId = self.nameIdDict[name] |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
186 |
except: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
187 |
raise LookupError('Trying to overwrite dependencies for unknown fact: %s. Facts need to be introduced before overwriting them.' % name) |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
188 |
try: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
189 |
dependencies = [self.nameIdDict[d.strip()] for d in line[1].split()] |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
190 |
except: |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
191 |
unknownDeps = [] |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
192 |
for d in line[1].split(): |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
193 |
if not self.nameIdDict.has_key(d): |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
194 |
unknownDeps.append(d) |
68f8bd1641da
have MaSh support nameless facts (i.e. proofs) and use that support
blanchet
parents:
54056
diff
changeset
|
195 |
raise LookupError('Unknown fact used as dependency: %s. Facts need to be introduced before being used as depedency.' % ','.join(unknownDeps)) |
50220 | 196 |
self.changed = True |
197 |
return nameId,dependencies |
|
50388 | 198 |
|
50220 | 199 |
def parse_problem(self,line): |
200 |
""" |
|
50827 | 201 |
Parses a problem and returns the features, the accessibles, and any hints. |
50220 | 202 |
""" |
203 |
assert line.startswith('? ') |
|
204 |
line = line[2:] |
|
205 |
name = None |
|
53100 | 206 |
numberOfPredictions = None |
207 |
||
54056 | 208 |
# How many predictions should be returned: |
53100 | 209 |
tmp = line.split('#') |
210 |
if len(tmp) == 2: |
|
211 |
numberOfPredictions = int(tmp[0].strip()) |
|
212 |
line = tmp[1] |
|
50388 | 213 |
|
50220 | 214 |
# Check whether there is a problem name: |
215 |
tmp = line.split(':') |
|
216 |
if len(tmp) == 2: |
|
217 |
name = tmp[0].strip() |
|
218 |
line = tmp[1] |
|
50388 | 219 |
|
50220 | 220 |
# line = accessibles;features |
221 |
line = line.split(';') |
|
54056 | 222 |
features = self.get_features(line) |
223 |
||
50220 | 224 |
# Accessible Ids, expand and store the accessibles. |
54056 | 225 |
#unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
226 |
unExpAcc = self.parse_unExpAcc(line[0]) |
|
50220 | 227 |
if len(self.expandedAccessibles.keys())>=100: |
228 |
self.expandedAccessibles = {} |
|
229 |
self.changed = True |
|
230 |
for accId in unExpAcc: |
|
231 |
if not self.expandedAccessibles.has_key(accId): |
|
232 |
accIdAcc = self.accessibleDict[accId] |
|
233 |
self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
|
234 |
self.changed = True |
|
235 |
accessibles = self.expand_accessibles(unExpAcc) |
|
54056 | 236 |
|
50827 | 237 |
# Get hints: |
238 |
if len(line) == 3: |
|
239 |
hints = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
240 |
else: |
|
241 |
hints = [] |
|
50619
b958a94cf811
new version of MaSh, with theory-level reasoning
blanchet
parents:
50388
diff
changeset
|
242 |
|
53100 | 243 |
return name,features,accessibles,hints,numberOfPredictions |
50388 | 244 |
|
50220 | 245 |
def save(self,fileName): |
246 |
if self.changed: |
|
247 |
dictsStream = open(fileName, 'wb') |
|
248 |
dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
|
53555 | 249 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream) |
50220 | 250 |
self.changed = False |
251 |
dictsStream.close() |
|
252 |
def load(self,fileName): |
|
50388 | 253 |
dictsStream = open(fileName, 'rb') |
50220 | 254 |
self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
53555 | 255 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream) |
50220 | 256 |
self.changed = False |
257 |
dictsStream.close() |