author | blanchet |
Thu, 12 Sep 2013 09:59:45 +0200 | |
changeset 53555 | 12251bc889f1 |
parent 53100 | 1133b9e83f09 |
child 54056 | 8298976acb54 |
permissions | -rw-r--r-- |
50222 | 1 |
# Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
2 |
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
|
53555 | 3 |
# Copyright 2012-2013 |
50222 | 4 |
# |
5 |
# Persistent dictionaries: accessibility, dependencies, and features. |
|
6 |
||
53555 | 7 |
import logging,sys |
50220 | 8 |
from os.path import join |
9 |
from Queue import Queue |
|
53555 | 10 |
from readData import create_accessible_dict,create_dependencies_dict |
50220 | 11 |
from cPickle import load,dump |
12 |
||
13 |
class Dictionaries(object): |
|
14 |
''' |
|
15 |
This class contains all info about name-> id mapping, etc. |
|
16 |
''' |
|
17 |
def __init__(self): |
|
18 |
''' |
|
19 |
Constructor |
|
20 |
''' |
|
21 |
self.nameIdDict = {} |
|
22 |
self.idNameDict = {} |
|
23 |
self.featureIdDict={} |
|
24 |
self.maxNameId = 0 |
|
25 |
self.maxFeatureId = 0 |
|
26 |
self.featureDict = {} |
|
27 |
self.dependenciesDict = {} |
|
28 |
self.accessibleDict = {} |
|
50840 | 29 |
self.expandedAccessibles = {} |
53555 | 30 |
self.accFile = '' |
50220 | 31 |
self.changed = True |
50388 | 32 |
|
50220 | 33 |
""" |
50951 | 34 |
Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
50388 | 35 |
""" |
53555 | 36 |
def init_featureDict(self,featureFile): |
37 |
self.create_feature_dict(featureFile) |
|
38 |
#self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ |
|
39 |
# create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ |
|
40 |
# self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) |
|
50220 | 41 |
def init_dependenciesDict(self,depFile): |
42 |
self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
|
43 |
def init_accessibleDict(self,accFile): |
|
44 |
self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
|
50388 | 45 |
|
50951 | 46 |
def init_all(self,args): |
47 |
self.featureFileName = 'mash_features' |
|
48 |
self.accFileName = 'mash_accessibility' |
|
49 |
featureFile = join(args.inputDir,self.featureFileName) |
|
50 |
depFile = join(args.inputDir,args.depFile) |
|
53555 | 51 |
self.accFile = join(args.inputDir,self.accFileName) |
52 |
self.init_featureDict(featureFile) |
|
53 |
self.init_accessibleDict(self.accFile) |
|
50220 | 54 |
self.init_dependenciesDict(depFile) |
55 |
self.expandedAccessibles = {} |
|
56 |
self.changed = True |
|
50388 | 57 |
|
53555 | 58 |
def create_feature_dict(self,inputFile): |
59 |
logger = logging.getLogger('create_feature_dict') |
|
60 |
self.featureDict = {} |
|
61 |
IS = open(inputFile,'r') |
|
62 |
for line in IS: |
|
63 |
line = line.split(':') |
|
64 |
name = line[0] |
|
65 |
# Name Id |
|
66 |
if self.nameIdDict.has_key(name): |
|
67 |
logger.warning('%s appears twice in the feature file. Aborting.',name) |
|
68 |
sys.exit(-1) |
|
69 |
else: |
|
70 |
self.nameIdDict[name] = self.maxNameId |
|
71 |
self.idNameDict[self.maxNameId] = name |
|
72 |
nameId = self.maxNameId |
|
73 |
self.maxNameId += 1 |
|
74 |
features = self.get_features(line) |
|
75 |
# Store results |
|
76 |
self.featureDict[nameId] = features |
|
77 |
IS.close() |
|
78 |
return |
|
79 |
||
50220 | 80 |
def get_name_id(self,name): |
81 |
""" |
|
82 |
Return the Id for a name. |
|
83 |
If it doesn't exist yet, a new entry is created. |
|
84 |
""" |
|
85 |
if self.nameIdDict.has_key(name): |
|
86 |
nameId = self.nameIdDict[name] |
|
87 |
else: |
|
88 |
self.nameIdDict[name] = self.maxNameId |
|
89 |
self.idNameDict[self.maxNameId] = name |
|
90 |
nameId = self.maxNameId |
|
50388 | 91 |
self.maxNameId += 1 |
50220 | 92 |
self.changed = True |
93 |
return nameId |
|
94 |
||
95 |
def add_feature(self,featureName): |
|
96 |
if not self.featureIdDict.has_key(featureName): |
|
97 |
self.featureIdDict[featureName] = self.maxFeatureId |
|
98 |
self.maxFeatureId += 1 |
|
50388 | 99 |
self.changed = True |
50827 | 100 |
fId = self.featureIdDict[featureName] |
101 |
return fId |
|
50388 | 102 |
|
103 |
def get_features(self,line): |
|
104 |
featureNames = [f.strip() for f in line[1].split()] |
|
53555 | 105 |
features = {} |
50388 | 106 |
for fn in featureNames: |
107 |
tmp = fn.split('=') |
|
53555 | 108 |
weight = 1.0 |
50388 | 109 |
if len(tmp) == 2: |
50827 | 110 |
fn = tmp[0] |
111 |
weight = float(tmp[1]) |
|
112 |
fId = self.add_feature(tmp[0]) |
|
53555 | 113 |
features[fId] = weight |
114 |
#features[fId] = 1.0 ### |
|
50388 | 115 |
return features |
116 |
||
50220 | 117 |
def expand_accessibles(self,acc): |
118 |
accessibles = set(acc) |
|
119 |
unexpandedQueue = Queue() |
|
120 |
for a in acc: |
|
121 |
if self.expandedAccessibles.has_key(a): |
|
122 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
123 |
else: |
|
124 |
unexpandedQueue.put(a) |
|
125 |
while not unexpandedQueue.empty(): |
|
126 |
nextUnExp = unexpandedQueue.get() |
|
50388 | 127 |
nextUnExpAcc = self.accessibleDict[nextUnExp] |
50220 | 128 |
for a in nextUnExpAcc: |
129 |
if not a in accessibles: |
|
130 |
accessibles = accessibles.union([a]) |
|
131 |
if self.expandedAccessibles.has_key(a): |
|
132 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
133 |
else: |
|
50388 | 134 |
unexpandedQueue.put(a) |
50220 | 135 |
return list(accessibles) |
50388 | 136 |
|
50220 | 137 |
def parse_fact(self,line): |
138 |
""" |
|
139 |
Parses a single line, extracting accessibles, features, and dependencies. |
|
140 |
""" |
|
141 |
assert line.startswith('! ') |
|
142 |
line = line[2:] |
|
50388 | 143 |
|
50220 | 144 |
# line = name:accessibles;features;dependencies |
145 |
line = line.split(':') |
|
146 |
name = line[0].strip() |
|
147 |
nameId = self.get_name_id(name) |
|
50388 | 148 |
line = line[1].split(';') |
50220 | 149 |
# Accessible Ids |
150 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
151 |
self.accessibleDict[nameId] = unExpAcc |
|
50840 | 152 |
features = self.get_features(line) |
153 |
self.featureDict[nameId] = features |
|
154 |
self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
50220 | 155 |
self.changed = True |
156 |
return nameId |
|
50388 | 157 |
|
50220 | 158 |
def parse_overwrite(self,line): |
159 |
""" |
|
160 |
Parses a single line, extracts the problemId and the Ids of the dependencies. |
|
161 |
""" |
|
162 |
assert line.startswith('p ') |
|
163 |
line = line[2:] |
|
50388 | 164 |
|
50220 | 165 |
# line = name:dependencies |
166 |
line = line.split(':') |
|
167 |
name = line[0].strip() |
|
168 |
nameId = self.get_name_id(name) |
|
50388 | 169 |
|
50220 | 170 |
dependencies = [self.nameIdDict[d.strip()] for d in line[1].split()] |
171 |
self.changed = True |
|
172 |
return nameId,dependencies |
|
50388 | 173 |
|
50220 | 174 |
def parse_problem(self,line): |
175 |
""" |
|
50827 | 176 |
Parses a problem and returns the features, the accessibles, and any hints. |
50220 | 177 |
""" |
178 |
assert line.startswith('? ') |
|
179 |
line = line[2:] |
|
180 |
name = None |
|
53100 | 181 |
numberOfPredictions = None |
182 |
||
183 |
# Check whether there is a problem name: |
|
184 |
tmp = line.split('#') |
|
185 |
if len(tmp) == 2: |
|
186 |
numberOfPredictions = int(tmp[0].strip()) |
|
187 |
line = tmp[1] |
|
50388 | 188 |
|
50220 | 189 |
# Check whether there is a problem name: |
190 |
tmp = line.split(':') |
|
191 |
if len(tmp) == 2: |
|
192 |
name = tmp[0].strip() |
|
193 |
line = tmp[1] |
|
50388 | 194 |
|
50220 | 195 |
# line = accessibles;features |
196 |
line = line.split(';') |
|
197 |
# Accessible Ids, expand and store the accessibles. |
|
198 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
199 |
if len(self.expandedAccessibles.keys())>=100: |
|
200 |
self.expandedAccessibles = {} |
|
201 |
self.changed = True |
|
202 |
for accId in unExpAcc: |
|
203 |
if not self.expandedAccessibles.has_key(accId): |
|
204 |
accIdAcc = self.accessibleDict[accId] |
|
205 |
self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
|
206 |
self.changed = True |
|
207 |
accessibles = self.expand_accessibles(unExpAcc) |
|
50388 | 208 |
features = self.get_features(line) |
50827 | 209 |
# Get hints: |
210 |
if len(line) == 3: |
|
211 |
hints = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
212 |
else: |
|
213 |
hints = [] |
|
50619
b958a94cf811
new version of MaSh, with theory-level reasoning
blanchet
parents:
50388
diff
changeset
|
214 |
|
53100 | 215 |
return name,features,accessibles,hints,numberOfPredictions |
50388 | 216 |
|
50220 | 217 |
def save(self,fileName): |
218 |
if self.changed: |
|
219 |
dictsStream = open(fileName, 'wb') |
|
220 |
dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
|
53555 | 221 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict),dictsStream) |
50220 | 222 |
self.changed = False |
223 |
dictsStream.close() |
|
224 |
def load(self,fileName): |
|
50388 | 225 |
dictsStream = open(fileName, 'rb') |
50220 | 226 |
self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
53555 | 227 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict = load(dictsStream) |
50220 | 228 |
self.changed = False |
229 |
dictsStream.close() |