author | blanchet |
Fri, 11 Jan 2013 16:30:56 +0100 | |
changeset 50827 | aba769dc82e9 |
parent 50619 | b958a94cf811 |
child 50840 | a5cc092156da |
permissions | -rw-r--r-- |
50222 | 1 |
# Title: HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py |
2 |
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen |
|
3 |
# Copyright 2012 |
|
4 |
# |
|
5 |
# Persistent dictionaries: accessibility, dependencies, and features. |
|
6 |
||
50220 | 7 |
''' |
8 |
Created on Jul 12, 2012 |
|
9 |
||
10 |
@author: daniel |
|
11 |
''' |
|
12 |
||
13 |
from os.path import join |
|
14 |
from Queue import Queue |
|
15 |
from readData import create_accessible_dict,create_dependencies_dict,create_feature_dict |
|
16 |
from cPickle import load,dump |
|
17 |
||
18 |
class Dictionaries(object): |
|
19 |
''' |
|
20 |
This class contains all info about name-> id mapping, etc. |
|
21 |
''' |
|
22 |
def __init__(self): |
|
23 |
''' |
|
24 |
Constructor |
|
25 |
''' |
|
26 |
self.nameIdDict = {} |
|
27 |
self.idNameDict = {} |
|
28 |
self.featureIdDict={} |
|
29 |
self.maxNameId = 0 |
|
30 |
self.maxFeatureId = 0 |
|
31 |
self.featureDict = {} |
|
32 |
self.dependenciesDict = {} |
|
33 |
self.accessibleDict = {} |
|
50827 | 34 |
self.featureCountDict = {} |
50220 | 35 |
self.expandedAccessibles = {} |
36 |
self.changed = True |
|
50388 | 37 |
|
50220 | 38 |
""" |
50619
b958a94cf811
new version of MaSh, with theory-level reasoning
blanchet
parents:
50388
diff
changeset
|
39 |
Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled! |
50388 | 40 |
""" |
50220 | 41 |
def init_featureDict(self,featureFile): |
50827 | 42 |
self.featureDict,self.maxNameId,self.maxFeatureId, self.featureCountDict = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\ |
43 |
self.maxFeatureId,self.featureCountDict,featureFile) |
|
50220 | 44 |
def init_dependenciesDict(self,depFile): |
45 |
self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) |
|
46 |
def init_accessibleDict(self,accFile): |
|
47 |
self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) |
|
50388 | 48 |
|
50220 | 49 |
def init_all(self,inputFolder,featureFileName = 'mash_features',depFileName='mash_dependencies',accFileName = 'mash_accessibility'): |
50 |
featureFile = join(inputFolder,featureFileName) |
|
51 |
depFile = join(inputFolder,depFileName) |
|
52 |
accFile = join(inputFolder,accFileName) |
|
53 |
self.init_featureDict(featureFile) |
|
54 |
self.init_accessibleDict(accFile) |
|
55 |
self.init_dependenciesDict(depFile) |
|
56 |
self.expandedAccessibles = {} |
|
57 |
self.changed = True |
|
50388 | 58 |
|
50220 | 59 |
def get_name_id(self,name): |
60 |
""" |
|
61 |
Return the Id for a name. |
|
62 |
If it doesn't exist yet, a new entry is created. |
|
63 |
""" |
|
64 |
if self.nameIdDict.has_key(name): |
|
65 |
nameId = self.nameIdDict[name] |
|
66 |
else: |
|
67 |
self.nameIdDict[name] = self.maxNameId |
|
68 |
self.idNameDict[self.maxNameId] = name |
|
69 |
nameId = self.maxNameId |
|
50388 | 70 |
self.maxNameId += 1 |
50220 | 71 |
self.changed = True |
72 |
return nameId |
|
73 |
||
74 |
def add_feature(self,featureName): |
|
75 |
if not self.featureIdDict.has_key(featureName): |
|
76 |
self.featureIdDict[featureName] = self.maxFeatureId |
|
50827 | 77 |
self.featureCountDict[self.maxFeatureId] = 0 |
50220 | 78 |
self.maxFeatureId += 1 |
50388 | 79 |
self.changed = True |
50827 | 80 |
fId = self.featureIdDict[featureName] |
81 |
self.featureCountDict[fId] += 1 |
|
82 |
return fId |
|
50388 | 83 |
|
84 |
def get_features(self,line): |
|
85 |
# Feature Ids |
|
86 |
featureNames = [f.strip() for f in line[1].split()] |
|
87 |
features = [] |
|
88 |
for fn in featureNames: |
|
89 |
tmp = fn.split('=') |
|
50827 | 90 |
weight = 1.0 |
50388 | 91 |
if len(tmp) == 2: |
50827 | 92 |
fn = tmp[0] |
93 |
weight = float(tmp[1]) |
|
94 |
fId = self.add_feature(tmp[0]) |
|
95 |
features.append((fId,weight)) |
|
50388 | 96 |
return features |
97 |
||
50220 | 98 |
def expand_accessibles(self,acc): |
99 |
accessibles = set(acc) |
|
100 |
unexpandedQueue = Queue() |
|
101 |
for a in acc: |
|
102 |
if self.expandedAccessibles.has_key(a): |
|
103 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
104 |
else: |
|
105 |
unexpandedQueue.put(a) |
|
106 |
while not unexpandedQueue.empty(): |
|
107 |
nextUnExp = unexpandedQueue.get() |
|
50388 | 108 |
nextUnExpAcc = self.accessibleDict[nextUnExp] |
50220 | 109 |
for a in nextUnExpAcc: |
110 |
if not a in accessibles: |
|
111 |
accessibles = accessibles.union([a]) |
|
112 |
if self.expandedAccessibles.has_key(a): |
|
113 |
accessibles = accessibles.union(self.expandedAccessibles[a]) |
|
114 |
else: |
|
50388 | 115 |
unexpandedQueue.put(a) |
50220 | 116 |
return list(accessibles) |
50388 | 117 |
|
50220 | 118 |
def parse_fact(self,line): |
119 |
""" |
|
120 |
Parses a single line, extracting accessibles, features, and dependencies. |
|
121 |
""" |
|
122 |
assert line.startswith('! ') |
|
123 |
line = line[2:] |
|
50388 | 124 |
|
50220 | 125 |
# line = name:accessibles;features;dependencies |
126 |
line = line.split(':') |
|
127 |
name = line[0].strip() |
|
128 |
nameId = self.get_name_id(name) |
|
50388 | 129 |
|
130 |
line = line[1].split(';') |
|
50220 | 131 |
# Accessible Ids |
132 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
133 |
self.accessibleDict[nameId] = unExpAcc |
|
50388 | 134 |
self.featureDict[nameId] = self.get_features(line) |
50220 | 135 |
self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] |
136 |
self.changed = True |
|
137 |
return nameId |
|
50388 | 138 |
|
50220 | 139 |
def parse_overwrite(self,line): |
140 |
""" |
|
141 |
Parses a single line, extracts the problemId and the Ids of the dependencies. |
|
142 |
""" |
|
143 |
assert line.startswith('p ') |
|
144 |
line = line[2:] |
|
50388 | 145 |
|
50220 | 146 |
# line = name:dependencies |
147 |
line = line.split(':') |
|
148 |
name = line[0].strip() |
|
149 |
nameId = self.get_name_id(name) |
|
50388 | 150 |
|
50220 | 151 |
dependencies = [self.nameIdDict[d.strip()] for d in line[1].split()] |
152 |
self.changed = True |
|
153 |
return nameId,dependencies |
|
50388 | 154 |
|
50220 | 155 |
def parse_problem(self,line): |
156 |
""" |
|
50827 | 157 |
Parses a problem and returns the features, the accessibles, and any hints. |
50220 | 158 |
""" |
159 |
assert line.startswith('? ') |
|
160 |
line = line[2:] |
|
161 |
name = None |
|
50388 | 162 |
|
50220 | 163 |
# Check whether there is a problem name: |
164 |
tmp = line.split(':') |
|
165 |
if len(tmp) == 2: |
|
166 |
name = tmp[0].strip() |
|
167 |
line = tmp[1] |
|
50388 | 168 |
|
50220 | 169 |
# line = accessibles;features |
170 |
line = line.split(';') |
|
171 |
# Accessible Ids, expand and store the accessibles. |
|
172 |
unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] |
|
173 |
if len(self.expandedAccessibles.keys())>=100: |
|
174 |
self.expandedAccessibles = {} |
|
175 |
self.changed = True |
|
176 |
for accId in unExpAcc: |
|
177 |
if not self.expandedAccessibles.has_key(accId): |
|
178 |
accIdAcc = self.accessibleDict[accId] |
|
179 |
self.expandedAccessibles[accId] = self.expand_accessibles(accIdAcc) |
|
180 |
self.changed = True |
|
181 |
accessibles = self.expand_accessibles(unExpAcc) |
|
50388 | 182 |
features = self.get_features(line) |
50827 | 183 |
# Get hints: |
184 |
if len(line) == 3: |
|
185 |
hints = [self.nameIdDict[d.strip()] for d in line[2].split()] |
|
186 |
else: |
|
187 |
hints = [] |
|
50619
b958a94cf811
new version of MaSh, with theory-level reasoning
blanchet
parents:
50388
diff
changeset
|
188 |
|
50827 | 189 |
return name,features,accessibles,hints |
50388 | 190 |
|
50220 | 191 |
def save(self,fileName): |
192 |
if self.changed: |
|
193 |
dictsStream = open(fileName, 'wb') |
|
194 |
dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
|
50827 | 195 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict),dictsStream) |
50220 | 196 |
self.changed = False |
197 |
dictsStream.close() |
|
198 |
def load(self,fileName): |
|
50388 | 199 |
dictsStream = open(fileName, 'rb') |
50220 | 200 |
self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ |
50827 | 201 |
self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict = load(dictsStream) |
50220 | 202 |
self.changed = False |
203 |
dictsStream.close() |