45 parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ |
45 parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ |
46 help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') |
46 help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') |
47 parser.add_argument('--depFile', default='mash_dependencies', |
47 parser.add_argument('--depFile', default='mash_dependencies', |
48 help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') |
48 help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') |
49 parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") |
49 parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") |
|
50 |
50 parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") |
51 parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") |
51 #DEBUG: Change sinePrior default to false |
52 # Theory Parameters |
52 parser.add_argument('--sinePrior',default=True,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") |
53 parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) |
53 |
54 parser.add_argument('--theoryDefValNeg',default=-15.0,help="Default value for negative unknown features. Default=-15.0.",type=float) |
|
55 parser.add_argument('--theoryPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) |
54 |
56 |
55 parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") |
57 parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") |
|
58 # NB Parameters |
|
59 parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) |
|
60 parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) |
|
61 parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) |
|
62 parser.add_argument('--NBSinePrior',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") |
|
63 parser.add_argument('--NBSineWeight',default=100.0,help="How much the SInE prior is weighted. Default=100.0.",type=float) |
|
64 |
56 parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") |
65 parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") |
57 parser.add_argument('--predef',default=False,action='store_true',\ |
66 parser.add_argument('--predef',default=False,action='store_true',\ |
58 help="Use predefined predictions. Used only for comparison with the actual learning. Expects mash_mepo_suggestions in inputDir.") |
67 help="Use predefined predictions. Used only for comparison with the actual learning. Expects mash_mepo_suggestions in inputDir.") |
59 parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ |
68 parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ |
60 WARNING: This will make the program a lot slower! Default=False.") |
69 WARNING: This will make the program a lot slower! Default=False.") |
87 |
96 |
88 logger.info('Using the following settings: %s',args) |
97 logger.info('Using the following settings: %s',args) |
89 # Pick algorithm |
98 # Pick algorithm |
90 if args.nb: |
99 if args.nb: |
91 logger.info('Using sparse Naive Bayes for learning.') |
100 logger.info('Using sparse Naive Bayes for learning.') |
92 model = sparseNBClassifier(args.sinePrior) |
101 model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight) |
93 modelFile = os.path.join(args.outputDir,'NB.pickle') |
102 modelFile = os.path.join(args.outputDir,'NB.pickle') |
94 elif args.snow: |
103 elif args.snow: |
95 logger.info('Using naive bayes (SNoW) for learning.') |
104 logger.info('Using naive bayes (SNoW) for learning.') |
96 model = SNoW() |
105 model = SNoW() |
97 modelFile = os.path.join(args.outputDir,'SNoW.pickle') |
106 modelFile = os.path.join(args.outputDir,'SNoW.pickle') |
101 predictionFile = os.path.join(args.inputDir,'mash_mepo_suggestions') |
110 predictionFile = os.path.join(args.inputDir,'mash_mepo_suggestions') |
102 model = Predefined(predictionFile) |
111 model = Predefined(predictionFile) |
103 modelFile = os.path.join(args.outputDir,'mepo.pickle') |
112 modelFile = os.path.join(args.outputDir,'mepo.pickle') |
104 else: |
113 else: |
105 logger.info('No algorithm specified. Using sparse Naive Bayes.') |
114 logger.info('No algorithm specified. Using sparse Naive Bayes.') |
106 model = sparseNBClassifier(args.sinePrior) |
115 model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight) |
107 modelFile = os.path.join(args.outputDir,'NB.pickle') |
116 modelFile = os.path.join(args.outputDir,'NB.pickle') |
108 dictsFile = os.path.join(args.outputDir,'dicts.pickle') |
117 dictsFile = os.path.join(args.outputDir,'dicts.pickle') |
109 theoryFile = os.path.join(args.outputDir,'theory.pickle') |
118 theoryFile = os.path.join(args.outputDir,'theory.pickle') |
110 |
119 |
111 # Initializing model |
120 # Initializing model |
121 trainData = dicts.featureDict.keys() |
130 trainData = dicts.featureDict.keys() |
122 model.initializeModel(trainData,dicts) |
131 model.initializeModel(trainData,dicts) |
123 |
132 |
124 if args.learnTheories: |
133 if args.learnTheories: |
125 depFile = os.path.join(args.inputDir,args.depFile) |
134 depFile = os.path.join(args.inputDir,args.depFile) |
126 theoryModels = TheoryModels() |
135 theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) |
127 theoryModels.init(depFile,dicts) |
136 theoryModels.init(depFile,dicts) |
128 theoryModels.save(theoryFile) |
137 theoryModels.save(theoryFile) |
129 |
138 |
130 model.save(modelFile) |
139 model.save(modelFile) |
131 dicts.save(dictsFile) |
140 dicts.save(dictsFile) |