502 ret [] (Integer.max 0 (num_visible_facts - max_suggs)) |
502 ret [] (Integer.max 0 (num_visible_facts - max_suggs)) |
503 end |
503 end |
504 |
504 |
505 val nb_def_prior_weight = 21 (* FUDGE *) |
505 val nb_def_prior_weight = 21 (* FUDGE *) |
506 |
506 |
|
507 fun naive_bayes_learn_fact tfreq sfreq dffreq th feats deps = |
|
508 let |
|
509 fun add_th weight t = |
|
510 let |
|
511 val im = Array.sub (sfreq, t) |
|
512 fun fold_fn s sf = Inttab.map_default (s, 0) (Integer.add weight) sf |
|
513 in |
|
514 Array.update (tfreq, t, weight + Array.sub (tfreq, t)); |
|
515 Array.update (sfreq, t, fold fold_fn feats im) |
|
516 end |
|
517 |
|
518 fun add_sym s = Array.update (dffreq, s, 1 + Array.sub (dffreq, s)) |
|
519 in |
|
520 add_th nb_def_prior_weight th; |
|
521 List.app (add_th 1) deps; |
|
522 List.app add_sym feats |
|
523 end |
|
524 |
507 fun naive_bayes_learn num_facts get_deps get_feats num_feats = |
525 fun naive_bayes_learn num_facts get_deps get_feats num_feats = |
508 let |
526 let |
509 val tfreq = Array.array (num_facts, 0) |
527 val tfreq = Array.array (num_facts, 0) |
510 val sfreq = Array.array (num_facts, Inttab.empty) |
528 val sfreq = Array.array (num_facts, Inttab.empty) |
511 val dffreq = Array.array (num_feats, 0) |
529 val dffreq = Array.array (num_feats, 0) |
512 |
530 |
513 fun learn th feats deps = |
|
514 let |
|
515 fun add_th weight t = |
|
516 let |
|
517 val im = Array.sub (sfreq, t) |
|
518 fun fold_fn s sf = Inttab.map_default (s, 0) (Integer.add weight) sf |
|
519 in |
|
520 Array.update (tfreq, t, weight + Array.sub (tfreq, t)); |
|
521 Array.update (sfreq, t, fold fold_fn feats im) |
|
522 end |
|
523 |
|
524 fun add_sym s = Array.update (dffreq, s, 1 + Array.sub (dffreq, s)) |
|
525 in |
|
526 add_th nb_def_prior_weight th; |
|
527 List.app (add_th 1) deps; |
|
528 List.app add_sym feats |
|
529 end |
|
530 |
|
531 fun for i = |
531 fun for i = |
532 if i = num_facts then () else (learn i (get_feats i) (get_deps i); for (i + 1)) |
532 if i = num_facts then () |
|
533 else (naive_bayes_learn_fact tfreq sfreq dffreq i (get_feats i) (get_deps i); for (i + 1)) |
533 |
534 |
534 val ln_afreq = Math.ln (Real.fromInt num_facts) |
535 val ln_afreq = Math.ln (Real.fromInt num_facts) |
535 in |
536 in |
536 for 0; |
537 for 0; |
537 (Array.vector tfreq, Array.vector sfreq, |
538 (Array.vector tfreq, Array.vector sfreq, |