// Package memory provides a Store of facts and keyword-based selection over them.
package memory
import (
"regexp"
"sort"
"strings"
)
// tokenSplit matches runs of one or more characters that are neither a
// Unicode letter (\p{L}), a Unicode number (\p{N}) nor an underscore. It
// is a Unicode-aware analogue of \W; Go's built-in \w and \W classes are
// ASCII-only, which is why the class is spelled out explicitly.
//
// Tokenisation is intentionally simple: lowercase, split on non-word
// characters, dedupe. No stopword list — the store is small enough that
// recall matters more than precision at this stage.
var tokenSplit = regexp.MustCompile(`[^\p{L}\p{N}_]+`)
// Select picks every enabled fact that has at least one token in common
// with task and returns the matches ordered by Name.
//
// Matching compares the tokens of task against the tokens of each fact's
// Name and Description joined by a space. Facts with Disabled set are
// skipped; no other field is consulted, so pinned facts are matched like
// any other. The store is loaded first, so a LoadAll error is returned
// even when task is empty; otherwise a task that yields no tokens
// produces nil, nil.
//
// As a side effect, each returned fact gets LastUsed set to the store
// clock's current UTC time truncated to a 24-hour boundary and is then
// written back with Save. If a Save fails, Select stops immediately and
// returns the complete match list together with that error; facts after
// the failing one are left with their previous LastUsed and are not saved.
func (s *Store) Select(task string) ([]*Fact, error) {
	all, err := s.LoadAll()
	if err != nil {
		return nil, err
	}

	want := tokenize(task)
	if len(want) == 0 {
		return nil, nil
	}

	var matched []*Fact
	for _, fact := range all {
		if fact.Disabled || !overlap(want, tokenize(fact.Name+" "+fact.Description)) {
			continue
		}
		matched = append(matched, fact)
	}

	byName := func(i, j int) bool { return matched[i].Name < matched[j].Name }
	sort.Slice(matched, byName)

	// One day expressed in nanoseconds. Truncating a UTC instant to this
	// step lands on midnight UTC, i.e. a date-only timestamp.
	const day = 24 * 60 * 60 * 1e9
	stamp := s.Now().UTC().Truncate(day)
	for i := range matched {
		matched[i].LastUsed = stamp
		if err := s.Save(matched[i]); err != nil {
			return matched, err
		}
	}
	return matched, nil
}
// tokenize lowercases s, splits it on tokenSplit and returns the distinct
// non-empty pieces as a set. The returned map is never nil; input without
// any word characters yields an empty map.
func tokenize(s string) map[string]struct{} {
	words := tokenSplit.Split(strings.ToLower(s), -1)
	set := make(map[string]struct{})
	for i := range words {
		// Split emits empty strings around leading/trailing separators.
		if w := words[i]; w != "" {
			set[w] = struct{}{}
		}
	}
	return set
}
// overlap reports whether the sets a and b share at least one key. A nil
// or empty set never overlaps with anything.
func overlap(a, b map[string]struct{}) bool {
	// Iterate over the smaller set and probe the larger one, so the work
	// is bounded by the size of the smaller set.
	if len(a) > len(b) {
		a, b = b, a
	}
	for key := range a {
		if _, found := b[key]; found {
			return true
		}
	}
	return false
}