Go 359 lines
// Package ask answers a free-form question about a project with a
// deterministic, no-AI-spend, ranked set of pointers: the matching
// memory facts first (eeco's curated topic→file map) and then the
// best-matching code locations as path:line references.
//
// It is the engine behind `eeco ask`. Where `eeco go` (package brief)
// gives a one-shot project overview, `eeco ask` is the interactive
// counterpart: a fast, precise pointer into the codebase for any
// assistant, beyond the static brief.
//
// The package only reads — the resolved config, the memory store, and
// the repository's tracked files — and writes nothing. It calls no AI
// provider: relevance is a simple word-overlap score, the same
// tokenisation the memory store uses for fact selection. The output
// carries no timestamp and every list is in a stable sort order, so a
// given question over a given tree always produces the same answer.
package ask
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"github.com/ajhahnde/eeco/internal/config"
"github.com/ajhahnde/eeco/internal/gitx"
"github.com/ajhahnde/eeco/internal/memory"
)
// Tunables that bound how much work one question costs and how much text
// a single hit may contribute to the answer.
const (
	// DefaultLimit is how many code locations Search returns when the
	// caller supplies a limit of zero or less.
	DefaultLimit = 10

	// maxFileBytes is the largest file, in bytes, that ask will scan.
	// Anything bigger is skipped: such files are almost always generated
	// or vendored, and scanning them would blow the time budget of a
	// command that is meant to feel instant.
	maxFileBytes = 256 * 1024

	// snippetCap is the maximum length of a code-line snippet in the
	// output, so a single long minified line cannot dominate the answer.
	snippetCap = 160
)
// tokenSplit matches one or more consecutive characters that are NOT a
// Unicode letter (\p{L}), a Unicode number (\p{N}), or an underscore —
// i.e. the separators between words. tokenize splits on it to obtain the
// word tokens of a string.
//
// Tokenisation is intended to mirror internal/memory/select.go:
// lowercase, split on non-word runs, dedupe. A private copy lives here
// rather than widening the memory package's surface — the two are tiny,
// independent, and ask layers its own scoring on top.
//
// The pattern is compiled once at package initialisation.
var tokenSplit = regexp.MustCompile(`[^\p{L}\p{N}_]+`)
// Result is the structured answer to one question: the matching memory
// facts and the matching code locations, each ranked. It is the data
// behind `eeco ask`, independent of how it is rendered — Render turns it
// into Markdown, RenderJSON into a JSON object, so the two always
// describe the same answer.
//
// Search always fills both slice fields with non-nil slices, so the JSON
// form renders an empty list ([]) rather than null when nothing matched.
// The JSON keys are fixed by the struct tags below.
type Result struct {
// Question is the question as asked; Search stores it with leading and
// trailing whitespace trimmed.
Question string `json:"question"`
// Memory holds the matching memory facts, best match first.
Memory []MemoryHit `json:"memory"`
// Code holds the matching code locations, best match first.
Code []CodeHit `json:"code"`
}
// MemoryHit is one memory fact whose name, description, or body shares at
// least one word with the question. The Name, Description, and Ref values
// are copied verbatim from the stored fact.
type MemoryHit struct {
// Name is the fact's name; it is also the tie-breaker when two facts
// have the same Score.
Name string `json:"name"`
// Description is the fact's description text.
Description string `json:"description"`
// Ref is the repo-relative file the fact points at; it is empty when
// the fact carries none.
Ref string `json:"ref"`
// Score is the count of distinct question terms found in the fact's
// name, description, and body combined.
Score int `json:"score"`
}
// CodeHit is one line of a scanned file that shares at least one word
// with the question.
type CodeHit struct {
// Path is the file's repo-relative, slash-separated path.
Path string `json:"path"`
// Line is the 1-based line number within that file.
Line int `json:"line"`
// Text is the line with surrounding whitespace trimmed and its length
// capped (see snippet).
Text string `json:"text"`
// Score is the count of distinct question terms found on the line.
Score int `json:"score"`
}
// Search answers question for the project described by cfg. It ranks the
// memory store's facts and the lines of the repository's files by how
// many distinct question words each one contains, and returns both
// rankings in a Result.
//
// limit caps only the code locations; a limit of zero or less selects
// DefaultLimit. Every matching memory fact is returned. The memory store
// is consulted only when the workspace is initialised; when it is not,
// the code search still runs, so `eeco ask` stays useful in any git repo.
//
// The returned Result always carries the whitespace-trimmed question and
// non-nil Memory and Code slices. A question that yields no usable terms
// returns that empty Result without touching the store or the tree.
//
// A non-nil error means cfg was nil or a real I/O fault occurred while
// reading the store or walking the tree; in that case the Result is the
// zero value. A question that matches nothing is not an error.
func Search(cfg *config.Config, question string, limit int) (Result, error) {
	if cfg == nil {
		return Result{}, errors.New("ask.Search: nil config")
	}
	if limit < 1 {
		limit = DefaultLimit
	}

	answer := Result{
		Question: strings.TrimSpace(question),
		Memory:   []MemoryHit{},
		Code:     []CodeHit{},
	}

	// Nothing to match against: return the empty (but well-formed) answer.
	words := tokenize(question)
	if len(words) == 0 {
		return answer, nil
	}

	facts, err := searchMemory(cfg, words)
	if err != nil {
		return Result{}, err
	}
	locations, err := searchCode(cfg, words, limit)
	if err != nil {
		return Result{}, err
	}

	answer.Memory, answer.Code = facts, locations
	return answer, nil
}
// searchMemory ranks the memory store's facts against terms. A fact's
// score is the number of distinct question terms that occur anywhere in
// its name, description, or body; disabled facts and facts scoring zero
// are left out.
//
// The store is only read, via Open and LoadAll. memory.Select is
// deliberately not used because it would bump last_used and re-save.
// When the workspace is not initialised the store is not opened at all
// and an empty, non-nil slice is returned.
//
// Hits are ordered by score descending, then name ascending, so the
// result is stable. An error is returned only when opening or loading the
// store fails.
func searchMemory(cfg *config.Config, terms map[string]struct{}) ([]MemoryHit, error) {
	matches := []MemoryHit{}
	if !config.IsInitialized(cfg) {
		return matches, nil
	}

	store, err := memory.Open(cfg)
	if err != nil {
		return nil, fmt.Errorf("ask: open memory: %w", err)
	}
	facts, err := store.LoadAll()
	if err != nil {
		return nil, fmt.Errorf("ask: load memory: %w", err)
	}

	for _, fact := range facts {
		if fact.Disabled {
			continue
		}
		// Score the fact's three text fields as one bag of words.
		text := fact.Name + " " + fact.Description + " " + fact.Body
		if n := overlapCount(terms, tokenize(text)); n > 0 {
			matches = append(matches, MemoryHit{
				Name:        fact.Name,
				Description: fact.Description,
				Ref:         fact.Ref,
				Score:       n,
			})
		}
	}

	sort.SliceStable(matches, func(a, b int) bool {
		if matches[a].Score == matches[b].Score {
			return matches[a].Name < matches[b].Name
		}
		return matches[a].Score > matches[b].Score
	})
	return matches, nil
}
// searchCode scans every candidate file returned by collectFiles and
// scores each line by the number of distinct question terms it contains.
// It returns at most limit hits, ranked by score descending, then path
// ascending, then line ascending — a fully deterministic order. The
// returned slice is never nil. limit is expected to be positive (Search
// normalises it).
//
// Files are filtered before and after reading:
//   - entries that cannot be stat'ed, are not regular files (directories,
//     FIFOs, sockets, devices, dangling symlinks), or are larger than
//     maxFileBytes are skipped without being read, so an oversized file
//     costs one stat call instead of a full read and a special file can
//     never block the scan;
//   - files that fail to read, turn out larger than maxFileBytes once
//     read, or contain a NUL byte (treated as binary) are skipped too.
//
// Skipping is deliberate best-effort: the answer degrades rather than
// aborting. The only error returned is one from collectFiles.
func searchCode(cfg *config.Config, terms map[string]struct{}, limit int) ([]CodeHit, error) {
	files, err := collectFiles(cfg)
	if err != nil {
		return nil, err
	}

	hits := []CodeHit{}
	for _, rel := range files {
		abs := filepath.Join(cfg.RepoRoot, filepath.FromSlash(rel))

		// Check type and size up front. os.Stat follows symlinks, matching
		// what os.ReadFile would open.
		info, err := os.Stat(abs)
		if err != nil || !info.Mode().IsRegular() || info.Size() > maxFileBytes {
			continue
		}

		data, err := os.ReadFile(abs)
		if err != nil {
			// A file listed by git but unreadable now (a race, a broken
			// symlink) is skipped, not fatal.
			continue
		}
		// Re-check the size in case the file grew between stat and read,
		// and drop anything that looks binary.
		if len(data) > maxFileBytes || bytes.IndexByte(data, 0) >= 0 {
			continue
		}

		for i, raw := range strings.Split(string(data), "\n") {
			score := overlapCount(terms, tokenize(raw))
			if score == 0 {
				continue
			}
			hits = append(hits, CodeHit{
				Path:  rel,
				Line:  i + 1, // line numbers are 1-based
				Text:  snippet(raw),
				Score: score,
			})
		}
	}

	sort.SliceStable(hits, func(i, j int) bool {
		if hits[i].Score != hits[j].Score {
			return hits[i].Score > hits[j].Score
		}
		if hits[i].Path != hits[j].Path {
			return hits[i].Path < hits[j].Path
		}
		return hits[i].Line < hits[j].Line
	})
	if len(hits) > limit {
		hits = hits[:limit]
	}
	return hits, nil
}
// collectFiles returns the sorted list of files to search, as
// repo-relative, slash-separated paths.
//
// Git's tracked set is preferred so that build artifacts and other
// untracked clutter stay out of the search; tracked paths whose first
// segment is the eeco workspace directory are dropped. When git reports
// an error or no tracked files, it falls back to a recursive walk of the
// repository root that collects every non-directory entry and does not
// descend into any directory named ".git" or named after the workspace.
//
// An error is returned only from the fallback walk, wrapped with context;
// a git failure merely triggers the fallback.
func collectFiles(cfg *config.Config) ([]string, error) {
	tracked, gitErr := gitx.TrackedFiles(cfg.RepoRoot)
	if gitErr == nil && len(tracked) > 0 {
		kept := make([]string, 0, len(tracked))
		for _, file := range tracked {
			top, _, _ := strings.Cut(file, "/")
			if top != cfg.WorkspaceName {
				kept = append(kept, file)
			}
		}
		sort.Strings(kept)
		return kept, nil
	}

	var walked []string
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// Any walk error aborts the whole walk.
			return walkErr
		case !entry.IsDir():
			rel, err := filepath.Rel(cfg.RepoRoot, path)
			if err != nil {
				return err
			}
			walked = append(walked, filepath.ToSlash(rel))
			return nil
		case path == cfg.RepoRoot:
			// The root is always entered, whatever it is called.
			return nil
		case entry.Name() == ".git" || entry.Name() == cfg.WorkspaceName:
			return filepath.SkipDir
		default:
			return nil
		}
	}
	if err := filepath.WalkDir(cfg.RepoRoot, visit); err != nil {
		return nil, fmt.Errorf("ask: walk repo: %w", err)
	}
	sort.Strings(walked)
	return walked, nil
}
// Render formats a Result as the Markdown answer. The output starts with
// a heading that quotes the question. When there are neither memory nor
// code hits, a single guidance line follows and nothing else; otherwise a
// "Memory" and a "Code" section are written, each with either its hits or
// a one-line "no matching" note.
//
// A memory hit with a Ref is shown as "description → `ref`"; one without
// is shown as "name — description". A code hit is shown as
// "`path:line` text".
func Render(r Result) string {
	var md strings.Builder
	fmt.Fprintf(&md, "# eeco ask: %q\n\n", r.Question)

	noMemory, noCode := len(r.Memory) == 0, len(r.Code) == 0
	if noMemory && noCode {
		md.WriteString("No matches — try different terms, or run `eeco go` for the project brief.\n")
		return md.String()
	}

	md.WriteString("## Memory\n\n")
	if noMemory {
		md.WriteString("No matching facts.\n")
	}
	for _, fact := range r.Memory {
		if fact.Ref == "" {
			fmt.Fprintf(&md, "- %s — %s\n", fact.Name, fact.Description)
			continue
		}
		fmt.Fprintf(&md, "- %s → `%s`\n", fact.Description, fact.Ref)
	}

	md.WriteString("\n## Code\n\n")
	if noCode {
		md.WriteString("No matching code.\n")
	}
	for _, loc := range r.Code {
		fmt.Fprintf(&md, "- `%s:%d` %s\n", loc.Path, loc.Line, loc.Text)
	}
	return md.String()
}
// RenderJSON formats a Result as an indented JSON object followed by a
// single trailing newline — the machine-readable counterpart to Render.
// The three top-level keys (question, memory, code) are frozen by the
// struct tags on Result. The arrays are emitted as [] rather than null
// whenever the slices are non-nil, which Search guarantees.
//
// It returns an empty string and a wrapped error if marshalling fails.
func RenderJSON(r Result) (string, error) {
	raw, err := json.MarshalIndent(r, "", " ")
	if err == nil {
		return string(raw) + "\n", nil
	}
	return "", fmt.Errorf("ask: marshal json: %w", err)
}
// snippet prepares a code line for display: it trims surrounding
// whitespace and, when the result is longer than snippetCap bytes, cuts
// it down and appends an ellipsis ("…") so one very long line cannot
// dominate the answer.
//
// The cut never lands inside a multi-byte UTF-8 character: it is placed
// at the last character boundary at or before snippetCap bytes, so the
// snippet stays valid UTF-8 whenever the line was. For ASCII text the cut
// is exactly at snippetCap bytes.
func snippet(line string) string {
	s := strings.TrimSpace(line)
	if len(s) <= snippetCap {
		return s
	}
	// Ranging over a string yields the byte offset at which each character
	// starts; keep the largest such offset that does not exceed the cap.
	cut := 0
	for i := range s {
		if i > snippetCap {
			break
		}
		cut = i
	}
	return s[:cut] + "…"
}
// tokenize turns s into its set of distinct word tokens: it lowercases s,
// splits it on runs of non-word characters (see tokenSplit), and discards
// every piece that is at most one byte long — the empty strings produced
// at the edges and single ASCII characters, which carry little signal and
// inflate code-search noise. The length test is in bytes, so a lone
// multi-byte character is kept.
//
// The returned map is never nil; it is empty when s has no usable token.
func tokenize(s string) map[string]struct{} {
	set := make(map[string]struct{})
	lowered := strings.ToLower(s)
	for _, word := range tokenSplit.Split(lowered, -1) {
		if len(word) > 1 {
			set[word] = struct{}{}
		}
	}
	return set
}
// overlapCount reports how many keys the two sets have in common. It
// walks whichever set is smaller and probes the other, so the cost is
// proportional to the smaller set. Nil or empty sets yield 0.
func overlapCount(terms, hay map[string]struct{}) int {
	small, big := hay, terms
	if len(terms) <= len(hay) {
		small, big = terms, hay
	}

	shared := 0
	for key := range small {
		if _, found := big[key]; found {
			shared++
		}
	}
	return shared
}