Go 113 lines
package workflow
import (
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"github.com/ajhahnde/eeco/internal/gitx"
)
// engineSubdirs lists the directories that live inside the workspace.
// leakGuard.Run combines them with the configured workspace name to form
// the pattern "<workspace>/<subdir>/"; a tracked file containing such a
// path is leaking engine output into history. The bare workspace name —
// in prose, in source constants, or on the .gitignore ignore line — is
// not a leak and is explicitly allowed by Constraint 2.
var engineSubdirs = []string{
	"state",
	"memory",
	"engine",
	"workflows",
	"docs",
	"attic",
}
// leakGuard is the workflow that stops a commit from carrying an
// AI-attribution string, a Co-Authored-By trailer, or a workspace
// engine-path into tracked files. It looks at the git-tracked set and at
// the prospective commit message (.git/COMMIT_EDITMSG). It writes
// nothing and needs git: when git is unavailable it reports blocked
// (contract code 2) instead of passing silently.
type leakGuard struct{}

// Name returns the identifier of this workflow.
func (leakGuard) Name() string {
	return "leak-guard"
}

// Summary returns the one-line, human-readable description of this
// workflow.
func (leakGuard) Summary() string {
	const summary = "block attribution / workspace-path leakage into tracked files (read-only)"
	return summary
}
// Run scans every git-tracked text file under env.Config.RepoRoot, plus
// .git/COMMIT_EDITMSG when that file can be read, and collects findings.
//
// The returned Result carries one of three codes:
//   - CodeBlocked when git is unavailable, so the tree cannot be inspected;
//   - CodeClean when nothing was flagged;
//   - CodeFinding when at least one finding exists (sorted by path, then line).
//
// A non-nil error is returned only when the detector cannot be built or
// when listing tracked files fails for a reason other than
// gitx.ErrUnavailable.
func (leakGuard) Run(env Env) (Result, error) {
	conf := env.Config
	// Shared answer for both places where git turns out to be missing.
	noGit := Result{Code: CodeBlocked, Summary: "git not available — cannot inspect tracked tree"}

	if !gitx.Available() {
		return noGit, nil
	}

	detector, err := NewDetector(conf.AttributionPatterns)
	if err != nil {
		return Result{}, err
	}

	// "<workspace>/<engine subdir>/": only a path into an engine directory
	// matches; the bare workspace name does not. Every dynamic part is
	// quoted, so the expression is always a literal alternation.
	pathExpr := regexp.QuoteMeta(conf.WorkspaceName) + `/(?:` + reAlt(engineSubdirs) + `)/`
	enginePath := regexp.MustCompile(pathExpr)

	files, err := gitx.TrackedFiles(conf.RepoRoot)
	switch {
	case err == nil:
		// Proceed with the scan.
	case errors.Is(err, gitx.ErrUnavailable):
		return noGit, nil
	default:
		return Result{}, fmt.Errorf("leak-guard: %w", err)
	}

	var leaks []Finding
	for _, name := range files {
		raw, readErr := os.ReadFile(filepath.Join(conf.RepoRoot, name))
		if readErr != nil {
			continue // unreadable entries are skipped, not reported
		}
		if !isText(raw) {
			continue
		}
		text := string(raw)
		leaks = append(leaks, detector.Scan(name, text)...)

		// The workspace entry in .gitignore is the documented, intended
		// modification (Constraint 2), so it is never a path leak.
		if name == ".gitignore" {
			continue
		}
		leaks = append(leaks, scanLines(name, text, enginePath, "workspace path in tracked file")...)
	}

	// The prospective commit message exists only while a commit is in
	// progress; a failed read simply means there is nothing to scan.
	editMsg := filepath.Join(conf.RepoRoot, ".git", "COMMIT_EDITMSG")
	if raw, readErr := os.ReadFile(editMsg); readErr == nil {
		leaks = append(leaks, detector.Scan("COMMIT_EDITMSG", string(raw))...)
	}

	if len(leaks) == 0 {
		return Result{Code: CodeClean, Summary: "no leak in tracked tree or commit message"}, nil
	}

	// Order the report by path first, then by line number.
	sort.Slice(leaks, func(a, b int) bool {
		if leaks[a].Path == leaks[b].Path {
			return leaks[a].Line < leaks[b].Line
		}
		return leaks[a].Path < leaks[b].Path
	})

	report := Result{
		Code:     CodeFinding,
		Summary:  fmt.Sprintf("%d leak(s) would enter tracked history", len(leaks)),
		Findings: leaks,
	}
	return report, nil
}
// scanLines returns one Finding for every line of content that re
// matches. Each Finding carries path, the 1-based line number, and what
// as its message.
//
// NOTE(review): splitLines is defined outside this view; this assumes it
// returns the lines of content, in order, as a slice.
func scanLines(path, content string, re *regexp.Regexp, what string) []Finding {
	var hits []Finding
	for idx, text := range splitLines(content) {
		if !re.MatchString(text) {
			continue
		}
		// Line numbers are 1-based, the range index is 0-based.
		hits = append(hits, Finding{Path: path, Line: idx + 1, Msg: what})
	}
	return hits
}
// reAlt builds the body of a regular-expression alternation from xs:
// each element is escaped with regexp.QuoteMeta so it matches literally,
// and the escaped elements are joined with "|". An empty or nil xs
// yields the empty string.
func reAlt(xs []string) string {
	var alt strings.Builder
	for i, x := range xs {
		if i > 0 {
			alt.WriteByte('|')
		}
		alt.WriteString(regexp.QuoteMeta(x))
	}
	return alt.String()
}