ajhahn.de
← eeco
Go 113 lines
package workflow

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/ajhahnde/eeco/internal/gitx"
)

// engineSubdirs lists the internal directories of the workspace. Only a
// path-style reference to one of them inside a tracked file counts as
// engine output leaking into history. The bare workspace name (in
// prose, in source constants, or on the .gitignore ignore line) is not
// a leak and is explicitly allowed by Constraint 2.
var engineSubdirs = []string{
	"state",
	"memory",
	"engine",
	"workflows",
	"docs",
	"attic",
}

// leakGuard is the commit gate against leakage. A commit is blocked
// when tracked files would carry an AI-attribution string, a
// Co-Authored-By trailer, or a workspace engine-path. Both the
// git-tracked set and the prospective commit message
// (.git/COMMIT_EDITMSG) are inspected. The workflow is read-only and
// depends on git: when git is unavailable the result is blocked
// (contract code 2) instead of a silent pass.
type leakGuard struct{}

// Name returns the fixed workflow name "leak-guard".
func (leakGuard) Name() string {
	return "leak-guard"
}

// Summary returns the one-line description of what the workflow does.
func (leakGuard) Summary() string {
	const text = "block attribution / workspace-path leakage into tracked files (read-only)"
	return text
}

// Run scans every git-tracked text file under the repository root, plus
// .git/COMMIT_EDITMSG when that file is readable, and reports leaks.
//
// Tracked files are checked with the attribution detector and, except
// for .gitignore, with the workspace engine-path pattern. The commit
// message is checked with the attribution detector only.
//
// Results:
//   - CodeBlocked when git is unavailable, so the guard never passes
//     without having inspected the tree.
//   - CodeClean when nothing matched.
//   - CodeFinding with the findings ordered by path, then line; findings
//     on the same path and line keep their detection order.
//
// A non-nil error is returned when the attribution detector cannot be
// built, when the workspace path pattern cannot be compiled, or when
// listing tracked files fails for a reason other than git being
// unavailable.
func (leakGuard) Run(env Env) (Result, error) {
	const gitMissing = "git not available — cannot inspect tracked tree"

	cfg := env.Config
	if !gitx.Available() {
		return Result{Code: CodeBlocked, Summary: gitMissing}, nil
	}
	det, err := NewDetector(cfg.AttributionPatterns)
	if err != nil {
		return Result{}, err
	}

	// Every component is escaped with QuoteMeta, yet compilation can
	// still fail (for example on a workspace name that is not valid
	// UTF-8). The name comes from configuration, so surface that as an
	// error instead of panicking.
	// NOTE(review): an empty WorkspaceName would reduce the pattern to
	// "/(?:state|...)/" and flag unrelated paths; presumably config
	// validation rules that out, confirm.
	wsPathRE, err := regexp.Compile(
		regexp.QuoteMeta(cfg.WorkspaceName) + `/(?:` + reAlt(engineSubdirs) + `)/`)
	if err != nil {
		return Result{}, fmt.Errorf("leak-guard: workspace path pattern: %w", err)
	}

	tracked, err := gitx.TrackedFiles(cfg.RepoRoot)
	if err != nil {
		if errors.Is(err, gitx.ErrUnavailable) {
			return Result{Code: CodeBlocked, Summary: gitMissing}, nil
		}
		return Result{}, fmt.Errorf("leak-guard: %w", err)
	}

	var findings []Finding
	for _, rel := range tracked {
		b, rerr := os.ReadFile(filepath.Join(cfg.RepoRoot, rel))
		// Best effort: unreadable files and non-text content are
		// skipped rather than failing the whole run.
		if rerr != nil || !isText(b) {
			continue
		}
		content := string(b)
		findings = append(findings, det.Scan(rel, content)...)
		// The .gitignore workspace entry is the documented, intended
		// modification (Constraint 2); never treat it as a path leak.
		if rel != ".gitignore" {
			findings = append(findings, scanLines(rel, content, wsPathRE, "workspace path in tracked file")...)
		}
	}

	// The prospective commit message, when a commit is in progress. A
	// missing or unreadable file simply means there is nothing to scan.
	msgPath := filepath.Join(cfg.RepoRoot, ".git", "COMMIT_EDITMSG")
	if b, rerr := os.ReadFile(msgPath); rerr == nil {
		findings = append(findings, det.Scan("COMMIT_EDITMSG", string(b))...)
	}

	if len(findings) == 0 {
		return Result{Code: CodeClean, Summary: "no leak in tracked tree or commit message"}, nil
	}
	// Stable sort: findings that share a path and line stay in the order
	// they were detected, so the report is reproducible.
	sort.SliceStable(findings, func(i, j int) bool {
		if findings[i].Path != findings[j].Path {
			return findings[i].Path < findings[j].Path
		}
		return findings[i].Line < findings[j].Line
	})
	return Result{
		Code:     CodeFinding,
		Summary:  fmt.Sprintf("%d leak(s) would enter tracked history", len(findings)),
		Findings: findings,
	}, nil
}

// scanLines walks content line by line (as split by splitLines) and
// returns one Finding for every line that re matches. Each Finding
// carries path, the 1-based line number, and what as its message. The
// result is nil when no line matches.
func scanLines(path, content string, re *regexp.Regexp, what string) []Finding {
	var hits []Finding
	for idx, text := range splitLines(content) {
		if !re.MatchString(text) {
			continue
		}
		// idx is zero-based; reported line numbers start at 1.
		hits = append(hits, Finding{Path: path, Line: idx + 1, Msg: what})
	}
	return hits
}

// reAlt builds the body of a regexp alternation from xs. Each element
// is escaped with regexp.QuoteMeta so it matches literally, and the
// escaped pieces are separated by "|". A nil or empty xs yields "". No
// surrounding group is added; the caller supplies it.
func reAlt(xs []string) string {
	var sb strings.Builder
	for i, alt := range xs {
		if i > 0 {
			sb.WriteByte('|')
		}
		sb.WriteString(regexp.QuoteMeta(alt))
	}
	return sb.String()
}