// Package gates runs cross-cutting policy gates that compose multiple
// scans over a project tree. Today the only gate is check-attribution,
// which combines a tracked-file scan (delegated to
// internal/workflow.Detector — the same primitive comment-hygiene uses)
// with a commit-body scan applying a stricter, trailer-anchored pattern
// set. The package is consumed by the eeco gates CLI verb; it requires
// git on PATH, both for the commit-body scan and for enumerating the
// tracked files.
package gates
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"github.com/ajhahnde/eeco/internal/workflow"
)
// Regex fragments shared by the commit-body patterns. They are glued
// together at runtime rather than written out whole so that this source
// file stays clean under eeco's own comment-hygiene scan (Constraint 3 —
// the same discipline as internal/workflow/attribution.go and
// internal/hooks/commitmsg.go).

// gateCoAuthored matches the trailer key, tolerating either case for the
// first letter of each word.
var gateCoAuthored = "[Cc]o-" + "[Aa]uthored-" + "[Bb]y"

// gateGenVerb matches the signature verb with an upper- or lowercase
// initial.
var gateGenVerb = "[Gg]enerated"

// gateRobotEmoji is the regexp escape for code point U+1F916.
var gateRobotEmoji = `\x{1F916}`
// strictTrailerPatterns is the pattern set applied to commit bodies —
// the same shape as internal/hooks/commitmsg.go. The Co-Authored-By
// rules are anchored to the start of a line so that prose which merely
// mentions the trailer (for example a docs commit subject like "remove
// the Co-Authored-By trailer") does not false-fire. The final rule
// catches the robot-emoji Generated-with signature anywhere in the body.
//
// Patterns are tried in slice order and the commit scan stops at the
// first one that matches.
var strictTrailerPatterns = func() []*regexp.Regexp {
	// Case-insensitive, multi-line: "^" matches at every line start.
	trailerPrefix := `(?im)^` + gateCoAuthored + `:.*`

	sources := []string{
		trailerPrefix + `claude`,
		trailerPrefix + `anthropic`,
		trailerPrefix + `noreply@anthropic`,
		// Emoji followed within 20 non-newline characters by the verb.
		gateRobotEmoji + `[^\n]{0,20}` + gateGenVerb,
	}

	compiled := make([]*regexp.Regexp, 0, len(sources))
	for _, src := range sources {
		compiled = append(compiled, regexp.MustCompile(src))
	}
	return compiled
}()
// textExtensions is the default extension allowlist for the file scan
// — same set the existing scripts/check_comment_hygiene.sh in
// downstream consumers uses, extended with the Go-side extensions a
// Go project carries.
//
// Keys MUST be lowercase: isTextExtension lowercases the file's
// extension before looking it up, so a key containing an uppercase
// letter can never match. The assembly entry is therefore stored as
// ".s", which covers both ".s" and ".S" sources.
var textExtensions = map[string]bool{
	".md":   true,
	".sh":   true,
	".go":   true,
	".zig":  true,
	".s":    true, // also matches ".S" via the lowercased lookup
	".inc":  true,
	".zon":  true,
	".yml":  true,
	".yaml": true,
	".txt":  true,
	".ld":   true,
	".json": true,
	".toml": true,
}
// Options governs CheckAttribution scope. The zero value enables
// neither scan, so CheckAttribution returns an empty Result — callers
// must set at least one of ScanFiles / ScanCommits to scan anything.
type Options struct {
// Paths, when non-empty, replaces the default `git ls-files`
// enumeration for the file scan. Each entry is repo-relative (it is
// joined onto the workdir before being read). The text-extension
// allowlist is applied only to the default enumeration, not to
// explicitly supplied Paths.
Paths []string
// Range is the commit-body git range (e.g. "origin/main..HEAD"). An
// empty value selects the default: origin/main..HEAD when
// resolvable, otherwise HEAD~10..HEAD with a notice.
Range string
// ScanFiles enables the tracked-tree file scan.
ScanFiles bool
// ScanCommits enables the commit-body scan.
ScanCommits bool
// Excludes are additional repo-relative paths to skip during the
// file scan; the gate's own source is already excluded. Entries are
// converted to forward-slash form before being compared.
Excludes []string
}
// Finding is one policy hit. File hits set Path, Line and Excerpt;
// commit-body hits set Commit, Line and Excerpt.
type Finding struct {
// Path is the repo-relative path of the offending file. Empty for
// commit-body hits.
Path string
// Line is the line number of the hit: the detector-reported line for
// file hits (used as a 1-indexed line when extracting Excerpt), or the
// 1-indexed line within the commit message for commit-body hits.
Line int
// Commit is the abbreviated commit hash (at most 7 characters). Empty
// for file hits.
Commit string
// Excerpt is the offending text: the full source line for file hits,
// the matched span for commit-body hits.
Excerpt string
}
// Result groups findings with non-fatal notices the caller should
// surface to stderr (for example the HEAD~10 range fallback).
type Result struct {
// Findings holds file-scan hits followed by commit-body hits, in the
// order the scans produced them. Nil when nothing was found.
Findings []Finding
// Notices are informational messages from the commit-body scan; they
// do not indicate a policy violation on their own.
Notices []string
}
// CheckAttribution runs whichever scans opts enables against workdir,
// which must be a git repository: first the tracked-file scan
// (opts.ScanFiles), then the commit-body scan (opts.ScanCommits).
//
// The error is non-nil only for infrastructure failures (workdir is not
// a repo, git is unavailable); in that case the Result holds whatever
// was gathered before the failing scan. Policy hits are never reported
// through the error: a run that finds violations returns them in
// Result.Findings alongside a nil error, so callers can tell "ran and
// found things" apart from "could not run". A clean run yields Result{}
// with both slices nil.
func CheckAttribution(workdir string, opts Options) (Result, error) {
	var report Result

	if opts.ScanFiles {
		fileHits, err := scanFiles(workdir, opts)
		if err != nil {
			return report, err
		}
		report.Findings = append(report.Findings, fileHits...)
	}

	if !opts.ScanCommits {
		return report, nil
	}

	commitHits, notes, err := scanCommits(workdir, opts)
	if err != nil {
		return report, err
	}
	report.Findings = append(report.Findings, commitHits...)
	report.Notices = append(report.Notices, notes...)
	return report, nil
}
// scanFiles runs the shared attribution detector over a set of
// repo-relative files under workdir and returns one Finding per hit,
// with Excerpt set to the full offending line.
//
// The file set is opts.Paths when non-empty. Otherwise it is every
// git-tracked file whose extension is on the textExtensions allowlist.
// Files named in opts.Excludes, and this gate's own source, are skipped.
//
// The error is non-nil only when the tracked-file listing or detector
// construction fails. Unreadable files and files that look binary are
// skipped silently.
func scanFiles(workdir string, opts Options) ([]Finding, error) {
	paths := opts.Paths
	if len(paths) == 0 {
		// -z yields NUL-terminated, unquoted paths. Without it git
		// C-quotes names containing non-ASCII or special bytes, and the
		// quoted form neither passes the extension filter nor opens, so
		// such files would silently escape the scan.
		out, err := runGit(workdir, "ls-files", "-z")
		if err != nil {
			// runGit already names the git invocation; add only what
			// this layer was doing.
			return nil, fmt.Errorf("list tracked files: %w", err)
		}
		// Build a fresh slice so an empty-but-allocated opts.Paths is
		// never appended into.
		var tracked []string
		for p := range strings.SplitSeq(out, "\x00") {
			if p == "" || !isTextExtension(p) {
				continue
			}
			tracked = append(tracked, p)
		}
		paths = tracked
	}

	// Compare excludes and candidates in one canonical form (cleaned,
	// forward slashes) so "./a.md" or OS-native separators still match.
	canonical := func(p string) string {
		return filepath.ToSlash(filepath.Clean(p))
	}
	excluded := make(map[string]bool, len(opts.Excludes)+1)
	excluded["internal/gates/attribution.go"] = true
	for _, e := range opts.Excludes {
		excluded[canonical(e)] = true
	}

	det, err := workflow.NewDetector(nil)
	if err != nil {
		return nil, fmt.Errorf("build detector: %w", err)
	}

	var findings []Finding
	for _, rel := range paths {
		if excluded[canonical(rel)] {
			continue
		}
		b, err := os.ReadFile(filepath.Join(workdir, rel))
		if err != nil {
			// Best effort: a listed file may be missing from the work
			// tree or unreadable; that is not a gate failure.
			continue
		}
		// Cheap binary sniff so a JSON-like blob with a NUL skips.
		if bytes.IndexByte(b[:min(len(b), 8000)], 0) != -1 {
			continue
		}
		for _, hit := range det.Scan(rel, string(b)) {
			findings = append(findings, Finding{
				Path:    rel,
				Line:    hit.Line,
				Excerpt: readLine(b, hit.Line),
			})
		}
	}
	return findings, nil
}
// scanCommits checks the message of every commit in the selected range
// against strictTrailerPatterns and reports at most one Finding per
// commit.
//
// Range selection: opts.Range when set; otherwise origin/main..HEAD if
// origin/main resolves, else HEAD~10..HEAD (announced through a notice).
//
// It returns the findings, the notices the caller should surface, and a
// non-nil error only when workdir cannot be queried as a git repository
// at all (not a repo, git missing, directory absent). A range git cannot
// resolve is not an error: the scan is skipped, but a notice says so,
// so a mistyped range or a too-short history is never mistaken for a
// clean pass.
func scanCommits(workdir string, opts Options) ([]Finding, []string, error) {
	// Preflight: separate "cannot run git here" from "range is empty or
	// unknown". Only the former is an infrastructure failure.
	if _, err := runGit(workdir, "rev-parse", "--git-dir"); err != nil {
		return nil, nil, fmt.Errorf("commit-body scan: %w", err)
	}

	var notices []string
	rng := opts.Range
	if rng == "" {
		if _, err := runGit(workdir, "rev-parse", "--verify", "--quiet", "origin/main"); err == nil {
			rng = "origin/main..HEAD"
		} else {
			rng = "HEAD~10..HEAD"
			notices = append(notices, "origin/main not resolvable; commit-body scan range falls back to "+rng)
		}
	}

	out, err := runGit(workdir, "rev-list", rng)
	if err != nil {
		// Unresolvable range (e.g. HEAD~10 does not exist in a young or
		// shallow repo, or the caller mistyped a ref): treat as no
		// commits, but tell the caller nothing was scanned.
		notices = append(notices, "commit range "+rng+" not resolvable; commit-body scan skipped")
		return nil, notices, nil
	}

	var findings []Finding
	for sha := range strings.FieldsSeq(out) {
		body, err := runGit(workdir, "log", "-1", "--format=%B", sha)
		if err != nil {
			notices = append(notices, "commit "+shortSHA(sha)+": message unreadable; skipped")
			continue
		}
		for _, p := range strictTrailerPatterns {
			loc := p.FindStringIndex(body)
			if loc == nil {
				continue
			}
			findings = append(findings, Finding{
				Commit: shortSHA(sha),
				// 1-indexed line of the match start within the message.
				Line:    strings.Count(body[:loc[0]], "\n") + 1,
				Excerpt: strings.TrimRight(body[loc[0]:loc[1]], "\r\n"),
			})
			break // one hit per commit is enough — keep reports terse
		}
	}
	return findings, notices, nil
}
// runGit executes `git <args...>` with workdir as its working directory
// and returns everything the command wrote to stdout.
//
// On failure the returned string is empty and the error is prefixed
// with "git <args>: ". When git started but exited non-zero, the rest
// of the message is git's trimmed stderr. When the process could not be
// run at all, the underlying error is wrapped so callers can inspect it
// with errors.Is / errors.As.
func runGit(workdir string, args ...string) (string, error) {
	var outBuf, errBuf bytes.Buffer

	git := exec.Command("git", args...)
	git.Dir = workdir
	git.Stdout, git.Stderr = &outBuf, &errBuf

	runErr := git.Run()
	if runErr == nil {
		return outBuf.String(), nil
	}

	invocation := strings.Join(args, " ")
	var exited *exec.ExitError
	if !errors.As(runErr, &exited) {
		return "", fmt.Errorf("git %s: %w", invocation, runErr)
	}
	return "", fmt.Errorf("git %s: %s", invocation, strings.TrimSpace(errBuf.String()))
}
// shortSHA abbreviates a commit hash to its first 7 bytes. Inputs
// shorter than that are returned unchanged.
func shortSHA(sha string) string {
	return sha[:min(len(sha), 7)]
}
// isTextExtension reports whether path's extension is on the
// textExtensions allowlist. The extension is lowercased before the
// lookup, so only lowercase keys in the allowlist can ever match.
func isTextExtension(path string) bool {
	key := strings.ToLower(filepath.Ext(path))
	listed, ok := textExtensions[key]
	return ok && listed
}
// readLine extracts line n (1-indexed) from b. Lines are delimited by
// '\n'; any trailing '\r' characters are removed from the result. It
// returns "" when n is not positive or lies beyond the last line.
func readLine(b []byte, n int) string {
	if n < 1 {
		return ""
	}
	trimCR := func(line []byte) string {
		return strings.TrimRight(string(line), "\r")
	}

	// toSkip counts the newlines that still precede the wanted line.
	begin, toSkip := 0, n-1
	for pos := 0; pos < len(b); pos++ {
		if b[pos] != '\n' {
			continue
		}
		if toSkip == 0 {
			return trimCR(b[begin:pos])
		}
		toSkip--
		begin = pos + 1
	}

	// The wanted line may be the final one, which has no terminator.
	if toSkip != 0 {
		return ""
	}
	return trimCR(b[begin:])
}