eeco/internal/workflow/gitwriteguard.go

Go 365 lines
package workflow

import (
	"os"
	"path/filepath"
	"regexp"
	"slices"
	"strings"
	"time"
)

// GitWriteGuardResult is the outcome of the git-write guard over a
// candidate Bash command. Decision is decisionDeny or decisionAllow. On a
// deny, Reason is the operator-facing explanation carried back as the
// PreToolUse permission-decision reason. On an allow, Consumed lists the
// one-shot sentinel kinds ("commit" / "tag") the caller must remove — the
// guard leaves consumption to the caller so a deny (an unauthorized op, or
// a gate finding on an authorized commit) never burns the authorization.
// A command with no commit / tag mutation is an allow with empty Consumed.
type GitWriteGuardResult struct {
	Decision string   // decisionAllow or decisionDeny
	Reason   string   // operator-facing explanation; set on a deny only
	Consumed []string // sentinel kinds to remove; set on an allow only
}

// The two values ScanGitWriteGuard stores in GitWriteGuardResult.Decision.
const (
	decisionAllow = "allow"
	decisionDeny  = "deny"
)

// sentinelTTL is how long an authorization sentinel stays valid after the
// operator sets it via `eeco authorize`. Age is measured from the sentinel
// file's modification time (see sentinelAuthorized). A stale sentinel is
// cleared and treated as unauthorized, so a forgotten authorization cannot
// linger.
const sentinelTTL = 15 * time.Minute

// gitGlobalValueOpts are the git global options that consume the following
// token as their value, used when walking past global options to the
// subcommand. Only the space-separated spelling needs an entry: an
// `--opt=value` token is a single word and is skipped like any other
// dash-prefixed token by classifyGitWrite. The set mirrors the one in
// isGitCommit (commitguard.go); a second copy here keeps ScanCommitGuard
// untouched while classifyGitWrite reuses the same walk for any subcommand.
var gitGlobalValueOpts = map[string]bool{
	"-C": true, "-c": true, "--git-dir": true, "--work-tree": true,
	"--namespace": true, "--exec-path": true, "--super-prefix": true,
	"--config-env": true,
}

// tagMutationFlags are the `git tag` flags that turn a tag op into a
// mutation (create / annotate / sign / delete / move). A bare `git tag`,
// `git tag -l`, or `git tag -n` is a read-only listing and passes.
// Lookup is by exact token; tagIsMutation additionally prefix-matches the
// message / file flags so glued or `=`-joined values are caught too.
var tagMutationFlags = map[string]bool{
	"-a": true, "-s": true, "-d": true, "-f": true,
	"--annotate": true, "--sign": true, "--delete": true,
	"--force": true, "--message": true, "--file": true,
	"--create-reflog": true,
}

// shellWrappers are the command prefixes that hide a git op inside a
// quoted argument the tokenizer cannot see into; their presence triggers
// the raw-string backstop (mirrors pre-commit-guard.sh:128-133). Each entry
// is matched as a plain substring of the raw command (see hasShellWrapper),
// so the spaces inside " -lc " and after "eval" are significant.
var shellWrappers = []string{"bash -c", "sh -c", "zsh -c", " -lc ", "eval "}

// Raw-text patterns for the shell-wrapper backstop in classifyCommand. Both
// require `git` to start a word (start of string or a non-word byte before
// it) and to be followed by whitespace and the verb.
//
// reWrappedGitCommit ends the verb on ANY byte that cannot continue the
// word (or on end of input), not just whitespace: inside a wrapper the verb
// is routinely followed by the closing quote or a shell operator
// (`bash -c "git commit"`, `eval 'git commit;'`, `git commit&&git push`),
// and a bare `git commit` does create a commit when a message is already
// prepared (merge / cherry-pick with a non-interactive editor). `-` and `_`
// count as word bytes so plumbing such as `git commit-tree` /
// `git commit-graph` is not mistaken for a commit.
//
// reWrappedGitTag deliberately keeps the whitespace-or-end terminator: a
// tag MUTATION always needs a further whitespace-separated argument, so
// `git tag` immediately followed by a quote or operator is a read-only
// listing and must not be reported as a mutation.
var (
	reWrappedGitCommit = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+commit([^[:alnum:]_-]|$)`)
	reWrappedGitTag    = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+tag([[:space:]]|$)`)
)

// ScanGitWriteGuard is the full git-write guard the cockpit machinery
// installs as a PreToolUse hook; it generalizes the attribution-only
// commit-guard. A pending `git commit` and a `git tag` MUTATION are blocked
// unless a one-shot authorization sentinel (set by `eeco authorize`, 15-min
// TTL) exists. An authorized commit is additionally run through eeco's
// CI-parity gates (attribution + workspace-path leak) so an authorized
// write still cannot carry a leak into history.
//
// Parameters: command is the PreToolUse Bash command, cwd the repo it
// targets, stateDir the sentinel directory (<workspace>/state), and
// workspaceName the engine dir name used to build the leak pattern. det is
// the attribution detector handed to the commit gates.
//
// Posture (locked decision #2): the write-verb classifier fails CLOSED — a
// command that cannot be tokenized cleanly but whose raw text shows a
// commit / tag is denied. Everything downstream (the leak / attribution
// fold-in) degrades OPEN, so the git pre-commit hook and CI stay the hard
// gates and a session is never wedged. A deny is carried in Decision; the
// runner translates it to the JSON permission-decision body and always
// exits 0.
//
// Sentinels are never removed here on an allow: the kinds to remove are
// returned in Consumed, and any deny returns with Consumed empty so no
// authorization is burned.
func ScanGitWriteGuard(det *Detector, command, cwd, stateDir, workspaceName string) GitWriteGuardResult {
	deny := func(reason string) GitWriteGuardResult {
		return GitWriteGuardResult{Decision: decisionDeny, Reason: reason}
	}

	hasCommit, hasTagMutation := classifyCommand(command)

	// Stays nil when the command carries neither write, which yields the
	// plain allow with empty Consumed.
	var toConsume []string

	// Tag mutations are user-only and gated first; a compound
	// `git tag v1 && git commit` continues to the commit gate with the tag
	// sentinel already queued.
	if hasTagMutation {
		if !sentinelAuthorized(stateDir, "tag") {
			return deny(tagDenyReason)
		}
		toConsume = append(toConsume, "tag")
	}

	if hasCommit {
		if !sentinelAuthorized(stateDir, "commit") {
			return deny(commitDenyReason)
		}
		// Authorized: apply the degrade-open CI-parity gates. A finding
		// denies WITHOUT queuing "commit", so the sentinel survives and a
		// re-commit after the fix goes through.
		findings := commitGateFindings(det, command, cwd, workspaceName)
		if len(findings) > 0 {
			return deny(gateDenyReason(findings))
		}
		toConsume = append(toConsume, "commit")
	}

	return GitWriteGuardResult{Decision: decisionAllow, Consumed: toConsume}
}

// Operator-facing deny reasons returned by ScanGitWriteGuard when the
// matching authorization sentinel is missing or stale: commitDenyReason for
// a `git commit`, tagDenyReason for a `git tag` mutation.
const (
	commitDenyReason = "eeco git-write-guard: git commit is user-driven — the user commits himself. " +
		"After explicit authorization, run `eeco authorize commit` to allow one commit " +
		"(15-min, one-shot), then re-run."
	tagDenyReason = "eeco git-write-guard: git tag mutation is user-only. " +
		"To allow one tag op, run `eeco authorize tag` (15-min, one-shot). " +
		"Read-only tag ops (git tag, git tag -l, …) are never blocked."
)

// gateDenyReason builds the deny message for an authorized commit that
// tripped the leak / attribution gates: a fixed prefix, the problems joined
// with "; ", and a fixed suffix telling the operator to fix and re-commit.
// The caller preserves the authorization sentinel in this case, which is
// what the "(authorization preserved)" suffix refers to.
func gateDenyReason(problems []string) string {
	const (
		prefix = "eeco git-write-guard: commit blocked — "
		suffix = ". Fix these, then re-commit (authorization preserved)."
	)
	var msg strings.Builder
	msg.WriteString(prefix)
	for idx, problem := range problems {
		if idx > 0 {
			msg.WriteString("; ")
		}
		msg.WriteString(problem)
	}
	msg.WriteString(suffix)
	return msg.String()
}

// classifyCommand reports whether the command invokes `git commit` and
// whether it invokes a `git tag` MUTATION, across every segment of a
// compound command.
//
// When the command tokenizes cleanly, each segment is classified by
// classifyGitWrite and a `git tag` counts only if it is a mutation.
//
// When it does not (an unbalanced quote as judged by commandParseOK), the
// guard fails CLOSED (locked decision #2) and trusts the raw text. Two
// checks are OR-ed together:
//   - the literal substrings "git commit" / "git tag", and
//   - a naive whitespace split walked by classifyGitWrite from every word,
//     so the fallback is not weaker than the tokenized path: extra
//     whitespace or tabs between `git` and the verb, and global options
//     such as `git -C dir commit`, are still recognized.
//
// In this mode any `git tag` is treated as a mutation, since its arguments
// cannot be trusted.
//
// Independently, a shell wrapper (bash -c / eval / …) triggers a raw regex
// backstop, since the tokenizer cannot see a git op hidden inside the
// wrapper's quoted argument.
func classifyCommand(command string) (commit, tagMut bool) {
	if commandParseOK(command) {
		for _, words := range commandSegments(command) {
			verb, mut := classifyGitWrite(words)
			switch verb {
			case "commit":
				commit = true
			case "tag":
				if mut {
					tagMut = true
				}
			}
		}
	} else {
		// Fail CLOSED: an unbalanced-quote command we cannot tokenize is
		// denied if its raw text shows a commit / tag write.
		commit = strings.Contains(command, "git commit")
		tagMut = strings.Contains(command, "git tag")

		// The substring test alone misses `git  commit` (two spaces or a
		// tab) and `git -C dir commit`. Re-run the option walk over a
		// whitespace split, starting at every word so a git invocation
		// anywhere in the text is seen. Words may still carry a glued shell
		// operator (`commit;`), so trim those before comparing.
		fields := strings.Fields(command)
		for start := range fields {
			verb, _ := classifyGitWrite(fields[start:])
			switch strings.TrimRight(verb, ";&|)") {
			case "commit":
				commit = true
			case "tag":
				tagMut = true
			}
		}
	}
	if hasShellWrapper(command) {
		if reWrappedGitCommit.MatchString(command) {
			commit = true
		}
		if reWrappedGitTag.MatchString(command) {
			tagMut = true
		}
	}
	return commit, tagMut
}

// classifyGitWrite examines the words of a single command segment. It
// returns the git subcommand it finds ("" when the segment is not a git
// invocation, or when no subcommand precedes a `--`), and for `git tag`
// additionally whether the remaining arguments make it a mutation.
//
// The walk is: skip leading VAR=value assignments (isEnvAssign), require a
// git program word (isGitProg), then skip global options — two words for
// an option listed in gitGlobalValueOpts, one word for any other
// dash-prefixed token — until the first non-option word, which is the
// subcommand.
func classifyGitWrite(words []string) (verb string, tagMutation bool) {
	pos := 0
	for pos < len(words) && isEnvAssign(words[pos]) {
		pos++
	}
	if pos >= len(words) || !isGitProg(words[pos]) {
		return "", false
	}

	// Start just past the git word; the loop's own increment accounts for
	// the single-word skip of an ordinary option.
	for pos++; pos < len(words); pos++ {
		switch tok := words[pos]; {
		case tok == "--":
			// End of options reached before any subcommand.
			return "", false
		case !strings.HasPrefix(tok, "-"):
			if tok == "tag" {
				return "tag", tagIsMutation(words[pos+1:])
			}
			return tok, false
		case gitGlobalValueOpts[tok]:
			pos++ // also step over the option's value word
		}
	}
	return "", false
}

// tagIsMutation decides whether the arguments following `git tag` describe
// a mutation. It returns true as soon as it meets either a word that does
// not start with "-" (taken as a tag name, i.e. a create) or a mutation
// flag: an exact entry of tagMutationFlags, or any token beginning with
// "-m", "--message" or "--file" (which covers glued and `=`-joined
// values). Arguments consisting only of other dash-prefixed options — or no
// arguments at all — are a listing and yield false.
func tagIsMutation(rest []string) bool {
	for _, arg := range rest {
		switch {
		case !strings.HasPrefix(arg, "-"):
			// A positional word is read as the tag name to create.
			return true
		case tagMutationFlags[arg],
			strings.HasPrefix(arg, "-m"),
			strings.HasPrefix(arg, "--message"),
			strings.HasPrefix(arg, "--file"):
			return true
		}
	}
	return false
}

// hasShellWrapper reports whether the raw command contains, as a plain
// substring, any of the shellWrappers entries — i.e. a construct that could
// hide a git op inside a quoted argument.
func hasShellWrapper(command string) bool {
	return slices.ContainsFunc(shellWrappers, func(wrapper string) bool {
		return strings.Contains(command, wrapper)
	})
}

// commandParseOK reports whether command tokenizes cleanly, meaning every
// single or double quote it opens is also closed. The guard fails CLOSED
// when this returns false (locked decision #2). The quoting rules are meant
// to mirror lex, the tokenizer the classifier relies on:
//   - inside single quotes nothing is special until the closing quote;
//   - inside double quotes a backslash escapes the next byte;
//   - outside quotes a backslash escapes the next byte, and a trailing lone
//     backslash is tolerated.
//
// The scan is byte-wise.
func commandParseOK(command string) bool {
	const (
		outside = iota
		inSingle
		inDouble
	)

	state := outside
	for pos := 0; pos < len(command); pos++ {
		ch := command[pos]
		switch state {
		case inSingle:
			if ch == '\'' {
				state = outside
			}
		case inDouble:
			switch {
			case ch == '"':
				state = outside
			case ch == '\\' && pos+1 < len(command):
				pos++ // the escaped byte cannot close the quote
			}
		default:
			switch ch {
			case '\'':
				state = inSingle
			case '"':
				state = inDouble
			case '\\':
				pos++ // skip the escaped byte; harmless at end of input
			}
		}
	}
	// Still inside a quote at end of input means it was never terminated.
	return state == outside
}

// sentinelAuthorized reports whether the one-shot authorization sentinel
// for kind ("commit"/"tag") is present and still fresh. The sentinel is the
// file <stateDir>/git-<kind>-authorized; freshness means its modification
// time is no older than sentinelTTL. A sentinel that cannot be stat'ed
// counts as absent. A stale one is removed on the spot (best effort) and
// reported unauthorized, so a forgotten authorization never lingers (it is
// also cleared at session start in C4b).
func sentinelAuthorized(stateDir, kind string) bool {
	sentinel := filepath.Join(stateDir, "git-"+kind+"-authorized")
	fi, statErr := os.Stat(sentinel)
	switch {
	case statErr != nil:
		return false
	case time.Since(fi.ModTime()) <= sentinelTTL:
		return true
	}
	// Expired: clear it. A failed removal is ignored on purpose — the
	// verdict is "unauthorized" either way.
	_ = os.Remove(sentinel)
	return false
}

// commitGateFindings applies eeco's CI-parity gates to an authorized commit
// and returns the operator-facing problems, de-duplicated and in discovery
// order (empty = clean). Every check degrades open: an unreadable diff or a
// message that cannot be statically resolved simply yields no finding, so
// the git pre-commit hook + CI remain the hard gates (locked decision #2).
//
// Checks, in order:
//  1. AI-attribution (det, eeco's comment-hygiene equivalent) over the
//     assembled message of each `git commit` segment;
//  2. AI-attribution over the staged diff, plus a workspace-path leak scan
//     of its added lines (leak-guard's pattern);
//  3. AI-attribution over the raw command text.
func commitGateFindings(det *Detector, command, cwd, workspaceName string) []string {
	var findings []string

	// record appends a problem unless the identical text is already listed.
	record := func(problem string) {
		if slices.Contains(findings, problem) {
			return
		}
		findings = append(findings, problem)
	}
	// attribution runs the detector over text and records each hit, tagged
	// with where it was found.
	attribution := func(where, text string) {
		for _, hit := range det.Scan(where, text) {
			record(hit.Msg + " in " + where)
		}
	}

	// 1. Assembled message of every commit segment.
	for _, words := range commandSegments(command) {
		verb, _ := classifyGitWrite(words)
		if verb != "commit" {
			continue
		}
		msg := assembleMessage(words, cwd)
		if msg == "" {
			continue
		}
		attribution("commit message", msg)
	}

	// 2. Staged additions: attribution, then workspace-path leaks.
	diff := stagedDiff(cwd)
	if diff != "" {
		attribution("staged diff", diff)
		for _, leaked := range scanDiffWorkspaceLeak(diff, workspaceName) {
			record("workspace path in staged content: " + strings.TrimSpace(leaked))
		}
	}

	// 3. The raw command, to catch attribution embedded with a real newline
	// (a trailer or generated-by line inside -m).
	attribution("command", command)
	return findings
}

// scanDiffWorkspaceLeak returns the added lines of diff (with the leading
// "+" removed) that mention an engine subdirectory under the workspace,
// i.e. text matching <workspaceName>/<one of engineSubdirs>/ — the
// workspace-path leak leak-guard catches in tracked files, applied here to
// the prospective staged content. A line counts as added when it starts
// with "+" but not with "+++" (the file header). An empty workspaceName
// disables the scan and yields nil, since there is no pattern to build.
func scanDiffWorkspaceLeak(diff, workspaceName string) []string {
	if workspaceName == "" {
		return nil
	}

	pattern := regexp.QuoteMeta(workspaceName) + `/(?:` + reAlt(engineSubdirs) + `)/`
	leakRE := regexp.MustCompile(pattern)

	var leaks []string
	for _, line := range splitLines(diff) {
		added := strings.HasPrefix(line, "+") && !strings.HasPrefix(line, "+++")
		if added && leakRE.MatchString(line) {
			leaks = append(leaks, line[1:]) // drop the diff "+" marker
		}
	}
	return leaks
}
raw view on GitHub →