Go 365 lines
package workflow
import (
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"time"
)
// GitWriteGuardResult is the outcome of the git-write guard over a
// candidate Bash command. A command with no commit / tag mutation is an
// allow with empty Consumed.
type GitWriteGuardResult struct {
	// Decision is decisionDeny or decisionAllow.
	Decision string
	// Reason is set on a deny: the operator-facing explanation carried back
	// as the PreToolUse permission-decision reason.
	Reason string
	// Consumed is set on an allow: the one-shot sentinel kinds ("commit" /
	// "tag") the caller must remove. Consumption is left to the caller so a
	// deny (an unauthorized op, or a gate finding on an authorized commit)
	// never burns the authorization.
	Consumed []string
}
// The two values ScanGitWriteGuard stores in GitWriteGuardResult.Decision.
const (
	// decisionAllow marks a command the guard permits.
	decisionAllow = "allow"
	// decisionDeny marks a command the guard blocks; the result's Reason
	// then carries the explanation.
	decisionDeny = "deny"
)
// sentinelTTL is how long an authorization sentinel stays valid after the
// operator sets it via `eeco authorize`. sentinelAuthorized measures it
// against the sentinel file's modification time; a stale sentinel is cleared
// and treated as unauthorized, so a forgotten authorization cannot linger.
const sentinelTTL = 15 * time.Minute
// gitGlobalValueOpts is the set of git global options that take their value
// from the FOLLOWING token. classifyGitWrite consults it while walking from
// `git` to the subcommand: a token in this set is skipped together with the
// token after it, any other dash-prefixed token is skipped alone.
//
// It mirrors the set in isGitCommit (commitguard.go); a second copy lives
// here so ScanCommitGuard stays untouched while classifyGitWrite reuses the
// same walk for any subcommand.
var gitGlobalValueOpts = map[string]bool{
	// Short options.
	"-C": true,
	"-c": true,

	// Long options.
	"--config-env":   true,
	"--exec-path":    true,
	"--git-dir":      true,
	"--namespace":    true,
	"--super-prefix": true,
	"--work-tree":    true,
}
// tagMutationFlags is the set of `git tag` flags that, matched exactly, mark
// the invocation as a mutation (create / annotate / sign / delete / move).
// tagIsMutation consults it per argument. A bare `git tag`, `git tag -l`,
// or `git tag -n` carries none of these and is a read-only listing.
var tagMutationFlags = map[string]bool{
	// Short and long spellings, paired.
	"-a": true, "--annotate": true,
	"-s": true, "--sign": true,
	"-d": true, "--delete": true,
	"-f": true, "--force": true,

	// Long-only entries.
	"--message":       true,
	"--file":          true,
	"--create-reflog": true,
}
// shellWrappers lists the raw substrings hasShellWrapper looks for: command
// prefixes that hide a git op inside a quoted argument the tokenizer cannot
// see into. Finding any of them triggers the raw-string backstop in
// classifyCommand (mirrors pre-commit-guard.sh:128-133).
//
// Matching is by plain substring, so "sh -c" also covers "bash -c" and
// "zsh -c"; the longer spellings are kept to stay aligned with the script.
var shellWrappers = []string{
	"bash -c",
	"sh -c",
	"zsh -c",
	" -lc ",
	"eval ",
}
// reWrappedGitCommit and reWrappedGitTag are the raw-string backstop
// patterns classifyCommand applies when a shell wrapper is present. Each
// matches the word `git` (at the start of the text or after a byte that is
// not a letter, digit, or `_`), a run of whitespace, and then the
// subcommand as a whole word.
//
// The subcommand must be followed by end-of-text or by a byte that cannot
// continue a subcommand name (anything other than a letter, digit, `_` or
// `-`). Requiring whitespace there would miss exactly the shapes a wrapper
// produces — `bash -c "git commit"`, `sh -c 'git commit;'` — where the next
// byte is a closing quote or a shell operator. `-` stays excluded so that
// `git commit-tree` is not mistaken for `git commit`.
var (
	reWrappedGitCommit = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+commit([^[:alnum:]_\-]|$)`)
	reWrappedGitTag    = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+tag([^[:alnum:]_\-]|$)`)
)
// ScanGitWriteGuard generalizes the attribution-only commit-guard into the
// full git-write guard the cockpit machinery installs as a PreToolUse hook.
//
// A pending `git commit` and a `git tag` MUTATION are each blocked unless
// the matching one-shot authorization sentinel (set by `eeco authorize`,
// 15-min TTL) exists. An authorized commit additionally runs eeco's
// CI-parity gates (attribution + workspace-path leak), so an authorized
// write still cannot carry a leak into history.
//
// Parameters: command is the PreToolUse Bash command, cwd the repo it
// targets, stateDir the sentinel directory (<workspace>/state), and
// workspaceName the engine dir name used to build the leak pattern. det is
// handed to the attribution gate.
//
// Posture (locked decision #2): the write-verb classifier fails CLOSED — a
// command that cannot be tokenized cleanly but whose raw text shows a
// commit / tag is denied. Everything downstream (the leak / attribution
// fold-in) degrades OPEN, so the git pre-commit hook and CI stay the hard
// gates and a session is never wedged. A deny is carried in Decision; the
// runner translates it to the JSON permission-decision body and always
// exits 0.
func ScanGitWriteGuard(det *Detector, command, cwd, stateDir, workspaceName string) GitWriteGuardResult {
	deny := func(reason string) GitWriteGuardResult {
		return GitWriteGuardResult{Decision: decisionDeny, Reason: reason}
	}

	wantsCommit, wantsTag := classifyCommand(command)
	verdict := GitWriteGuardResult{Decision: decisionAllow}

	// (1a) Tag mutation is user-only and gated first. In a combined
	// `git tag v1 && git commit` the tag sentinel is only queued here; the
	// commit gate below can still turn the whole command into a deny, and
	// a deny carries no Consumed.
	if wantsTag {
		if !sentinelAuthorized(stateDir, "tag") {
			return deny(tagDenyReason)
		}
		verdict.Consumed = append(verdict.Consumed, "tag")
	}

	if !wantsCommit {
		return verdict
	}

	// (1b) Commit requires its own authorization.
	if !sentinelAuthorized(stateDir, "commit") {
		return deny(commitDenyReason)
	}

	// (2) Authorized commit: run the CI-parity gates (degrade-open). A
	// finding denies WITHOUT queueing "commit" for consumption, so the
	// sentinel survives and a re-commit after the fix works.
	findings := commitGateFindings(det, command, cwd, workspaceName)
	if len(findings) > 0 {
		return deny(gateDenyReason(findings))
	}

	verdict.Consumed = append(verdict.Consumed, "commit")
	return verdict
}
// Deny reasons ScanGitWriteGuard places in GitWriteGuardResult.Reason when
// the required authorization sentinel is missing or stale.
const (
	// commitDenyReason is used when the command contains a `git commit` and
	// sentinelAuthorized rejects the "commit" sentinel.
	commitDenyReason = "eeco git-write-guard: git commit is user-driven — the user commits himself. " +
		"After explicit authorization, run `eeco authorize commit` to allow one commit " +
		"(15-min, one-shot), then re-run."
	// tagDenyReason is used when the command contains a `git tag` mutation
	// and sentinelAuthorized rejects the "tag" sentinel.
	tagDenyReason = "eeco git-write-guard: git tag mutation is user-only. " +
		"To allow one tag op, run `eeco authorize tag` (15-min, one-shot). " +
		"Read-only tag ops (git tag, git tag -l, …) are never blocked."
)
// gateDenyReason builds the deny message for an authorized commit that
// tripped the leak / attribution gates: a fixed lead-in, the problems joined
// with "; ", and a fixed closing sentence. The closing sentence tells the
// operator the authorization is preserved, so they can fix the listed
// problems and re-commit.
func gateDenyReason(problems []string) string {
	const (
		head = "eeco git-write-guard: commit blocked — "
		tail = ". Fix these, then re-commit (authorization preserved)."
	)
	var msg strings.Builder
	msg.WriteString(head)
	for idx, problem := range problems {
		if idx > 0 {
			msg.WriteString("; ")
		}
		msg.WriteString(problem)
	}
	msg.WriteString(tail)
	return msg.String()
}
// classifyCommand reports whether command invokes `git commit` and whether
// it invokes a `git tag` MUTATION, across every segment of a compound
// command.
//
// Three layers feed the two results, and each can only switch a result on:
//
//  1. Tokenized path: when commandParseOK accepts the command, every
//     segment from commandSegments is classified by classifyGitWrite.
//  2. Fail-CLOSED fallback (locked decision #2): when the command cannot be
//     tokenized cleanly, the raw text is searched for "git commit" /
//     "git tag". Whitespace runs are collapsed to a single space first, so
//     `git  commit` or `git<TAB>tag` cannot slip past the literal match.
//     Any "git tag" counts as a mutation here, because read-only forms
//     cannot be told apart without tokens.
//  3. Wrapper backstop: when hasShellWrapper finds bash -c / eval / …, the
//     reWrapped* patterns are applied to the raw text regardless of which
//     of (1)/(2) ran, since the tokenizer cannot see a git op hidden inside
//     the wrapper's quoted argument.
//
// NOTE(review): layers (2) and (3) only recognize `git` directly followed
// by the subcommand; a global option in between (`git -C dir commit`) is
// not matched by either raw-text check.
func classifyCommand(command string) (commit, tagMut bool) {
	if commandParseOK(command) {
		for _, words := range commandSegments(command) {
			switch verb, mut := classifyGitWrite(words); verb {
			case "commit":
				commit = true
			case "tag":
				if mut {
					tagMut = true
				}
			}
		}
	} else {
		// Fail CLOSED on text we cannot tokenize. Normalizing whitespace
		// makes this a superset of a plain substring test on the raw text.
		flat := strings.Join(strings.Fields(command), " ")
		commit = strings.Contains(flat, "git commit")
		tagMut = strings.Contains(flat, "git tag")
	}

	if hasShellWrapper(command) {
		if reWrappedGitCommit.MatchString(command) {
			commit = true
		}
		if reWrappedGitTag.MatchString(command) {
			tagMut = true
		}
	}
	return commit, tagMut
}
// classifyGitWrite looks at the word list of ONE command segment and
// returns the git subcommand it invokes, plus — only for `git tag` —
// whether the invocation is a mutation.
//
// verb is "" when the segment is not a git invocation, when `--` is reached
// before any subcommand, or when the words run out. It reuses isEnvAssign /
// isGitProg and the global-option walk from isGitCommit, generalized to any
// subcommand.
func classifyGitWrite(words []string) (verb string, tagMutation bool) {
	// Skip leading environment assignments.
	pos := 0
	for ; pos < len(words); pos++ {
		if !isEnvAssign(words[pos]) {
			break
		}
	}
	if pos >= len(words) || !isGitProg(words[pos]) {
		return "", false
	}

	// Walk the tokens after `git` until the first non-option word.
	for pos++; pos < len(words); pos++ {
		tok := words[pos]
		switch {
		case tok == "--":
			// End of options without a subcommand.
			return "", false
		case !strings.HasPrefix(tok, "-"):
			if tok == "tag" {
				return "tag", tagIsMutation(words[pos+1:])
			}
			return tok, false
		case gitGlobalValueOpts[tok]:
			// This option's value is the next token; step over it too.
			pos++
		}
	}
	return "", false
}
// tagIsMutation reports whether the arguments after `git tag` denote a
// mutation rather than a read-only listing.
//
// Rules, in priority order:
//
//   - Any mutation flag (an exact tagMutationFlags entry, or anything
//     starting with -m / --message / --file) makes it a mutation, wherever
//     it appears.
//   - Otherwise an explicit -l / --list makes it a listing: positional
//     arguments are then patterns, not tag names (`git tag -l 'v1.*'`).
//   - Otherwise a positional argument is a tag name, i.e. a create.
//
// A bare `git tag`, or one with only non-mutating flags (`-l`, `-n`), is
// not a mutation.
func tagIsMutation(rest []string) bool {
	var listMode, positional bool
	for _, arg := range rest {
		switch {
		case !strings.HasPrefix(arg, "-"):
			positional = true
		case tagMutationFlags[arg],
			strings.HasPrefix(arg, "-m"),
			strings.HasPrefix(arg, "--message"),
			strings.HasPrefix(arg, "--file"):
			return true
		case arg == "-l", arg == "--list":
			listMode = true
		}
	}
	// Without a mutation flag, a positional only creates a tag when git is
	// not in explicit list mode.
	return positional && !listMode
}
// hasShellWrapper reports whether command contains, as a plain substring,
// any entry of shellWrappers — i.e. a known shell wrapper that could hide a
// git op inside a quoted argument.
func hasShellWrapper(command string) bool {
	return slices.ContainsFunc(shellWrappers, func(wrapper string) bool {
		return strings.Contains(command, wrapper)
	})
}
// commandParseOK reports whether command tokenizes cleanly, meaning every
// quote that opens is also closed. The guard fails CLOSED when this returns
// false (locked decision #2). The quoting rules mirror lex so the verdict
// matches the tokenizer the classifier relies on:
//
//   - inside single quotes nothing is special until the closing quote;
//   - inside double quotes a backslash protects the next byte;
//   - outside quotes a backslash protects the next byte, and a trailing
//     backslash is tolerated.
func commandParseOK(command string) bool {
	// open holds the quote byte currently open, or 0 outside any quote.
	var open byte
	for pos := 0; pos < len(command); pos++ {
		ch := command[pos]
		switch open {
		case '\'':
			if ch == '\'' {
				open = 0
			}
		case '"':
			switch {
			case ch == '"':
				open = 0
			case ch == '\\' && pos+1 < len(command):
				pos++ // the escaped byte cannot close the quote
			}
		default:
			switch ch {
			case '\'', '"':
				open = ch
			case '\\':
				pos++ // skip the escaped byte, if there is one
			}
		}
	}
	return open == 0
}
// sentinelAuthorized reports whether the one-shot authorization sentinel for
// kind ("commit"/"tag") exists under stateDir and is still within
// sentinelTTL, judged by the file's modification time.
//
// A sentinel that cannot be stat'ed counts as absent. A stale sentinel is
// removed (best effort) and reported unauthorized, so a forgotten
// authorization never lingers (it is also cleared at session start in C4b).
// A valid sentinel is left in place; consuming it is the caller's job.
func sentinelAuthorized(stateDir, kind string) bool {
	sentinel := filepath.Join(stateDir, "git-"+kind+"-authorized")

	st, statErr := os.Stat(sentinel)
	switch {
	case statErr != nil:
		return false
	case time.Since(st.ModTime()) <= sentinelTTL:
		return true
	}

	// Stale: clear it and treat as unauthorized. The removal error is
	// ignored on purpose — the answer is "unauthorized" either way.
	_ = os.Remove(sentinel)
	return false
}
// commitGateFindings runs eeco's CI-parity gates over an authorized commit
// and returns the operator-facing problems, de-duplicated and in discovery
// order (empty = clean).
//
// Every check degrades open: an unreadable diff or a message that cannot be
// statically resolved yields no finding, so the git pre-commit hook + CI
// stay the hard gates (locked decision #2).
//
// Checks, in order:
//
//  1. AI-attribution (det, eeco's comment-hygiene equivalent) over the
//     assembled message of each `git commit` segment.
//  2. AI-attribution over the staged diff, then the workspace-path leak
//     scan over its added lines (leak-guard's pattern).
//  3. AI-attribution over the raw command.
func commitGateFindings(det *Detector, command, cwd, workspaceName string) []string {
	var findings []string

	// record appends a problem unless the identical text is already listed.
	record := func(problem string) {
		if slices.Contains(findings, problem) {
			return
		}
		findings = append(findings, problem)
	}
	// attribution scans text with det and records each hit, labeled with
	// the place it was found.
	attribution := func(where, text string) {
		for _, hit := range det.Scan(where, text) {
			record(hit.Msg + " in " + where)
		}
	}

	// (1) The assembled message of each commit segment.
	for _, segment := range commandSegments(command) {
		verb, _ := classifyGitWrite(segment)
		if verb != "commit" {
			continue
		}
		message := assembleMessage(segment, cwd)
		if message != "" {
			attribution("commit message", message)
		}
	}

	// (2) The staged additions: attribution, then workspace-path leak.
	staged := stagedDiff(cwd)
	if staged != "" {
		attribution("staged diff", staged)
		for _, leaked := range scanDiffWorkspaceLeak(staged, workspaceName) {
			record("workspace path in staged content: " + strings.TrimSpace(leaked))
		}
	}

	// (3) The raw command: catches attribution embedded with a real newline
	// (a trailer or generated-by line inside -m).
	attribution("command", command)

	return findings
}
// scanDiffWorkspaceLeak returns the added diff lines that reference an
// engine subdirectory under the workspace (the state/memory/… dirs), each
// with its leading `+` removed. This is the workspace-path leak leak-guard
// catches in tracked files, applied here to the prospective staged content.
//
// An empty workspaceName disables the scan (there is no pattern to build)
// and returns nil. Only lines starting with `+` are considered, and lines
// starting with `+++` are skipped as file headers.
//
// NOTE(review): the `+++` test is purely textual, so an added line whose
// own content begins with `++` is skipped as well.
func scanDiffWorkspaceLeak(diff, workspaceName string) []string {
	if workspaceName == "" {
		return nil
	}

	pattern := regexp.QuoteMeta(workspaceName) + `/(?:` + reAlt(engineSubdirs) + `)/`
	leakRE := regexp.MustCompile(pattern)

	var leaks []string
	for _, ln := range splitLines(diff) {
		added, isPlus := strings.CutPrefix(ln, "+")
		if !isPlus || strings.HasPrefix(added, "++") {
			continue
		}
		// Match against the full line, marker included.
		if leakRE.MatchString(ln) {
			leaks = append(leaks, added)
		}
	}
	return leaks
}