eeco/internal/cockpit/parity.go

Go 276 lines
package cockpit

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// ParityResult reports how an emitted skill compares, structurally, with a
// hand-built answer-key SKILL.md. The comparison has three independent
// tiers, so a failure pinpoints the dimension that diverged; OK is true only
// when every tier held.
type ParityResult struct {
	// LayerOK: both files use the one-dir-per-skill layout
	// .claude/skills/<name>/SKILL.md.
	LayerOK bool

	// CapOK: the emitted skill has the 3-key frontmatter, >=5 steps, an
	// output section, and an allowlist that is a superset of the answer-key
	// core.
	CapOK bool

	// SafetyOK: the emitted allowlist contains zero forbidden write-git
	// verbs. Hits in the answer key are warnings, not failures.
	SafetyOK bool

	// Notes holds human-readable notes: one or more per failed tier, plus a
	// warning when the answer key itself over-grants a forbidden verb.
	Notes []string
}

// OK reports whether the layer, capability, and safety tiers all held.
func (p ParityResult) OK() bool {
	if !p.LayerOK || !p.CapOK {
		return false
	}
	return p.SafetyOK
}

// skillShape is the structural fingerprint parsed out of a SKILL.md. Prose
// is deliberately left out: parity is structural, because the emit is
// neutral while the answer key is project-specific.
type skillShape struct {
	// frontmatterKeys is the set of keys seen in the leading frontmatter.
	frontmatterKeys map[string]bool

	// allowlist is the list of allowed-tools entries.
	allowlist []string

	// stepCount is the number of step headings in the body.
	stepCount int

	// hasOutput records whether an output section exists.
	hasOutput bool
}

// ScratchRegenerate renders pb with the renderer registered for target and
// writes the result to a file under scratchRoot, returning that file's path.
//
// The destination is scratchRoot joined with the renderer's relative path
// for pb; parent directories are created as needed (0o755) and the file is
// written with mode 0o644. scratchRoot is meant to be a caller-owned
// throwaway (t.TempDir / os.MkdirTemp), so a parity check writes there
// rather than into a real cockpit or the answer key.
//
// It returns an error when target has no renderer, when rendering fails, or
// when the directory or file cannot be written.
func ScratchRegenerate(pb Playbook, target, scratchRoot string) (string, error) {
	renderer, known := rendererFor(target)
	if !known {
		return "", fmt.Errorf("unknown target %q", target)
	}

	rendered, err := renderer.Render(pb)
	if err != nil {
		return "", err
	}

	outPath := filepath.Join(scratchRoot, renderer.RelPath(pb))
	outDir := filepath.Dir(outPath)
	if mkErr := os.MkdirAll(outDir, 0o755); mkErr != nil {
		return "", fmt.Errorf("scratch dir: %w", mkErr)
	}
	if wrErr := os.WriteFile(outPath, rendered, 0o644); wrErr != nil {
		return "", fmt.Errorf("write scratch: %w", wrErr)
	}
	return outPath, nil
}

// Parity renders pb for target into a private scratch dir and compares it
// structurally against the answer-key SKILL.md at answerKeyPath. The answer
// key is only read; the scratch dir is created and removed internally.
//
// The returned ParityResult carries one flag per tier (layer, capability,
// safety) plus Notes explaining each failed tier. A non-nil error means the
// comparison could not be run at all (unknown target, scratch/render
// failure, or an unreadable file); the ParityResult is then the zero value.
func Parity(pb Playbook, target, answerKeyPath string) (ParityResult, error) {
	// Resolve the renderer before touching the filesystem so an unknown
	// target fails fast instead of creating and removing a temp dir first.
	r, ok := rendererFor(target)
	if !ok {
		return ParityResult{}, fmt.Errorf("unknown target %q", target)
	}

	scratch, err := os.MkdirTemp("", "eeco-cockpit-parity-")
	if err != nil {
		return ParityResult{}, fmt.Errorf("scratch root: %w", err)
	}
	// Best-effort cleanup: a leftover temp dir must not fail the check.
	defer os.RemoveAll(scratch)

	emittedPath, err := ScratchRegenerate(pb, target, scratch)
	if err != nil {
		return ParityResult{}, err
	}
	emittedBytes, err := os.ReadFile(emittedPath)
	if err != nil {
		return ParityResult{}, fmt.Errorf("read emitted skill %s: %w", emittedPath, err)
	}
	keyBytes, err := os.ReadFile(answerKeyPath)
	if err != nil {
		return ParityResult{}, fmt.Errorf("read answer key %s: %w", answerKeyPath, err)
	}

	emitted := parseSkillShape(emittedBytes)
	key := parseSkillShape(keyBytes)

	var res ParityResult

	// Tier 1 — layer: both live at .claude/skills/<name>/SKILL.md. The
	// emitted side is judged by its renderer-relative path, not the scratch
	// location.
	emittedRel := r.RelPath(pb)
	res.LayerOK = isSkillLayout(emittedRel) && isSkillLayout(answerKeyPath)
	if !res.LayerOK {
		res.Notes = append(res.Notes, fmt.Sprintf("layer: emitted %q / answer key %q not both .claude/skills/<name>/SKILL.md", emittedRel, answerKeyPath))
	}

	// Tier 2 — capability: 3-key frontmatter, >=5 steps, an output section,
	// and the emitted allowlist is a superset of the answer key's portable
	// core (FlashOS-only entries ignored).
	threeKey := emitted.frontmatterKeys["name"] && emitted.frontmatterKeys["description"] && emitted.frontmatterKeys["allowed-tools"]
	enoughSteps := emitted.stepCount >= 5
	keyCore := portableAllowlist(key.allowlist)
	missing := setDifference(coverageSet(keyCore), coverageSet(emitted.allowlist))
	res.CapOK = threeKey && enoughSteps && emitted.hasOutput && len(missing) == 0
	if !res.CapOK {
		// Report every failing sub-check, not just the first.
		if !threeKey {
			res.Notes = append(res.Notes, "capability: emitted frontmatter missing one of name/description/allowed-tools")
		}
		if !enoughSteps {
			res.Notes = append(res.Notes, fmt.Sprintf("capability: emitted has %d steps (<5)", emitted.stepCount))
		}
		if !emitted.hasOutput {
			res.Notes = append(res.Notes, "capability: emitted has no output section")
		}
		if len(missing) > 0 {
			res.Notes = append(res.Notes, "capability: emitted allowlist missing answer-key core: "+strings.Join(missing, ", "))
		}
	}

	// Tier 3 — safety: zero forbidden write-git verbs in the EMITTED allowlist.
	// eeco controls only what it emits; the hand-built answer key is a separate
	// repo, so an over-grant there is surfaced as a warning Note, not a hard
	// failure (the roadmap invariant scopes to "every emitted allowlist"). The
	// scan reads the literal emitted scopes, so a broader emitted scope can
	// never hide behind the Tier-2 head normalization.
	forbidden := pb.Intent.forbiddenVerbs()
	emittedHits := ScanAllowlistForWriteGitVerbs(emitted.allowlist, forbidden)
	keyHits := ScanAllowlistForWriteGitVerbs(key.allowlist, forbidden)
	res.SafetyOK = len(emittedHits) == 0
	if len(emittedHits) > 0 {
		res.Notes = append(res.Notes, fmt.Sprintf("safety: forbidden write-git verb(s) in emitted allowlist: %v", emittedHits))
	}
	if len(keyHits) > 0 {
		// Warning only: does not affect SafetyOK.
		res.Notes = append(res.Notes, fmt.Sprintf("answer key over-grants forbidden verb(s): %v (not eeco-controlled — fix the source skill)", keyHits))
	}

	return res, nil
}

// isSkillLayout reports whether p ends in the one-dir-per-skill Claude
// layout .claude/skills/<name>/SKILL.md.
//
// The path is cleaned and compared component-wise: the last four components
// must be ".claude", "skills", a skill directory name, and "SKILL.md". A
// substring test is not enough, because it would also accept a SKILL.md
// placed directly in .claude/skills/, one nested several directories deep,
// or a parent directory that merely ends in ".claude". Any prefix before
// ".claude" (relative or absolute) is allowed.
func isSkillLayout(p string) bool {
	// Clean collapses "." / ".." / doubled separators so they cannot pose as
	// the <name> component; ToSlash normalizes the OS separator.
	parts := strings.Split(filepath.ToSlash(filepath.Clean(p)), "/")
	n := len(parts)
	if n < 4 {
		return false
	}
	return parts[n-1] == "SKILL.md" &&
		parts[n-2] != "" &&
		parts[n-3] == "skills" &&
		parts[n-4] == ".claude"
}

// portableAllowlist filters an answer-key allowlist down to the portable
// core that a neutral emit is expected to cover. Entries for which bashVerb
// yields a non-empty verb are dropped when either:
//
//   - the verb string contains a slash (e.g. an absolute-path tool such as
//     /bin/ls, or a command carrying a path argument), or
//   - the verb has two or more tokens and does not start with "git" (e.g.
//     "mv ajhahnde/..." pinned to a project path — FlashOS-specific).
//
// Everything else — entries with an empty bashVerb, single-token verbs, and
// slash-free git verbs — is kept in its original order. The result is nil
// when nothing is kept.
func portableAllowlist(allowlist []string) []string {
	var kept []string
	for _, entry := range allowlist {
		cmd := bashVerb(entry)
		tokens := strings.Fields(cmd)
		switch {
		case cmd == "":
			// Not a Bash verb entry: always portable.
		case strings.ContainsRune(cmd, '/'):
			continue
		case len(tokens) > 1 && tokens[0] != "git":
			continue
		}
		kept = append(kept, entry)
	}
	return kept
}

// coverageKey returns the token the Tier-2 capability coverage check uses to
// compare an allowlist entry. When the entry's bashVerb has at least two
// tokens and starts with "git", the key is the "git <verb>" head (its first
// two tokens), so a scope or sub-verb refinement of the same verb (emitted
// "git branch --show-current" vs key "git branch") does not count as a
// missing capability. Any other entry — a named tool, a non-git Bash verb —
// is its own key, unchanged. Safety is unaffected: Tier 3 scans the literal
// emitted scopes, so a broader emitted scope cannot hide here.
func coverageKey(entry string) string {
	tokens := strings.Fields(bashVerb(entry))
	if len(tokens) < 2 || tokens[0] != "git" {
		return entry
	}
	return "git " + tokens[1]
}

// coverageSet applies coverageKey to every entry, position by position, and
// returns the resulting keys for the Tier-2 set-difference. The result has
// the same length as entries and is never nil.
func coverageSet(entries []string) []string {
	keys := make([]string, len(entries))
	for i := range entries {
		keys[i] = coverageKey(entries[i])
	}
	return keys
}

// setDifference returns the entries of want that are not present in have,
// with set semantics: membership is order-insensitive and each missing entry
// is reported once, even if want repeats it (coverage keys collapse several
// scopes of one git verb to the same head, so repeats are expected). The
// result keeps the order of first appearance in want and is nil when
// nothing is missing.
func setDifference(want, have []string) []string {
	seen := make(map[string]struct{}, len(have)+len(want))
	for _, h := range have {
		seen[h] = struct{}{}
	}
	var diff []string
	for _, w := range want {
		if _, ok := seen[w]; ok {
			continue
		}
		// Mark as seen so a repeated entry is not reported twice.
		seen[w] = struct{}{}
		diff = append(diff, w)
	}
	return diff
}

// parseSkillShape extracts the comparable structure from SKILL.md bytes:
// the leading frontmatter's keys and allowed-tools list, the count of
// "## Step " headings, and whether a "## Output" heading is present.
//
// Frontmatter is recognized only when the very first line is "---"; it runs
// until the next "---" line. Inside it, every line containing ":" records
// the text before the first ":" as a key. This is a line scan, not a YAML
// parser. The allowed-tools value is accepted in two forms:
//
//	allowed-tools: Read, Bash(git status:*)     (inline, comma-separated)
//
//	allowed-tools:                              (block list)
//	  - Read
//	  - Bash(git status:*)
//
// Block-list items are collected only when the inline value is empty, and
// they are not recorded as frontmatter keys even if they contain ":".
// Headings are counted only outside the frontmatter. CRLF line endings are
// tolerated.
func parseSkillShape(content []byte) skillShape {
	shape := skillShape{frontmatterKeys: map[string]bool{}}

	inFrontmatter := false
	// inToolList is true while reading "- item" lines that follow an
	// "allowed-tools:" key with an empty inline value.
	inToolList := false
	for i, raw := range strings.Split(string(content), "\n") {
		line := strings.TrimRight(raw, "\r")
		trimmed := strings.TrimSpace(line)

		if i == 0 && trimmed == "---" {
			inFrontmatter = true
			continue
		}

		if !inFrontmatter {
			if strings.HasPrefix(trimmed, "## Step ") {
				shape.stepCount++
			}
			if trimmed == "## Output" {
				shape.hasOutput = true
			}
			continue
		}

		if trimmed == "---" {
			inFrontmatter = false
			inToolList = false
			continue
		}

		if inToolList {
			if trimmed == "" {
				continue // blank lines between list items are harmless
			}
			if strings.HasPrefix(trimmed, "- ") {
				if item := strings.TrimSpace(trimmed[2:]); item != "" {
					shape.allowlist = append(shape.allowlist, item)
				}
				continue
			}
			// Anything else ends the list; fall through and treat the line
			// as a regular frontmatter line.
			inToolList = false
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		shape.frontmatterKeys[key] = true
		if key == "allowed-tools" {
			shape.allowlist = splitAllowlist(val)
			inToolList = len(shape.allowlist) == 0
		}
	}
	return shape
}

// parseAllowedTools parses SKILL.md bytes with parseSkillShape and returns
// only the allowlist entries from its frontmatter (empty when absent).
// Shared by Verify's on-disk safety scan.
func parseAllowedTools(content []byte) []string {
	shape := parseSkillShape(content)
	return shape.allowlist
}

// splitAllowlist breaks an "allowed-tools:" value on commas and returns the
// pieces with surrounding whitespace removed. Pieces that are empty or
// whitespace-only are skipped; the result is nil when nothing remains.
func splitAllowlist(val string) []string {
	isComma := func(r rune) bool { return r == ',' }

	var entries []string
	for _, field := range strings.FieldsFunc(val, isComma) {
		entry := strings.TrimSpace(field)
		if entry == "" {
			continue
		}
		entries = append(entries, entry)
	}
	return entries
}
raw view on GitHub →