Go 276 lines
package cockpit
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// ParityResult reports a three-tier structural comparison between an emitted
// skill and a hand-built answer-key SKILL.md. Each tier is recorded on its
// own so a failure identifies the dimension that diverged; OK is true only
// when every tier passed.
type ParityResult struct {
	// LayerOK is set when both files use the one-dir-per-skill layout
	// .claude/skills/<name>/SKILL.md.
	LayerOK bool
	// CapOK is set when the emit has the 3-key frontmatter, >=5 steps, an
	// output section, and an allowlist that is a superset of the answer-key
	// core.
	CapOK bool
	// SafetyOK is set when the emitted allowlist contains zero forbidden
	// write-git verbs; answer-key hits are warnings, not failures.
	SafetyOK bool
	// Notes holds human-readable notes for any failed tier.
	Notes []string
}

// OK reports whether all three parity tiers held.
func (p ParityResult) OK() bool {
	if !p.LayerOK || !p.CapOK {
		return false
	}
	return p.SafetyOK
}
// skillShape is the comparable structure parsed from a SKILL.md: the
// frontmatter keys present, the allowlist entries, the step count, and
// whether an output section exists. Prose is deliberately ignored:
// parity is structural, because the emit is neutral while the answer key
// is project-specific.
type skillShape struct {
frontmatterKeys map[string]bool // set of keys seen in the leading frontmatter
allowlist []string // entries parsed from the "allowed-tools" frontmatter value
stepCount int // number of lines that start with "## Step "
hasOutput bool // true when a line equal to "## Output" was seen
}
// ScratchRegenerate renders pb with the renderer registered for target and
// writes the result beneath scratchRoot, returning the path of the file it
// wrote. The only write is to scratchRoot joined with the renderer's
// relative path; scratchRoot is expected to be a caller-owned throwaway
// directory (t.TempDir / os.MkdirTemp) so a parity check does not touch a
// real cockpit or the answer key.
//
// It fails when target has no renderer, when rendering fails, or when the
// destination directory or file cannot be created.
func ScratchRegenerate(pb Playbook, target, scratchRoot string) (string, error) {
	renderer, known := rendererFor(target)
	if !known {
		return "", fmt.Errorf("unknown target %q", target)
	}

	rendered, renderErr := renderer.Render(pb)
	if renderErr != nil {
		return "", renderErr
	}

	outPath := filepath.Join(scratchRoot, renderer.RelPath(pb))
	parent := filepath.Dir(outPath)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		return "", fmt.Errorf("scratch dir: %w", mkErr)
	}
	if writeErr := os.WriteFile(outPath, rendered, 0o644); writeErr != nil {
		return "", fmt.Errorf("write scratch: %w", writeErr)
	}
	return outPath, nil
}
// Parity renders pb for target into a private scratch dir and compares it
// structurally against the answer-key SKILL.md at answerKeyPath. The answer
// key is only read; the scratch dir is created and removed internally.
//
// The returned ParityResult carries one flag per tier plus Notes describing
// each failed check (and a warning note when the answer key itself contains
// forbidden verbs). A non-nil error means the comparison could not be run at
// all: unknown target, scratch/render failure, or an unreadable file.
func Parity(pb Playbook, target, answerKeyPath string) (ParityResult, error) {
	// Resolve the renderer before touching the filesystem: an unknown target
	// then fails without creating (and removing) a scratch dir. The message
	// is the same one ScratchRegenerate would have produced.
	r, ok := rendererFor(target)
	if !ok {
		return ParityResult{}, fmt.Errorf("unknown target %q", target)
	}

	scratch, err := os.MkdirTemp("", "eeco-cockpit-parity-")
	if err != nil {
		return ParityResult{}, fmt.Errorf("scratch root: %w", err)
	}
	// Best-effort cleanup: a leftover temp dir must not fail the check.
	defer os.RemoveAll(scratch)

	emittedPath, err := ScratchRegenerate(pb, target, scratch)
	if err != nil {
		return ParityResult{}, err
	}
	emittedBytes, err := os.ReadFile(emittedPath)
	if err != nil {
		return ParityResult{}, fmt.Errorf("read emitted skill: %w", err)
	}
	keyBytes, err := os.ReadFile(answerKeyPath)
	if err != nil {
		return ParityResult{}, fmt.Errorf("read answer key %s: %w", answerKeyPath, err)
	}

	emitted := parseSkillShape(emittedBytes)
	key := parseSkillShape(keyBytes)
	var res ParityResult

	// Tier 1 — layer: both live at .claude/skills/<name>/SKILL.md. The
	// emitted side is judged on its renderer-relative path, not on the
	// scratch location.
	emittedRel := r.RelPath(pb)
	res.LayerOK = isSkillLayout(emittedRel) && isSkillLayout(answerKeyPath)
	if !res.LayerOK {
		res.Notes = append(res.Notes, fmt.Sprintf("layer: emitted %q / answer key %q not both .claude/skills/<name>/SKILL.md", emittedRel, answerKeyPath))
	}

	// Tier 2 — capability: 3-key frontmatter, >=5 steps, an output section,
	// and the emitted allowlist is a superset of the answer key's portable
	// core (project-specific entries are dropped by portableAllowlist).
	threeKey := emitted.frontmatterKeys["name"] && emitted.frontmatterKeys["description"] && emitted.frontmatterKeys["allowed-tools"]
	enoughSteps := emitted.stepCount >= 5
	keyCore := portableAllowlist(key.allowlist)
	missing := setDifference(coverageSet(keyCore), coverageSet(emitted.allowlist))
	res.CapOK = threeKey && enoughSteps && emitted.hasOutput && len(missing) == 0
	if !res.CapOK {
		// One note per failed sub-check so the report names each gap.
		if !threeKey {
			res.Notes = append(res.Notes, "capability: emitted frontmatter missing one of name/description/allowed-tools")
		}
		if !enoughSteps {
			res.Notes = append(res.Notes, fmt.Sprintf("capability: emitted has %d steps (<5)", emitted.stepCount))
		}
		if !emitted.hasOutput {
			res.Notes = append(res.Notes, "capability: emitted has no output section")
		}
		if len(missing) > 0 {
			res.Notes = append(res.Notes, "capability: emitted allowlist missing answer-key core: "+strings.Join(missing, ", "))
		}
	}

	// Tier 3 — safety: zero forbidden write-git verbs in the EMITTED allowlist.
	// eeco controls only what it emits; the hand-built answer key is a separate
	// repo, so an over-grant there is surfaced as a warning Note, not a hard
	// failure (the roadmap invariant scopes to "every emitted allowlist"). The
	// scan reads the literal emitted scopes, so a broader emitted scope can
	// never hide behind the Tier-2 head normalization.
	forbidden := pb.Intent.forbiddenVerbs()
	emittedHits := ScanAllowlistForWriteGitVerbs(emitted.allowlist, forbidden)
	keyHits := ScanAllowlistForWriteGitVerbs(key.allowlist, forbidden)
	res.SafetyOK = len(emittedHits) == 0
	if len(emittedHits) > 0 {
		res.Notes = append(res.Notes, fmt.Sprintf("safety: forbidden write-git verb(s) in emitted allowlist: %v", emittedHits))
	}
	if len(keyHits) > 0 {
		// Warning only: does not affect SafetyOK.
		res.Notes = append(res.Notes, fmt.Sprintf("answer key over-grants forbidden verb(s): %v (not eeco-controlled — fix the source skill)", keyHits))
	}
	return res, nil
}
// isSkillLayout reports whether p ends in the one-dir-per-skill Claude
// layout .claude/skills/<name>/SKILL.md.
//
// The match is on the last four path segments exactly, after cleaning the
// path and normalizing separators to "/". A SKILL.md directly under
// .claude/skills, one nested more than one directory deep, or one under a
// directory that merely ends in ".claude" (e.g. "x.claude") does not
// qualify. Leading segments are ignored, so absolute and relative paths
// both work.
func isSkillLayout(p string) bool {
	// Clean collapses "//", "." and inner ".." so every remaining segment
	// between .claude/skills and SKILL.md is a real directory name.
	segs := strings.Split(filepath.ToSlash(filepath.Clean(p)), "/")
	n := len(segs)
	if n < 4 {
		return false
	}
	return segs[n-4] == ".claude" &&
		segs[n-3] == "skills" &&
		segs[n-1] == "SKILL.md"
}
// portableAllowlist filters an answer-key allowlist down to the portable
// core that a neutral emit is expected to cover. Entries are judged by the
// verb that bashVerb extracts from them:
//
//   - an empty verb (presumably an entry that is not a Bash scope, such as
//     a named tool) is kept as-is;
//   - a verb containing "/" anywhere in it is dropped. This is aimed at
//     absolute-path tools like /bin/ls, but note the test covers the whole
//     verb string, arguments included;
//   - a multi-token verb whose first token is not "git" is dropped, on the
//     basis that a non-git verb pinned to arguments (e.g. "mv <project
//     dir>/...") is project-specific;
//   - everything else (a bare verb, or any slash-free git verb) is kept.
//
// Kept entries are returned unmodified and in their original order; the
// result is nil when nothing is kept.
func portableAllowlist(allowlist []string) []string {
	var kept []string
	for _, entry := range allowlist {
		cmd := bashVerb(entry)
		tokens := strings.Fields(cmd)
		switch {
		case cmd == "":
			// No verb extracted: nothing to judge, keep the entry.
		case strings.Contains(cmd, "/"):
			continue
		case len(tokens) >= 2 && tokens[0] != "git":
			continue
		}
		kept = append(kept, entry)
	}
	return kept
}
// coverageKey returns the token on which the Tier-2 capability coverage
// check compares an allowlist entry. When the verb bashVerb extracts has at
// least two tokens and starts with "git", the key is "git <second token>",
// so a scope or sub-verb refinement of the same verb (emitted
// "git branch --show-current" vs key "git branch") does not register as a
// missing capability. Any other entry — a named tool, a non-git Bash verb,
// a bare "git" — is its own key, compared as the exact string.
//
// This only affects coverage. The Tier-3 safety scan is handed the literal
// allowlist entries, so normalizing here cannot hide a broader scope.
func coverageKey(entry string) string {
	if tokens := strings.Fields(bashVerb(entry)); len(tokens) > 1 && tokens[0] == "git" {
		return strings.Join(tokens[:2], " ")
	}
	return entry
}
// coverageSet applies coverageKey to every entry, preserving order and
// length, and returns the keys used by the Tier-2 set-difference. The
// result is always non-nil, and may contain repeated keys when several
// entries normalize to the same one.
func coverageSet(entries []string) []string {
	keys := make([]string, len(entries))
	for i := range entries {
		keys[i] = coverageKey(entries[i])
	}
	return keys
}
// setDifference returns the entries of want that are not present in have,
// with set semantics: membership ignores order, and each missing entry is
// reported once even if it occurs several times in want. Results keep the
// order of first appearance in want. The result is nil when nothing is
// missing.
func setDifference(want, have []string) []string {
	// seen starts as the contents of have and then also absorbs every entry
	// already reported, which is what suppresses duplicates from want.
	seen := make(map[string]struct{}, len(have)+len(want))
	for _, h := range have {
		seen[h] = struct{}{}
	}
	var diff []string
	for _, w := range want {
		if _, ok := seen[w]; ok {
			continue
		}
		seen[w] = struct{}{}
		diff = append(diff, w)
	}
	return diff
}
// parseSkillShape extracts the comparable structure from SKILL.md bytes:
// the leading frontmatter's keys and allowed-tools list, the count of
// "## Step " headings, and whether a "## Output" heading is present.
//
// Frontmatter is recognized only when the very first line is "---" and runs
// until the next "---" line. Inside it, every line containing ":" records
// the text before the first ":" as a key. The allowed-tools value is read
// in either of two forms:
//
//	allowed-tools: Read, Grep          (inline, handed to splitAllowlist)
//
//	allowed-tools:                     (YAML block sequence)
//	  - Read
//	  - Bash(git log:*)
//
// Block-sequence items are taken verbatim (surrounding quotes stripped) and
// are not treated as frontmatter keys, even when they contain ":". Without
// this a list-form allowlist would parse as empty and any scan built on it
// would see no entries.
//
// This is a line scanner, not a YAML parser: headings are matched on the
// trimmed line text wherever they occur in the body.
func parseSkillShape(content []byte) skillShape {
	shape := skillShape{frontmatterKeys: map[string]bool{}}

	inFrontmatter := false
	// inToolList is true while scanning the lines directly under an
	// "allowed-tools:" key that had no inline value.
	inToolList := false
	for i, raw := range strings.Split(string(content), "\n") {
		// TrimSpace also removes a trailing "\r" from CRLF files.
		trimmed := strings.TrimSpace(raw)

		if i == 0 && trimmed == "---" {
			inFrontmatter = true
			continue
		}

		if inFrontmatter {
			if trimmed == "---" {
				inFrontmatter = false
				continue
			}
			if inToolList {
				if trimmed == "" {
					continue
				}
				if trimmed == "-" || strings.HasPrefix(trimmed, "- ") {
					item := strings.TrimSpace(strings.Trim(strings.TrimSpace(trimmed[1:]), `"'`))
					if item != "" {
						shape.allowlist = append(shape.allowlist, item)
					}
					continue
				}
				// Anything else ends the list; parse it as a normal line.
				inToolList = false
			}
			key, val, ok := strings.Cut(trimmed, ":")
			if ok {
				key = strings.TrimSpace(key)
				shape.frontmatterKeys[key] = true
				if key == "allowed-tools" {
					shape.allowlist = splitAllowlist(val)
					inToolList = strings.TrimSpace(val) == ""
				}
			}
			continue
		}

		if strings.HasPrefix(trimmed, "## Step ") {
			shape.stepCount++
		}
		if trimmed == "## Output" {
			shape.hasOutput = true
		}
	}
	return shape
}
// parseAllowedTools parses content as a SKILL.md and returns only the
// allowlist entries found in its frontmatter; the result is empty when the
// file has no allowed-tools entry. Shared by Verify's on-disk safety scan.
func parseAllowedTools(content []byte) []string {
	shape := parseSkillShape(content)
	return shape.allowlist
}
// splitAllowlist parses an "allowed-tools:" value into trimmed, non-empty
// entries.
//
// Entries are separated by commas, except that a comma inside a
// parenthesised scope does not split, so "Bash(git log --format=%h,%s)"
// stays one entry. If the parentheses in val are unbalanced the function
// falls back to splitting on every comma, so a malformed scope can never
// swallow the entries that follow it. A value written as a YAML flow
// sequence ("[Read, Grep]") has its brackets removed, and quotes around the
// whole value or around an entry are stripped.
//
// The result is nil when val holds no entries.
func splitAllowlist(val string) []string {
	val = strings.TrimSpace(val)
	if strings.HasPrefix(val, "[") && strings.HasSuffix(val, "]") {
		val = val[1 : len(val)-1]
	}

	// First pass: split on commas at parenthesis depth zero.
	var parts []string
	depth, start, balanced := 0, 0, true
	for i := 0; i < len(val); i++ {
		switch val[i] {
		case '(':
			depth++
		case ')':
			if depth == 0 {
				balanced = false
			} else {
				depth--
			}
		case ',':
			if depth == 0 {
				parts = append(parts, val[start:i])
				start = i + 1
			}
		}
	}
	parts = append(parts, val[start:])
	if depth != 0 || !balanced {
		// Malformed scope: do not trust the nesting, split on every comma.
		parts = strings.Split(val, ",")
	}

	var out []string
	for _, part := range parts {
		entry := strings.TrimSpace(strings.Trim(strings.TrimSpace(part), `"'`))
		if entry != "" {
			out = append(out, entry)
		}
	}
	return out
}