// ajhahn.de — eeco — Go, 449 lines

package workflow

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"

	"github.com/ajhahnde/eeco/internal/gitx"
)

// versionSync is a read-only gate reporting drift between the version
// strings named by the `version_locations` list in `config.local`. Each
// entry is a `path:regex` pair, split on the first colon; the regex has
// to declare at least one capture group, and group 1 is taken as the
// version string. The reserved value `version_locations=auto` turns on
// auto-detect, which scans a fixed set of common version files (see
// versionDetectTargets) rather than an explicit list.
//
// Anchor modes:
//   - cfg.VersionAnchor == "" (default): consistency-only. The first
//     declared location acts as the anchor and every other location
//     must carry the same value.
//   - cfg.VersionAnchor == "tag": the latest semver-shaped reachable git
//     tag is the source of truth. Declared locations must be semver
//     greater-or-equal to it, so a release commit may bump them ahead
//     of a tag that has not been pushed yet; a location behind the tag
//     fails, and the locations must still agree with each other. With
//     no reachable tag the gate falls back to consistency-only.
//   - cfg.VersionAnchor == "<path>:<regex>": designated-file mode. The
//     pair is parsed like a `version_locations` entry and its captured
//     value is the source of truth; declared locations must equal it
//     exactly. A missing anchor path is reported as blocked.
type versionSync struct{}

// Name returns the identifier of this workflow.
func (versionSync) Name() string {
	const name = "version-sync"
	return name
}

// Summary returns the one-line description of this workflow.
func (versionSync) Summary() string {
	const summary = "verify version strings agree across declared locations (read-only)"
	return summary
}

// versionSyncTagSource resolves the expected version for tag-anchor
// mode. It is a package variable so tests can swap it out; the default
// delegates to gitx.LatestSemverTag.
var versionSyncTagSource = func(root string) (string, error) {
	latest, lookupErr := gitx.LatestSemverTag(root)
	if !errors.Is(lookupErr, gitx.ErrUnavailable) {
		return latest, lookupErr
	}
	// gitx.ErrUnavailable is reported as "no tag available" so the gate
	// degrades to consistency-only instead of blocking on a host
	// without git.
	return "", nil
}

// vsCapture is the outcome of applying one path:regex pair to a file.
type vsCapture struct {
	// path is the location's path relative to the repo root, as given
	// to readVersionAt.
	path  string
	// line is the 1-based line on which the captured text starts; it
	// stays 0 when nothing was captured.
	line  int
	// value is the text of capture group 1; it is empty when the regex
	// matched nothing.
	value string
}

// Run executes the gate against env.Config.
//
// It first gathers one capture per version location, either from the
// declared `version_locations` entries or, when the list is exactly
// "auto", from the auto-detect targets. Declared paths missing on disk
// yield CodeBlocked; captures with an empty value yield CodeFinding.
// Otherwise the captures are handed to the comparison selected by
// cfg.VersionAnchor. A non-nil error is returned only when gathering or
// comparing fails outright.
func (versionSync) Run(env Env) (Result, error) {
	cfg := env.Config
	locations := cfg.VersionLocations
	if len(locations) == 0 {
		return Result{Code: CodeClean, Summary: "no version_locations declared"}, nil
	}

	// The config parser guarantees `auto` stands alone, so a list of
	// exactly one element equal to "auto" is the complete signal for
	// auto-detection over the fixed set of common version files.
	autoMode := len(locations) == 1 && locations[0] == "auto"

	var captures []vsCapture
	switch {
	case autoMode:
		found, err := detectVersionLocations(cfg.RepoRoot)
		if err != nil {
			return Result{}, err
		}
		if len(found) == 0 {
			return Result{Code: CodeClean, Summary: "auto-detect: no version locations found"}, nil
		}
		captures = found
	default:
		found, absent, err := readDeclaredLocations(cfg.RepoRoot, locations)
		if err != nil {
			return Result{}, err
		}
		if len(absent) > 0 {
			sort.Strings(absent)
			summary := fmt.Sprintf("%d declared location(s) missing on disk: %s", len(absent), strings.Join(absent, ", "))
			return Result{Code: CodeBlocked, Summary: summary}, nil
		}
		captures = found
	}

	// A location whose regex produced no version cannot be compared, so
	// all of them are reported before any anchor logic runs.
	var empties []Finding
	for _, loc := range captures {
		if loc.value != "" {
			continue
		}
		empties = append(empties, Finding{
			Path: loc.path,
			Line: 0,
			Msg:  "regex matched no version string",
		})
	}
	if len(empties) > 0 {
		sort.Slice(empties, func(a, b int) bool { return empties[a].Path < empties[b].Path })
		return Result{
			Code:     CodeFinding,
			Summary:  fmt.Sprintf("%d declared location(s) carry no version string", len(empties)),
			Findings: empties,
		}, nil
	}

	var (
		outcome Result
		runErr  error
	)
	switch anchor := cfg.VersionAnchor; anchor {
	case "":
		outcome = runConsistencyOnly(captures)
	case "tag":
		outcome, runErr = runTagAnchor(cfg.RepoRoot, captures)
	default:
		outcome, runErr = runFileAnchor(cfg.RepoRoot, anchor, captures)
	}
	if runErr != nil {
		return Result{}, runErr
	}
	if autoMode {
		outcome.Summary = "auto-detect: " + outcome.Summary
	}
	return outcome, nil
}

// readDeclaredLocations turns each `path:regex` entry into a capture by
// reading the named file under repoRoot.
//
// It returns the captures for files that exist, the relative paths of
// files that do not (which the caller maps to CodeBlocked), and an
// error for a malformed entry or any failure reported by readVersionAt.
// An entry whose regex matches nothing still produces a capture, with an
// empty value, which the caller maps to a finding.
func readDeclaredLocations(repoRoot string, entries []string) ([]vsCapture, []string, error) {
	var missing []string
	captures := make([]vsCapture, 0, len(entries))
	for _, entry := range entries {
		// Split on the first colon only; the regex may contain colons.
		rel, pattern, found := strings.Cut(entry, ":")
		if !(found && rel != "" && pattern != "") {
			return nil, nil, fmt.Errorf("version-sync: invalid version_locations entry %q (expected \"path:regex\")", entry)
		}
		got, absent, err := readVersionAt(repoRoot, rel, pattern)
		switch {
		case err != nil:
			return nil, nil, err
		case absent:
			missing = append(missing, rel)
		default:
			captures = append(captures, got)
		}
	}
	return captures, missing, nil
}

// readVersionAt applies one path:regex pair to the file at rel, resolved
// against repoRoot (rel uses forward slashes).
//
// Results:
//   - err != nil: the pattern does not compile, declares no capture
//     group, or the file exists but cannot be read.
//   - miss == true: the file does not exist; the caller decides how to
//     report that.
//   - otherwise cap carries the path and, when capture group 1 took part
//     in the first match, its text and the 1-based line it starts on. A
//     capture with an empty value means no version string was found.
func readVersionAt(repoRoot, rel, pattern string) (cap vsCapture, miss bool, err error) {
	re, err := regexp.Compile(pattern)
	if err != nil {
		return vsCapture{}, false, fmt.Errorf("version-sync: compile regex %q: %w", pattern, err)
	}
	if re.NumSubexp() < 1 {
		return vsCapture{}, false, fmt.Errorf("version-sync: regex %q needs at least one capture group", pattern)
	}
	abs := filepath.Join(repoRoot, filepath.FromSlash(rel))
	b, err := os.ReadFile(abs)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return vsCapture{}, true, nil
		}
		return vsCapture{}, false, fmt.Errorf("version-sync: read %s: %w", rel, err)
	}
	content := string(b)
	idx := re.FindStringSubmatchIndex(content)
	// idx is nil when the pattern does not match at all. idx[2] is -1
	// when the pattern matched but group 1 did not participate (an
	// optional or alternated group); slicing with it would panic, so
	// both cases are reported as "no version string".
	if idx == nil || idx[2] < 0 {
		return vsCapture{path: rel}, false, nil
	}
	value := content[idx[2]:idx[3]]
	// Line number of the capture's first byte, counted from 1.
	line := 1 + strings.Count(content[:idx[2]], "\n")
	return vsCapture{path: rel, line: line, value: value}, false, nil
}

// versionDetectTargets lists the files `version_locations=auto` inspects
// for a project version string. Every element pairs a path with a regex,
// mirroring a declared version_locations entry, and each regex has a
// single capture group holding a semver-shaped version. The list is
// kept deliberately short, limited to files whose version field is
// unambiguous, so auto-detect does not pick up a version-shaped string
// that is not the project version. Slice order is detection order, and
// therefore also decides which file anchors consistency-only mode.
var versionDetectTargets = []struct {
	path  string
	regex string
}{
	{
		path:  "VERSION",
		regex: `\bv?(\d+\.\d+\.\d+)\b`,
	},
	{
		path:  "CHANGELOG.md",
		regex: `(?m)^##\s+\[v?(\d+\.\d+\.\d+)\]`,
	},
	{
		path:  "package.json",
		regex: `"version"\s*:\s*"v?(\d+\.\d+\.\d+)"`,
	},
	{
		path:  "pyproject.toml",
		regex: `(?m)^\s*version\s*=\s*"v?(\d+\.\d+\.\d+)"`,
	},
	{
		path:  "Cargo.toml",
		regex: `(?m)^\s*version\s*=\s*"v?(\d+\.\d+\.\d+)"`,
	},
}

// detectVersionLocations probes every entry of versionDetectTargets
// under repoRoot and collects a capture for each file that exists and
// yields a non-empty version. Targets that are absent, or present but
// without a matching version, are skipped silently, so auto-detect only
// compares files that actually declare a version. The result preserves
// versionDetectTargets order, which makes the first detected file the
// deterministic anchor for consistency-only mode. Any error from
// readVersionAt aborts the scan.
func detectVersionLocations(repoRoot string) ([]vsCapture, error) {
	var found []vsCapture
	for i := range versionDetectTargets {
		target := versionDetectTargets[i]
		got, absent, err := readVersionAt(repoRoot, target.path, target.regex)
		if err != nil {
			return nil, err
		}
		if !absent && got.value != "" {
			found = append(found, got)
		}
	}
	return found, nil
}

// runConsistencyOnly implements the slice-1 behaviour: captures[0] is
// the reference and every later capture must carry the same value.
// captures must be non-empty. The result is CodeClean when all values
// agree, otherwise CodeFinding with one finding per differing location.
func runConsistencyOnly(captures []vsCapture) Result {
	ref, rest := captures[0], captures[1:]

	var drift []Finding
	for i := range rest {
		loc := rest[i]
		if loc.value == ref.value {
			continue
		}
		msg := fmt.Sprintf("%s differs from %s:%d (%s)", loc.value, ref.path, ref.line, ref.value)
		drift = append(drift, Finding{Path: loc.path, Line: loc.line, Msg: msg})
	}

	if len(drift) > 0 {
		sortFindings(drift)
		return Result{
			Code:     CodeFinding,
			Summary:  fmt.Sprintf("%d version drift(s) from %s (%s)", len(drift), ref.path, ref.value),
			Findings: drift,
		}
	}
	return Result{
		Code:    CodeClean,
		Summary: fmt.Sprintf("%d declared location(s) agree on %s", len(captures), ref.value),
	}
}

// runTagAnchor checks the captures against the tag returned by
// versionSyncTagSource. A value that is not semver-shaped or that sorts
// before the tag is a finding; equal-or-newer values pass that check.
// The captures must additionally agree with each other. If the tag
// source returns an empty tag, the function falls back to
// runConsistencyOnly and prefixes the summary with a note, so the
// operator can see that tag-anchor mode is configured but not yet
// active. captures must be non-empty.
func runTagAnchor(repoRoot string, captures []vsCapture) (Result, error) {
	tag, err := versionSyncTagSource(repoRoot)
	if err != nil {
		return Result{}, fmt.Errorf("version-sync: resolve tag-anchor: %w", err)
	}
	if tag == "" {
		fallback := runConsistencyOnly(captures)
		fallback.Summary = "tag-anchor: no semver tag reachable yet; " + fallback.Summary
		return fallback, nil
	}

	var findings []Finding
	report := func(loc vsCapture, msg string) {
		findings = append(findings, Finding{Path: loc.path, Line: loc.line, Msg: msg})
	}

	// Phase 1: nothing may sit behind the tag. Being ahead is fine, so
	// that a release commit (CHANGELOG bumped to vN.M+1.0 before the tag
	// vN.M+1.0 exists) still passes the gate.
	for _, loc := range captures {
		order, parsed := compareSemverVal(loc.value, tag)
		switch {
		case !parsed:
			report(loc, fmt.Sprintf("%s is not semver-shaped (tag-anchor compares against %s)", loc.value, tag))
		case order < 0:
			report(loc, fmt.Sprintf("%s is behind tag-anchor %s", loc.value, tag))
		}
	}
	if len(findings) > 0 {
		sortFindings(findings)
		return Result{
			Code:     CodeFinding,
			Summary:  fmt.Sprintf("%d location(s) behind tag-anchor %s", len(findings), tag),
			Findings: findings,
		}, nil
	}

	// Phase 2: the locations must agree among themselves. A release
	// commit moves every declared location together, so disagreement is
	// still the bug class slice 1 catches.
	first := captures[0]
	for _, loc := range captures[1:] {
		if loc.value == first.value {
			continue
		}
		report(loc, fmt.Sprintf("%s differs from %s:%d (%s)", loc.value, first.path, first.line, first.value))
	}
	if len(findings) > 0 {
		sortFindings(findings)
		return Result{
			Code:     CodeFinding,
			Summary:  fmt.Sprintf("%d version drift(s) from %s (%s); tag-anchor %s", len(findings), first.path, first.value, tag),
			Findings: findings,
		}, nil
	}

	var summary string
	if compareSemverFatal(first.value, tag) > 0 {
		summary = fmt.Sprintf("%d declared location(s) agree on %s (ahead of tag-anchor %s)", len(captures), first.value, tag)
	} else {
		summary = fmt.Sprintf("%d declared location(s) agree on %s; tag-anchor %s", len(captures), first.value, tag)
	}
	return Result{Code: CodeClean, Summary: summary}, nil
}

// runFileAnchor treats the `path:regex` pair in anchor as the source of
// truth and requires every capture to equal its value exactly. A
// malformed anchor is an error. A missing anchor file yields CodeBlocked,
// so the operator notices a typo instead of the gate quietly degrading
// to consistency-only. An anchor regex that captures nothing yields
// CodeFinding.
func runFileAnchor(repoRoot, anchor string, captures []vsCapture) (Result, error) {
	rel, pattern, found := strings.Cut(anchor, ":")
	if !(found && rel != "" && pattern != "") {
		return Result{}, fmt.Errorf("version-sync: invalid version_anchor %q (expected \"tag\" or \"path:regex\")", anchor)
	}

	truth, absent, err := readVersionAt(repoRoot, rel, pattern)
	switch {
	case err != nil:
		return Result{}, err
	case absent:
		return Result{
			Code:    CodeBlocked,
			Summary: fmt.Sprintf("version_anchor file missing on disk: %s", rel),
		}, nil
	case truth.value == "":
		return Result{
			Code:     CodeFinding,
			Summary:  "version_anchor regex matched no version string",
			Findings: []Finding{{Path: rel, Line: 0, Msg: "regex matched no version string"}},
		}, nil
	}

	var drift []Finding
	for _, loc := range captures {
		if loc.value == truth.value {
			continue
		}
		msg := fmt.Sprintf("%s differs from version_anchor %s:%d (%s)", loc.value, truth.path, truth.line, truth.value)
		drift = append(drift, Finding{Path: loc.path, Line: loc.line, Msg: msg})
	}
	if len(drift) == 0 {
		return Result{
			Code:    CodeClean,
			Summary: fmt.Sprintf("%d declared location(s) agree with version_anchor %s on %s", len(captures), truth.path, truth.value),
		}, nil
	}
	sortFindings(drift)
	return Result{
		Code:     CodeFinding,
		Summary:  fmt.Sprintf("%d version drift(s) from version_anchor %s (%s)", len(drift), truth.path, truth.value),
		Findings: drift,
	}, nil
}

// sortFindings orders findings in place by Path, breaking ties by Line.
func sortFindings(findings []Finding) {
	byPathThenLine := func(i, j int) bool {
		a, b := findings[i], findings[j]
		if a.Path == b.Path {
			return a.Line < b.Line
		}
		return a.Path < b.Path
	}
	sort.Slice(findings, byPathThenLine)
}

// compareSemverVal orders two `vX.Y.Z` / `X.Y.Z` strings component by
// component. The int is -1, 0 or 1 when a is lower than, equal to or
// higher than b. The bool reports whether both inputs parsed via
// splitSemver; when it is false the int is 0 and carries no meaning.
// Callers surface that as a finding so the operator notices a value the
// tag-anchor cannot compare.
func compareSemverVal(a, b string) (int, bool) {
	left, leftOK := splitSemver(a)
	right, rightOK := splitSemver(b)
	if !(leftOK && rightOK) {
		return 0, false
	}
	for idx, l := range left {
		switch r := right[idx]; {
		case l < r:
			return -1, true
		case l > r:
			return 1, true
		}
	}
	return 0, true
}

// compareSemverFatal wraps compareSemverVal for call sites that have
// already validated both inputs. It never panics; if either input does
// not parse it reports 0.
func compareSemverFatal(a, b string) int {
	if order, parsed := compareSemverVal(a, b); parsed {
		return order
	}
	return 0
}

// splitSemver parses `vX.Y.Z` / `X.Y.Z` into its three numeric
// components.
//
// A single leading "v" is optional. Each component must be a non-empty
// run of ASCII digits that fits in an int; anything else (a sign, a
// pre-release or build suffix, too few or too many components) makes
// the bool false. On failure the returned array is all zeroes.
func splitSemver(v string) ([3]int, bool) {
	var out [3]int
	parts := strings.Split(strings.TrimPrefix(v, "v"), ".")
	if len(parts) != len(out) {
		return [3]int{}, false
	}
	for i, p := range parts {
		if p == "" {
			return [3]int{}, false
		}
		// strconv.Atoi accepts a leading '+' or '-', which would let
		// inputs such as "1.+2.3" or "1.-0.3" through, so require
		// digits only before converting.
		for j := 0; j < len(p); j++ {
			if p[j] < '0' || p[j] > '9' {
				return [3]int{}, false
			}
		}
		n, err := strconv.Atoi(p)
		if err != nil {
			// Only overflow can fail here; the syntax is already checked.
			return [3]int{}, false
		}
		out[i] = n
	}
	return out, true
}