ajhahn.de
← eeco
Go 222 lines
package workflow

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/ajhahnde/eeco/internal/gitx"
	"github.com/ajhahnde/eeco/internal/queue"
)

// docDrift is the "doc-drift" workflow: it compares the release sections
// documented in CHANGELOG.md with the project's git tags and reports
// where the two disagree. It recognises two kinds of drift:
//
//   - undocumented release: a `vX.Y.Z` git tag exists, but CHANGELOG.md
//     has no `## [vX.Y.Z]` section for it;
//   - untagged release: CHANGELOG.md has a `## [vX.Y.Z]` section, but no
//     such git tag exists. A section whose version is strictly ahead of
//     the latest tag is exempt, because a release commit adds
//     `## [vX.Y.Z]` before the `vX.Y.Z` tag is pushed — that is the
//     expected release-in-progress state, not drift (the same
//     forward-drift allowance version-sync makes for its tag anchor).
//
// Each drift is reported as a finding and routed to the queue as one
// review item; the queue is the single decision channel. The operator
// then reconciles the CHANGELOG or the tag — eeco never edits either.
// Within the stale-state / drift detection family this is the
// doc-vs-tags slice, a sibling of memory-drift.
type docDrift struct{}

// Name returns the workflow identifier, "doc-drift".
func (docDrift) Name() string {
	return "doc-drift"
}

// Summary returns the one-line description of what the workflow checks.
func (docDrift) Summary() string {
	const summary = "flag drift between CHANGELOG.md release sections and git tags"
	return summary
}

// docDriftTagSource resolves the project's semver-shaped tags. Run calls
// it with the repository root and strips a leading "v" from each returned
// tag before comparing it with CHANGELOG versions. It is overridable in
// tests; it defaults to gitx.SemverTags.
var docDriftTagSource = gitx.SemverTags

// changelogHeading matches a Keep-a-Changelog version section heading
// `## [vX.Y.Z]` at the start of a line; the optional `v` is tolerated and
// group 1 captures the bare X.Y.Z. `## [Unreleased]` carries no version
// and never matches.
//
// The gap between `##` and `[` is restricted to spaces and tabs. `\s`
// would also accept newlines, which lets a bare `##` line pair up with a
// `[X.Y.Z]` that merely starts a later line (for example a link reference
// definition `[1.2.3]: https://…`) and be reported as a release section.
var changelogHeading = regexp.MustCompile(`(?m)^##[ \t]+\[v?(\d+\.\d+\.\d+)\]`)

// changelogSection is one `## [vX.Y.Z]` heading found in CHANGELOG.md,
// as produced by parseChangelogSections.
type changelogSection struct {
	version string // bare X.Y.Z, without the optional leading "v"
	line    int    // 1-based line number of the heading within the file
}

// docDriftItem is one drift carried from detection to the queue-append
// loop in Run, where it becomes a queue.Item of kind "doc-drift".
type docDriftItem struct {
	title  string // becomes the queue item's Title
	detail string // becomes the queue item's Detail
}

// Run compares the `## [vX.Y.Z]` sections of CHANGELOG.md in the repo
// root with the tags returned by docDriftTagSource.
//
// Results:
//
//   - CodeBlocked when git is not available;
//   - CodeClean when CHANGELOG.md does not exist, when there are no tags,
//     or when sections and tags agree;
//   - CodeFinding with one Finding per drift otherwise, after one review
//     item per drift has been appended to the queue in <Workspace>/state.
//
// A non-nil error is returned when CHANGELOG.md exists but cannot be
// read, when the tags cannot be resolved, or when a queue append fails
// (items appended before the failure stay in the queue).
func (docDrift) Run(env Env) (Result, error) {
	const changelogFile = "CHANGELOG.md"

	// Everything below is measured against git tags. Without git the
	// check cannot run, so it reports blocked (contract code 2) instead of
	// a pass that never actually looked at anything.
	if !gitx.Available() {
		return Result{Code: CodeBlocked, Summary: "git not available on PATH"}, nil
	}

	repoRoot := env.Config.RepoRoot
	raw, readErr := os.ReadFile(filepath.Join(repoRoot, changelogFile))
	switch {
	case readErr == nil:
		// CHANGELOG.md was read; carry on with the comparison.
	case errors.Is(readErr, os.ErrNotExist):
		// Nothing to check is a clean no-op, like memory-drift's
		// "no facts" and version-sync's "no version_locations".
		return Result{Code: CodeClean, Summary: "no CHANGELOG.md to check"}, nil
	default:
		return Result{}, fmt.Errorf("doc-drift: read %s: %w", changelogFile, readErr)
	}
	sections := parseChangelogSections(string(raw))

	tags, tagErr := docDriftTagSource(repoRoot)
	if tagErr != nil {
		return Result{}, fmt.Errorf("doc-drift: resolve tags: %w", tagErr)
	}
	if len(tags) == 0 {
		// A CHANGELOG without any release tag has nothing to drift
		// against yet — clean no-op.
		return Result{Code: CodeClean, Summary: "no git tags to check against"}, nil
	}

	// Order both sides ascending by semver so detection is deterministic.
	// Only the ordering result of compareSemverVal is used here; its
	// second return value is discarded.
	semverLess := func(x, y string) bool {
		order, _ := compareSemverVal(x, y)
		return order < 0
	}
	sort.Slice(tags, func(i, j int) bool {
		return semverLess(tags[i], tags[j])
	})
	sort.Slice(sections, func(i, j int) bool {
		return semverLess(sections[i].version, sections[j].version)
	})
	latest := tags[len(tags)-1]

	// Both lookup sets are keyed by the bare X.Y.Z form.
	tagged := make(map[string]bool, len(tags))
	for _, tag := range tags {
		tagged[strings.TrimPrefix(tag, "v")] = true
	}
	documented := make(map[string]bool, len(sections))
	for _, sec := range sections {
		documented[sec.version] = true
	}

	var (
		findings []Finding
		drifts   []docDriftItem
	)
	// record keeps the reported finding and its queue item in step.
	record := func(line int, msg, title, detail string) {
		findings = append(findings, Finding{Path: changelogFile, Line: line, Msg: msg})
		drifts = append(drifts, docDriftItem{title: title, detail: detail})
	}

	// Class 1: a git tag with no matching CHANGELOG section. There is no
	// heading to point at, so the finding carries line 0.
	for _, tag := range tags {
		if !documented[strings.TrimPrefix(tag, "v")] {
			record(
				0,
				fmt.Sprintf("git tag %s has no CHANGELOG section", tag),
				fmt.Sprintf("git tag %s is not documented in CHANGELOG.md", tag),
				fmt.Sprintf("tag %s exists but CHANGELOG.md has no \"## [%s]\" section — document the release",
					tag, tag),
			)
		}
	}

	// Class 2: a CHANGELOG section with no matching git tag. A section
	// strictly ahead of the latest tag is a release in progress and is
	// skipped.
	for _, sec := range sections {
		if tagged[sec.version] {
			continue
		}
		order, ok := compareSemverVal(sec.version, latest)
		if ok && order > 0 {
			continue
		}
		// The finding message and the queue title share the same text.
		headline := fmt.Sprintf("CHANGELOG section v%s has no matching git tag", sec.version)
		record(
			sec.line,
			headline,
			headline,
			fmt.Sprintf("CHANGELOG.md:%d documents v%s but no v%s git tag exists — tag the release or correct the section",
				sec.line, sec.version, sec.version),
		)
	}

	if len(findings) == 0 {
		agree := fmt.Sprintf("CHANGELOG.md and %d git tag(s) agree", len(tags))
		return Result{Code: CodeClean, Summary: agree}, nil
	}

	// Report findings by line, then by message.
	sort.Slice(findings, func(i, j int) bool {
		left, right := findings[i], findings[j]
		if left.Line == right.Line {
			return left.Msg < right.Msg
		}
		return left.Line < right.Line
	})

	// Route one review item per drift to the queue — eeco flags it, the
	// operator reconciles the CHANGELOG against the tags.
	project := filepath.Base(repoRoot)
	stateDir := filepath.Join(env.Config.Workspace, "state")
	now := time.Now().UTC()
	for _, drift := range drifts {
		// AppendUnique keeps a repeated run (for example the post-merge
		// hook) from piling up duplicates for a drift that is still open
		// in the queue; the drift itself is still real and reported below.
		_, queueErr := queue.AppendUnique(stateDir, queue.Item{
			Kind:    "doc-drift",
			Title:   drift.title,
			Project: project,
			Detail:  drift.detail,
			Date:    now,
		})
		if queueErr != nil {
			return Result{}, fmt.Errorf("doc-drift: queue: %w", queueErr)
		}
	}

	return Result{
		Code:     CodeFinding,
		Summary:  fmt.Sprintf("%d CHANGELOG/tag drift(s)", len(findings)),
		Findings: findings,
	}, nil
}

// parseChangelogSections scans content for `## [vX.Y.Z]` headings and
// returns one entry per distinct version, in file order. When a malformed
// CHANGELOG repeats a version, only its first heading is kept, so the
// version is reported once. `## [Unreleased]` carries no version and is
// skipped. The result is nil when content has no version heading.
func parseChangelogSections(content string) []changelogSection {
	var sections []changelogSection
	known := make(map[string]struct{})

	// Matches come back in ascending offset order, so the heading's line
	// number is advanced by counting only the newlines since the previous
	// match rather than rescanning from the start of content each time.
	line, scanned := 1, 0
	for _, loc := range changelogHeading.FindAllStringSubmatchIndex(content, -1) {
		headingStart := loc[0]
		line += strings.Count(content[scanned:headingStart], "\n")
		scanned = headingStart

		// loc[2]:loc[3] is capture group 1, the bare X.Y.Z.
		version := content[loc[2]:loc[3]]
		if _, repeated := known[version]; repeated {
			continue
		}
		known[version] = struct{}{}
		sections = append(sections, changelogSection{version: version, line: line})
	}
	return sections
}