Go 222 lines
package workflow
import (
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"github.com/ajhahnde/eeco/internal/gitx"
"github.com/ajhahnde/eeco/internal/queue"
)
// docDrift is the doc-drift check: it compares the release sections
// documented in CHANGELOG.md with the project's git tags and flags two
// kinds of disagreement:
//
//   - a `vX.Y.Z` git tag that has no `## [vX.Y.Z]` CHANGELOG section —
//     the release happened but was never written up;
//   - a `## [vX.Y.Z]` CHANGELOG section that has no matching git tag — a
//     release that was written up but never tagged. A section strictly
//     ahead of the latest tag is exempt, because a release commit adds
//     `## [vX.Y.Z]` before the `vX.Y.Z` tag is pushed; that is the
//     expected release-in-progress state, not drift (the same
//     forward-drift allowance as version-sync's tag anchor).
//
// Every drift is reported as a finding and routed to the queue (the
// single decision channel) as one review item. The operator reconciles
// the CHANGELOG or the tag; eeco never edits either. This is the
// doc-vs-tags member of the stale-state / drift detection family, a
// sibling of memory-drift.
type docDrift struct{}

// Name returns the identifier of this check, "doc-drift".
func (docDrift) Name() string {
	const name = "doc-drift"
	return name
}

// Summary returns the one-line description of what the check does.
func (docDrift) Summary() string {
	const summary = "flag drift between CHANGELOG.md release sections and git tags"
	return summary
}
// docDriftTagSource resolves the project's semver-shaped tags. Run calls
// it with the repository root; a returned error aborts the run and an
// empty result is treated as "no git tags to check against". It is a
// package-level variable so tests can override it; it defaults to
// gitx.SemverTags.
var docDriftTagSource = gitx.SemverTags
// changelogHeading matches a Keep-a-Changelog version section heading
// `## [vX.Y.Z]`; the optional `v` is tolerated and group 1 captures the
// bare X.Y.Z. `## [Unreleased]` carries no version and never matches.
//
// The gap between `##` and `[` is limited to spaces and tabs. `\s` would
// also match line breaks, which lets a bare `##` line pair up with a
// `[X.Y.Z]` that merely starts a later line (for example a link
// reference definition) and be reported as a release section.
var changelogHeading = regexp.MustCompile(`(?m)^##[ \t]+\[v?(\d+\.\d+\.\d+)\]`)
// changelogSection is one `## [vX.Y.Z]` heading found in CHANGELOG.md,
// as returned by parseChangelogSections.
type changelogSection struct {
version string // bare X.Y.Z: capture group 1 of changelogHeading, so without the optional leading `v`
line int // 1-based line number of the heading within the parsed content
}
// docDriftItem is one drift carried from detection to the queue-append
// loop in Run, where each item becomes one queue.Item of kind
// "doc-drift".
type docDriftItem struct {
title string // short statement of the drift; copied to queue.Item.Title
detail string // longer explanation including the suggested remedy; copied to queue.Item.Detail
}
// Run executes the doc-drift check against the repository described by
// env.Config.
//
// Outcomes:
//   - CodeBlocked when git is not available;
//   - CodeClean when CHANGELOG.md does not exist, when there are no tags,
//     or when every tag and every section line up;
//   - CodeFinding, with findings sorted by line and then message, when at
//     least one drift exists. One queue item per drift is appended under
//     <Workspace>/state before returning.
//
// A non-nil error is returned when CHANGELOG.md cannot be read for a
// reason other than not existing, when tags cannot be resolved, or when
// appending to the queue fails.
func (docDrift) Run(env Env) (Result, error) {
	const changelogName = "CHANGELOG.md"

	conf := env.Config

	// Everything below is compared against git tags. Without git the check
	// cannot run, so report it as blocked instead of as a pass that never
	// actually happened.
	if !gitx.Available() {
		return Result{Code: CodeBlocked, Summary: "git not available on PATH"}, nil
	}

	raw, err := os.ReadFile(filepath.Join(conf.RepoRoot, changelogName))
	switch {
	case err == nil:
		// CHANGELOG.md was read; continue with the comparison.
	case errors.Is(err, os.ErrNotExist):
		// Nothing to check is a clean no-op, in line with memory-drift's
		// "no facts" and version-sync's "no version_locations".
		return Result{Code: CodeClean, Summary: "no CHANGELOG.md to check"}, nil
	default:
		return Result{}, fmt.Errorf("doc-drift: read %s: %w", changelogName, err)
	}
	documented := parseChangelogSections(string(raw))

	released, err := docDriftTagSource(conf.RepoRoot)
	if err != nil {
		return Result{}, fmt.Errorf("doc-drift: resolve tags: %w", err)
	}
	if len(released) == 0 {
		// A CHANGELOG without any release tag has nothing to drift against.
		return Result{Code: CodeClean, Summary: "no git tags to check against"}, nil
	}

	// Order both sides ascending by semver so that detection, and with it
	// the order of queue items, is deterministic.
	sort.Slice(released, func(a, b int) bool {
		order, _ := compareSemverVal(released[a], released[b])
		return order < 0
	})
	sort.Slice(documented, func(a, b int) bool {
		order, _ := compareSemverVal(documented[a].version, documented[b].version)
		return order < 0
	})
	newestTag := released[len(released)-1]

	// Both lookup sets are keyed by the bare X.Y.Z form.
	taggedVersions := make(map[string]struct{}, len(released))
	for _, tag := range released {
		taggedVersions[strings.TrimPrefix(tag, "v")] = struct{}{}
	}
	documentedVersions := make(map[string]struct{}, len(documented))
	for _, sec := range documented {
		documentedVersions[sec.version] = struct{}{}
	}

	var (
		findings []Finding
		pending  []docDriftItem
	)
	// record registers one drift both as a reported finding and as a
	// pending queue item.
	record := func(line int, msg, title, detail string) {
		findings = append(findings, Finding{Path: changelogName, Line: line, Msg: msg})
		pending = append(pending, docDriftItem{title: title, detail: detail})
	}

	// Class 1: tags that the CHANGELOG does not document. There is no
	// heading to point at, so the finding carries line 0.
	for _, tag := range released {
		if _, ok := documentedVersions[strings.TrimPrefix(tag, "v")]; ok {
			continue
		}
		record(
			0,
			fmt.Sprintf("git tag %s has no CHANGELOG section", tag),
			fmt.Sprintf("git tag %s is not documented in CHANGELOG.md", tag),
			fmt.Sprintf("tag %s exists but CHANGELOG.md has no \"## [%s]\" section — document the release",
				tag, tag),
		)
	}

	// Class 2: documented sections without a tag. A section strictly ahead
	// of the newest tag is a release in progress and is skipped.
	for _, sec := range documented {
		if _, ok := taggedVersions[sec.version]; ok {
			continue
		}
		if order, ok := compareSemverVal(sec.version, newestTag); ok && order > 0 {
			continue
		}
		headline := fmt.Sprintf("CHANGELOG section v%s has no matching git tag", sec.version)
		record(
			sec.line,
			headline,
			headline,
			fmt.Sprintf("CHANGELOG.md:%d documents v%s but no v%s git tag exists — tag the release or correct the section",
				sec.line, sec.version, sec.version),
		)
	}

	if len(findings) == 0 {
		return Result{
			Code:    CodeClean,
			Summary: fmt.Sprintf("CHANGELOG.md and %d git tag(s) agree", len(released)),
		}, nil
	}

	sort.Slice(findings, func(a, b int) bool {
		left, right := findings[a], findings[b]
		if left.Line == right.Line {
			return left.Msg < right.Msg
		}
		return left.Line < right.Line
	})

	// Hand every drift to the queue: eeco only flags it, the operator
	// reconciles the CHANGELOG against the tags.
	stateDir := filepath.Join(conf.Workspace, "state")
	project := filepath.Base(conf.RepoRoot)
	now := time.Now().UTC()
	for _, drift := range pending {
		// AppendUnique keeps a repeated run (for example the post-merge
		// hook) from stacking duplicates for a drift that is still open in
		// the queue; the drift is reported below regardless.
		_, queueErr := queue.AppendUnique(stateDir, queue.Item{
			Kind:    "doc-drift",
			Title:   drift.title,
			Project: project,
			Detail:  drift.detail,
			Date:    now,
		})
		if queueErr != nil {
			return Result{}, fmt.Errorf("doc-drift: queue: %w", queueErr)
		}
	}

	return Result{
		Code:     CodeFinding,
		Summary:  fmt.Sprintf("%d CHANGELOG/tag drift(s)", len(findings)),
		Findings: findings,
	}, nil
}
// parseChangelogSections scans content for `## [vX.Y.Z]` headings and
// returns one changelogSection per distinct version, in the order the
// versions first appear. A version that is repeated (a malformed
// CHANGELOG) is kept once, at its first heading. `## [Unreleased]` has no
// version, does not match changelogHeading, and is therefore skipped.
// The result is nil when content has no version heading.
func parseChangelogSections(content string) []changelogSection {
	var (
		sections []changelogSection
		known    = make(map[string]struct{})
		scanned  int     // byte offset up to which newlines have been counted
		lineNo   = 1     // 1-based line number at offset scanned
	)
	for _, loc := range changelogHeading.FindAllStringSubmatchIndex(content, -1) {
		// Matches arrive in ascending position, so the line number only
		// needs the newlines between the previous match and this one.
		headingStart := loc[0]
		lineNo += strings.Count(content[scanned:headingStart], "\n")
		scanned = headingStart

		// loc[2]:loc[3] bounds capture group 1, the bare X.Y.Z.
		ver := content[loc[2]:loc[3]]
		if _, dup := known[ver]; dup {
			continue
		}
		known[ver] = struct{}{}
		sections = append(sections, changelogSection{version: ver, line: lineNo})
	}
	return sections
}