ajhahn.de
← eeco
Go 133 lines
package workflow

import (
	"fmt"
	"regexp"
	"sort"
	"strings"
)

// Signal kinds emitted by ComputeSignals. Only commit-type ships
// today; future signal kinds (e.g. recurring file touches) need a new
// gitx read surface and are deferred to a later slice.
const (
	// SignalCommitType identifies a signal derived from a repeated
	// conventional-commit type. ComputeSignals stores it in Signal.Kind,
	// and it is the only kind ProposeCandidates turns into a candidate.
	SignalCommitType = "commit-type"
)

// Thresholds for the deterministic evolve pass.
//
// minCommitTypeOccurrences is the minimum number of times a commit-type
// must appear in the inspected history before ComputeSignals reports it
// as a repetition signal; types seen fewer times are dropped. The value
// is deliberately small (3) so a one-off backport or release run does
// not trip it.
//
// maxDeterministicCandidates is the upper bound on how many candidates
// ProposeCandidates returns, so a noisy history cannot flood the queue.
const (
	minCommitTypeOccurrences   = 3
	maxDeterministicCandidates = 5
)

// conventionalCommitRE matches the leading `<type>(<scope>)?!?:` of a
// conventional-commit subject. Reading the pattern left to right:
//
//   - `^([a-z]+)`       the type, anchored at the start of the subject;
//     lower-case ASCII letters only, per the spec's recommended shape.
//     This is capture group 1 and the only portion eeco's deterministic
//     pass tallies.
//   - `(?:\([^)]*\))?`  an optional parenthesised scope (not captured).
//   - `!?`              an optional `!` marker.
//   - `:\s`             the colon, which must be followed by one
//     whitespace character.
//
// Subjects with an upper-case or non-letter type, or with no whitespace
// after the colon, do not match.
var conventionalCommitRE = regexp.MustCompile(`^([a-z]+)(?:\([^)]*\))?!?:\s`)

// Signal is one observation about the recent history: a repeated
// commit-type, a repeated file touch (future), etc. Kind names the
// signal class, Key the specific value (e.g. "fix"), Count the number
// of occurrences in the inspected window.
type Signal struct {
	Kind  string // signal class; currently always SignalCommitType
	Key   string // the repeated value, e.g. the commit type "fix"
	Count int    // how many inspected log lines carried this Key
}

// Candidate is one proposed workflow the deterministic pass surfaces.
// Title is the suggested workflow name (always satisfies the workflow
// name regex Scaffold enforces). Reason is a one-line human-readable
// explanation that becomes the queue item's detail line. Signals are
// the underlying observations that justified the candidate.
type Candidate struct {
	Title   string   // suggested workflow name, "<commit-type>-workflow"
	Reason  string   // one-line explanation naming the type and its count
	Signals []Signal // observations backing this candidate
}

// ComputeSignals tallies conventional-commit types across logLines and
// reports the ones that repeat often enough to count as a signal.
//
// Every element of logLines is expected in the `<short-sha> <subject>`
// shape of `git log --oneline` (what gitx.ChangesSince returns). Lines
// whose subject does not start with a conventional-commit prefix are
// skipped. A type is reported only when it occurs at least
// minCommitTypeOccurrences times.
//
// The returned slice is never nil and is ordered by Count descending,
// ties broken by Key ascending, so identical input always yields
// identical output regardless of map iteration order.
func ComputeSignals(logLines []string) []Signal {
	tally := make(map[string]int)
	for _, entry := range logLines {
		// An empty subject needs no separate guard: the pattern requires
		// at least one letter, so it can never match "".
		match := conventionalCommitRE.FindStringSubmatch(extractSubject(entry))
		if match != nil {
			tally[match[1]]++
		}
	}

	repeated := make([]Signal, 0, len(tally))
	for commitType, seen := range tally {
		if seen >= minCommitTypeOccurrences {
			repeated = append(repeated, Signal{
				Kind:  SignalCommitType,
				Key:   commitType,
				Count: seen,
			})
		}
	}

	// Keys are unique (they come from a map), so this ordering is total
	// and the result does not depend on sort stability.
	sort.Slice(repeated, func(a, b int) bool {
		left, right := repeated[a], repeated[b]
		if left.Count == right.Count {
			return left.Key < right.Key
		}
		return left.Count > right.Count
	})
	return repeated
}

// ProposeCandidates converts commit-type signals into workflow
// proposals, at most one per signal and in the order the signals are
// given (ComputeSignals yields count descending, key ascending).
//
// A signal is skipped when its Kind is not SignalCommitType or when the
// derived title "<Key>-workflow" does not match workflowNameRE, so
// every returned Title satisfies that pattern. Skipped signals do not
// count toward the cap: scanning stops once maxDeterministicCandidates
// candidates have been accepted. The returned slice is never nil.
func ProposeCandidates(signals []Signal) []Candidate {
	candidates := make([]Candidate, 0, len(signals))
	for i := 0; i < len(signals) && len(candidates) < maxDeterministicCandidates; i++ {
		sig := signals[i]
		title := sig.Key + "-workflow"
		if sig.Kind != SignalCommitType || !workflowNameRE.MatchString(title) {
			continue
		}
		reason := fmt.Sprintf("repeated commit-type %q (%d occurrences in recent history)", sig.Key, sig.Count)
		candidates = append(candidates, Candidate{
			Title:   title,
			Reason:  reason,
			Signals: []Signal{sig},
		})
	}
	return candidates
}

// extractSubject returns the subject part of a `git log --oneline`
// line of the form `<short-sha> <subject>`. Surrounding whitespace is
// stripped from the line first; everything after the first space is
// the subject, itself trimmed. A blank line, or a line holding only a
// SHA (no space once trimmed), yields the empty string.
func extractSubject(line string) string {
	trimmed := strings.TrimSpace(line)
	// A single check covers both the blank line and the SHA-only line:
	// neither contains a space after trimming.
	sep := strings.IndexByte(trimmed, ' ')
	if sep < 0 {
		return ""
	}
	return strings.TrimSpace(trimmed[sep+1:])
}