Go 171 lines
package queue
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"time"
)
// LockName is the file name of the presence-based lock written next to
// queue.md while Append is mid-flight; acquireLock joins it onto the
// state directory to form the lock path. The scheme is portable (no
// flock / fcntl): holding the lock means having created this file with
// an exclusive O_CREATE|O_EXCL open. A lock left behind by a crashed
// eeco is recovered via the staleness check in acquireLock.
const LockName = ".queue.lock"
// ErrLocked is returned by Append when the queue lock is already held
// by another live eeco invocation in the same workspace. acquireLock
// produces it in two situations: the existing lock is not stale, or a
// stale lock was removed but the follow-up claim was lost to another
// contender.
//
// Callers that must persist their item propagate it so the top-level
// CLI surfaces a non-zero exit with a clear message; "fire-and-forget"
// sites already discard it via `_ = queue.Append(...)`. The lock
// guarantees the queue file is never partially written: a contender
// either holds the lock and writes cleanly, or fails up-front without
// touching queue.md.
var ErrLocked = errors.New("queue is locked by another eeco invocation")
// staleLockAge is the conservative window after which a lock left over
// by a crashed run is considered abandoned. lockIsStale compares it
// against the lock file's modification time. Append itself is
// millisecond-scale, so any healthy run releases the lock long before
// this; only a hard crash mid-write reaches the takeover path.
const staleLockAge = 5 * time.Minute
// acquireLock takes <stateDir>/.queue.lock on behalf of a single Append
// and hands back a release closure for the caller to defer.
//
// Outcomes:
//
//   - the lock was free: it is created, and release removes it again;
//   - the lock exists and lockIsStale does not consider it stale:
//     ErrLocked;
//   - the lock exists and is stale: it is removed and the claim is
//     attempted exactly once more; losing that second attempt is also
//     reported as ErrLocked;
//   - any other filesystem failure: the (wrapped) error.
//
// release is never nil. On every failure path it is a no-op, so callers
// may defer it without checking err first.
//
// NOTE(review): the stale-lock removal below works by path and is not
// tied to the file lockIsStale inspected, so a second contender that
// judged the same old lock stale could remove the first contender's
// freshly created lock. Confirm whether that window matters in practice.
func acquireLock(stateDir string) (release func(), err error) {
	lockPath := filepath.Join(stateDir, LockName)
	noop := func() {}
	unlock := func() { removeLock(lockPath) }

	held, err := tryClaim(lockPath)
	switch {
	case err != nil:
		return noop, err
	case held:
		return unlock, nil
	case !lockIsStale(lockPath):
		return noop, ErrLocked
	}

	// The existing lock is stale: clear it out of the way. A lock that
	// has already vanished is fine — somebody else cleaned up first.
	if rmErr := os.Remove(lockPath); rmErr != nil && !errors.Is(rmErr, fs.ErrNotExist) {
		return noop, fmt.Errorf("queue.lock: remove stale: %w", rmErr)
	}

	// One retry only; a second loss means a live contender got there first.
	held, err = tryClaim(lockPath)
	if err != nil {
		return noop, err
	}
	if held {
		return unlock, nil
	}
	return noop, ErrLocked
}
// tryClaim attempts the exclusive (O_CREATE|O_EXCL) creation of the lock
// file and, when it wins, stamps the new file via writeLockContents.
//
// Results:
//
//	(true, nil)  — the file was created; the caller now holds the lock.
//	(false, nil) — the file already exists, or a pending-delete condition
//	               outlasted the retry window; either way the caller
//	               should move on to the staleness check.
//	(false, err) — any other open failure, wrapped as "queue.lock: …".
//
// The retry exists for Windows: while a lock file is in "delete pending"
// state — it was just released, but a staleness probe (readLockPID)
// still has it open for reading — CreateFile rejects the exclusive
// create with ERROR_ACCESS_DENIED instead of reporting the file as
// existing. That state clears within microseconds once the stray handle
// closes, so the create is retried every 20ms for up to 2s.
// isPendingDelete is defined elsewhere; it is expected to report false
// on Unix, in which case the loop body runs exactly once.
func tryClaim(lockPath string) (bool, error) {
	const (
		retryWindow = 2 * time.Second
		retryPause  = 20 * time.Millisecond
	)
	giveUpAt := time.Now().Add(retryWindow)
	for {
		lock, openErr := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644)
		switch {
		case openErr == nil:
			// Contents are best-effort; the file's existence is the lock.
			writeLockContents(lock)
			_ = lock.Close()
			return true, nil
		case errors.Is(openErr, fs.ErrExist):
			// Genuine contention.
			return false, nil
		case !isPendingDelete(openErr):
			return false, fmt.Errorf("queue.lock: %w", openErr)
		case !time.Now().Before(giveUpAt):
			// Pending delete never cleared: report it as contention so
			// the caller falls through to the staleness path.
			return false, nil
		}
		time.Sleep(retryPause)
	}
}
// removeLock deletes the lock file at path, retrying every 20ms for up
// to 2s if the removal fails. It returns silently in all cases: once
// the file is gone (removed now, or already absent), or once the retry
// window has passed.
//
// The retry covers a transient Windows sharing violation: Go opens
// files without FILE_SHARE_DELETE there, so a contender's staleness
// probe (the os.ReadFile in readLockPID) that has the file open at the
// moment of deletion blocks DeleteFile until its handle closes,
// microseconds later. Without the retry the lock file would be orphaned
// and the queue would stay locked out until staleLockAge elapses. On
// Unix the first Remove is expected to succeed.
func removeLock(path string) {
	const (
		retryWindow = 2 * time.Second
		retryPause  = 20 * time.Millisecond
	)
	giveUpAt := time.Now().Add(retryWindow)
	for {
		rmErr := os.Remove(path)
		switch {
		case rmErr == nil, errors.Is(rmErr, fs.ErrNotExist):
			// The lock is gone, whoever removed it.
			return
		case time.Now().After(giveUpAt):
			// Out of patience; leave the file to the staleness check.
			return
		}
		time.Sleep(retryPause)
	}
}
// writeLockContents stamps a freshly created lock file with two
// newline-terminated records:
//
//	pid=<id of the current process>
//	time=<current time in UTC, RFC 3339>
//
// The write is best-effort: its error is deliberately dropped, because
// the lock is held by the file's existence, not by its contents.
// readLockPID later consumes the pid record.
func writeLockContents(f *os.File) {
	stamp := time.Now().UTC().Format(time.RFC3339)
	record := "pid=" + strconv.Itoa(os.Getpid()) + "\ntime=" + stamp + "\n"
	_, _ = f.WriteString(record)
}
// lockIsStale reports whether the lock file at path may be reclaimed.
// It answers true in exactly two cases:
//
//   - the file's modification time lies more than staleLockAge in the
//     past;
//   - the file records an owner PID (see readLockPID) and processAlive
//     reports that process as gone.
//
// Every uncertainty counts as "not stale": a file that cannot be
// stat'ed, or one without a usable pid record, is left alone.
// processAlive is defined elsewhere (presumably per platform — the
// dead-PID check is described as Unix-only; verify there).
func lockIsStale(path string) bool {
	st, statErr := os.Stat(path)
	switch {
	case statErr != nil:
		return false
	case time.Since(st.ModTime()) > staleLockAge:
		return true
	}

	// Young lock: only a provably dead owner makes it reclaimable.
	if owner, found := readLockPID(path); found {
		return !processAlive(owner)
	}
	return false
}
func readLockPID(path string) (int, bool) {
b, err := os.ReadFile(path)
if err != nil {
return 0, false
}
for line := range strings.SplitSeq(string(b), "\n") {
v, ok := strings.CutPrefix(strings.TrimSpace(line), "pid=")
if !ok {
continue
}
pid, err := strconv.Atoi(strings.TrimSpace(v))
if err != nil || pid <= 0 {
return 0, false
}
return pid, true
}
return 0, false
}