Go 404 lines
package docs
import (
"bytes"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
)
// Marker lines recognised by `eeco docs compact` in marker mode: a line
// whose whitespace-trimmed content equals startMarker opens a region and
// one equal to endMarker closes it. The spellings are fixed in slice 1; a
// future slice can introduce a config knob for custom markers if a user
// needs it.
const (
startMarker = "<!-- eeco:archive:start -->"
endMarker = "<!-- eeco:archive:end -->"
)
// CompactRegion identifies one contiguous span of source lines that was
// moved to the archive (or, in dry-run, would be). Both bounds are 1-based
// and inclusive. In marker mode the span runs from the start-marker line
// through the end-marker line; in heading mode it runs from the first
// archived heading through the last line before the terminating boundary.
type CompactRegion struct {
	StartLine, EndLine int
}
// CompactReport is the outcome of one compact run. The same shape is
// returned from the dry-run and the write path, so a CLI caller can render
// one summary for both.
type CompactReport struct {
	// Source and Archive echo the paths the run was invoked with.
	Source, Archive string
	// Regions lists every region that was moved, or would be moved in a
	// dry run. It is empty when nothing was found to archive.
	Regions []CompactRegion
	// ArchiveExists records whether the archive path could be stat'ed
	// before anything was written; DryRun echoes the caller's flag.
	ArchiveExists, DryRun bool
}
// Compact runs marker mode: each region of source bracketed by
// `<!-- eeco:archive:start -->` and `<!-- eeco:archive:end -->` is moved
// into archive and replaced in source by a pointer stub. Region discovery
// is delegated to scanArchiveRegions; every other step is handled by the
// shared compact engine. The public signature is unchanged.
func Compact(source, archive string, dryRun bool) (CompactReport, error) {
	return compact(source, archive, dryRun, scanArchiveRegions)
}
// CompactKeepLast runs heading mode: heading-delimited sections of source
// are moved into archive. prefix is a heading-line prefix such as
// "## Snapshot"; the length of its `#` run fixes the section level. The
// keepLast most-recent matching sections (newest first, i.e. nearest the
// top of the file) stay in place and every older one is archived. Only
// region discovery differs from Compact — the move itself goes through
// the same compact engine. Heading mode refuses to run on a source that
// still carries explicit archive markers (the two modes are mutually
// exclusive).
func CompactKeepLast(source, archive string, dryRun bool, prefix string, keepLast int) (CompactReport, error) {
	findOlderSections := func(doc []byte) ([]CompactRegion, error) {
		return scanHeadingRegions(doc, prefix, keepLast)
	}
	return compact(source, archive, dryRun, findOlderSections)
}
// compact is the shared engine behind Compact (marker mode) and
// CompactKeepLast (heading mode). find discovers the regions to move;
// everything downstream — the archive stat, the dry-run / no-region
// early return, splitRegions, appendArchive, and the source rewrite — is
// identical across both modes. Both paths are expected to be absolute.
// The repo-relativity check belongs in the CLI layer where the repo root
// is known; this function trusts both paths, apart from refusing to
// archive a file into itself.
//
// Behaviour:
//   - Markers / headings inside fenced code blocks (``` or ~~~) are ignored.
//   - Unmatched, nested, or out-of-order markers return an error.
//   - With dryRun=true, nothing is written; the report still names every
//     region that would move.
//   - Re-running with no discoverable regions is an idempotent no-op
//     (returns an empty Regions slice and writes nothing).
//   - When there are regions to move and archive resolves to the same
//     file as source, an error is returned (in dry-run too) and nothing
//     is written: appending to the file and then rewriting it as the
//     source would discard the archived text.
//   - The archive file is created on first run and appended to on later
//     runs; the appended content is a deterministic concatenation of the
//     cut regions, each preceded by a one-line provenance header.
//   - The source-doc trailing newline is preserved exactly.
//
// The header carries no date or wall-clock content so byte output is
// reproducible across runs.
//
// The returned report always carries Source, Archive and DryRun; Regions
// and ArchiveExists are filled in as far as the run got before any error.
func compact(source, archive string, dryRun bool, find func([]byte) ([]CompactRegion, error)) (CompactReport, error) {
	report := CompactReport{
		Source:  source,
		Archive: archive,
		DryRun:  dryRun,
	}

	raw, err := os.ReadFile(source)
	if err != nil {
		return report, fmt.Errorf("read source: %w", err)
	}
	regions, err := find(raw)
	if err != nil {
		return report, err
	}
	report.Regions = regions

	archiveInfo, err := os.Stat(archive)
	switch {
	case err == nil:
		report.ArchiveExists = true
	case !errors.Is(err, os.ErrNotExist):
		return report, fmt.Errorf("stat archive: %w", err)
	}

	if len(regions) == 0 {
		return report, nil
	}

	// An archive that does not exist yet cannot be the source (which was
	// just read), so the identity check is only needed when the stat
	// succeeded. os.SameFile also catches symlinks and hard links that a
	// plain path comparison would miss.
	if report.ArchiveExists {
		sourceInfo, err := os.Stat(source)
		if err != nil {
			return report, fmt.Errorf("stat source: %w", err)
		}
		if os.SameFile(sourceInfo, archiveInfo) {
			return report, fmt.Errorf("compact: source and archive are the same file: %s", source)
		}
	}

	if dryRun {
		return report, nil
	}

	// Source path is given as an absolute path; the archive header
	// records the source by basename to keep the header short and avoid
	// leaking the operator's local layout. The stub references the
	// archive by its path relative to the source's directory so a
	// reader can follow the pointer without guessing where the archive
	// lives. If no relative path can be computed, the archive's basename
	// is used instead.
	sourceTag := filepath.Base(source)
	stubTarget, relErr := filepath.Rel(filepath.Dir(source), archive)
	if relErr != nil {
		stubTarget = filepath.Base(archive)
	}
	stubTarget = filepath.ToSlash(stubTarget)

	archiveAddition, sourceRewrite := splitRegions(raw, regions, sourceTag, stubTarget)

	// The archive is written before the source is rewritten, so a failure
	// of the second step leaves the regions present in both files rather
	// than in neither.
	if err := appendArchive(archive, archiveAddition, !report.ArchiveExists); err != nil {
		return report, fmt.Errorf("write archive: %w", err)
	}
	if err := os.WriteFile(source, sourceRewrite, 0o644); err != nil {
		return report, fmt.Errorf("rewrite source: %w", err)
	}
	return report, nil
}
// scanArchiveRegions scans src one line at a time and returns every
// start/end marker pair as a CompactRegion (1-based, inclusive, marker
// lines included). A line counts as a marker only when its
// whitespace-trimmed content equals startMarker or endMarker exactly.
//
// Fenced-code state is tracked with a simple toggle: any line whose first
// non-blank characters are ``` or ~~~ flips it, and marker lines seen while
// inside a fence are ignored.
//
// Errors: a start marker while another is still open, an end marker with no
// open start, or a start marker left open at end of input.
func scanArchiveRegions(src []byte) ([]CompactRegion, error) {
	var (
		found     []CompactRegion
		fenced    bool
		pendingAt int // 1-based line of the start marker awaiting its end; 0 = none
	)

	for idx, rawLine := range splitLinesKeepEOL(src) {
		n := idx + 1
		text := strings.TrimRight(rawLine, "\r\n")

		// Fence lines may be indented, so look past leading blanks.
		lead := strings.TrimLeft(text, " \t")
		if strings.HasPrefix(lead, "```") || strings.HasPrefix(lead, "~~~") {
			fenced = !fenced
			continue
		}
		if fenced {
			continue
		}

		candidate := strings.TrimSpace(text)
		if candidate == startMarker {
			if pendingAt != 0 {
				return nil, fmt.Errorf("%s line %d: nested start marker (previous still open at line %d)", "compact", n, pendingAt)
			}
			pendingAt = n
			continue
		}
		if candidate == endMarker {
			if pendingAt == 0 {
				return nil, fmt.Errorf("%s line %d: end marker with no matching start", "compact", n)
			}
			found = append(found, CompactRegion{StartLine: pendingAt, EndLine: n})
			pendingAt = 0
		}
	}

	if pendingAt != 0 {
		return nil, fmt.Errorf("compact line %d: start marker with no matching end", pendingAt)
	}
	return found, nil
}
// headingSection describes one section matched in heading mode as the
// half-open line range [start, end): start is the 1-based line of the
// matched heading and end is the 1-based line of the boundary heading that
// terminates it. A section that runs to EOF has end == len(lines)+1.
type headingSection struct {
	start, end int
}
// headingLevel returns the ATX-heading level of line — the length of its
// leading `#` run — or 0 when line is not a heading. A heading is a run of
// one or more `#` at the start of the line (after optional leading spaces
// or tabs) that is followed by a space, a tab, or the end of the line.
// Trailing CR/LF characters are ignored.
//
// The level is not capped and any amount of leading whitespace is
// accepted, so this is more permissive than CommonMark on those two
// points.
func headingLevel(line string) int {
	s := strings.TrimLeft(strings.TrimRight(line, "\r\n"), " \t")
	n := len(s) - len(strings.TrimLeft(s, "#"))
	switch {
	case n == 0:
		return 0 // no leading '#': not a heading
	case n == len(s):
		return n // bare '#' run, e.g. an empty heading or a bare prefix
	case s[n] == ' ' || s[n] == '\t':
		return n
	default:
		return 0 // "#hashtag"-style text: the run is glued to a word
	}
}
// scanHeadingRegions finds archivable regions from headings instead of
// explicit markers. prefix is a heading-line prefix such as "## Snapshot";
// the length of its `#` run is the section level L.
//
// A matched section opens at a heading of exactly level L whose trimmed
// line starts with the trimmed prefix, and it ends at the next boundary
// heading (any heading of level <= L) or at EOF. Deeper headings are part
// of the section body, and a section never swallows a later
// same-or-higher heading such as a live "## Next session" tail.
//
// Matched sections are taken in file order, newest first (topmost). The
// first keepLast are kept; the rest are archivable, and sections that
// touch (one ends where the next starts) are merged into a single
// CompactRegion. When keepLast covers every match the result is nil, nil.
// Headings inside fenced code blocks are ignored, using the same fence
// toggle as scanArchiveRegions.
//
// Errors: prefix is not a heading, keepLast is negative, or the source
// contains standalone archive-marker lines — whether a complete pair or a
// malformed (unmatched / nested) one. Heading mode and marker mode are
// mutually exclusive, so the two schemes are never mixed silently.
func scanHeadingRegions(src []byte, prefix string, keepLast int) ([]CompactRegion, error) {
	level := headingLevel(prefix)
	if level == 0 {
		return nil, fmt.Errorf("compact: --heading %q is not a markdown heading (expected a leading '#' run, e.g. \"## Snapshot\")", prefix)
	}
	if keepLast < 0 {
		return nil, fmt.Errorf("compact: --keep-last must be >= 0 (got %d)", keepLast)
	}

	// A scan error means marker lines are present but malformed; that still
	// marks the source as set up for marker mode. Inline prose mentions do
	// not count because scanArchiveRegions only matches whole lines.
	markers, markerErr := scanArchiveRegions(src)
	if markerErr != nil || len(markers) > 0 {
		return nil, errors.New("source contains explicit archive markers; remove them or drop --keep-last")
	}

	want := strings.TrimSpace(prefix)
	lines := splitLinesKeepEOL(src)

	var sections []headingSection
	current := 0 // 1-based line of the open matched section; 0 = none
	fenced := false

	// closeAt ends the open section, if any, at the given boundary line.
	closeAt := func(boundary int) {
		if current == 0 {
			return
		}
		sections = append(sections, headingSection{start: current, end: boundary})
		current = 0
	}

	for idx, line := range lines {
		body := strings.TrimLeft(strings.TrimRight(line, "\r\n"), " \t")
		if strings.HasPrefix(body, "```") || strings.HasPrefix(body, "~~~") {
			fenced = !fenced
			continue
		}
		if fenced {
			continue
		}

		lvl := headingLevel(line)
		if lvl == 0 || lvl > level {
			continue // ordinary line or deeper heading: stays in the section
		}

		// Every heading at level <= L is a boundary; it additionally opens
		// a new section when it is at exactly level L and has the prefix.
		closeAt(idx + 1)
		if lvl == level && strings.HasPrefix(strings.TrimSpace(line), want) {
			current = idx + 1
		}
	}
	closeAt(len(lines) + 1)

	if keepLast >= len(sections) {
		return nil, nil // nothing older than the kept window — idempotent no-op
	}

	// Sections are in file order (newest on top), so everything after the
	// first keepLast is archivable. Extend the previous region whenever the
	// next section starts on the line right after it.
	var regions []CompactRegion
	for _, sec := range sections[keepLast:] {
		if n := len(regions); n > 0 && regions[n-1].EndLine+1 == sec.start {
			regions[n-1].EndLine = sec.end - 1
			continue
		}
		regions = append(regions, CompactRegion{StartLine: sec.start, EndLine: sec.end - 1})
	}
	return regions, nil
}
// splitRegions partitions src into (archiveAddition, sourceRewrite) using
// the pre-validated regions, which must be in ascending order, must not
// overlap, and must lie within src's line range (no bounds checks are made
// here).
//
// For every region:
//   - archiveAddition receives a one-line provenance header
//     (`<!-- archived from <sourceTag> -->`), then the region's lines
//     verbatim, then a blank line separating it from the next block.
//   - sourceRewrite receives a single-line pointer stub naming stubTarget
//     in place of the region.
//
// Lines outside every region are copied to sourceRewrite unchanged. Added
// line endings use the file's dominant newline style. The stub is
// terminated only if the region's last line was, so a source without a
// trailing newline still has none after the rewrite.
func splitRegions(src []byte, regions []CompactRegion, sourceTag, stubTarget string) (archiveAddition, sourceRewrite []byte) {
	lines := splitLinesKeepEOL(src)
	newline := dominantNewline(lines)
	stub := fmt.Sprintf("> _archived to `%s` (eeco docs compact)._", stubTarget)
	header := "<!-- archived from " + sourceTag + " -->" + newline

	var archive, out bytes.Buffer
	cursor := 0 // 0-based index of the next source line not yet emitted
	for _, r := range regions {
		// Copy the untouched lines that precede this region.
		for ; cursor < r.StartLine-1; cursor++ {
			out.WriteString(lines[cursor])
		}

		// Only the final line of the file can lack a terminator.
		terminated := strings.HasSuffix(lines[r.EndLine-1], "\n")

		out.WriteString(stub)
		if terminated {
			out.WriteString(newline)
		}

		archive.WriteString(header)
		for _, line := range lines[r.StartLine-1 : r.EndLine] {
			archive.WriteString(line)
		}
		// Guarantee a blank line between consecutive archive blocks. If
		// the cut content already ended with a newline, one extra newline
		// is enough; if it did not, add two.
		if !terminated {
			archive.WriteString(newline)
		}
		archive.WriteString(newline)

		cursor = r.EndLine
	}
	for ; cursor < len(lines); cursor++ {
		out.WriteString(lines[cursor])
	}
	return archive.Bytes(), out.Bytes()
}
// dominantNewline reports which line terminator occurs more often among
// lines: "\r\n" when CRLF-terminated lines strictly outnumber LF-only
// ones, otherwise "\n". Ties and input with no terminated lines at all
// therefore yield "\n".
func dominantNewline(lines []string) string {
	var lfCount, crlfCount int
	for i := range lines {
		if !strings.HasSuffix(lines[i], "\n") {
			continue // unterminated line: no vote
		}
		if strings.HasSuffix(lines[i], "\r\n") {
			crlfCount++
		} else {
			lfCount++
		}
	}
	if crlfCount <= lfCount {
		return "\n"
	}
	return "\r\n"
}
// splitLinesKeepEOL cuts src after every '\n' and returns the pieces as
// strings, each still carrying its terminator (LF, or CRLF since the CR
// simply stays in front of the LF). A final line without a terminator is
// returned as-is. Empty input yields a nil slice, and concatenating the
// result always reproduces src.
func splitLinesKeepEOL(src []byte) []string {
	if len(src) == 0 {
		return nil
	}
	parts := bytes.SplitAfter(src, []byte{'\n'})
	// When src ends with '\n', SplitAfter reports an empty trailing piece;
	// it is not a line.
	if last := len(parts) - 1; len(parts[last]) == 0 {
		parts = parts[:last]
	}
	lines := make([]string, 0, len(parts))
	for _, p := range parts {
		lines = append(lines, string(p))
	}
	return lines
}
// appendArchive adds content to the archive file.
//
// With createNew=true the parent directories are created as needed and the
// file is written with exactly content. With createNew=false the file must
// already exist; content is appended after a separator so successive runs
// do not glue blocks together visually: nothing if the file is empty, one
// "\n" if it already ends with a newline, otherwise two.
//
// The append path opens the file in append mode and writes only the
// separator plus content in a single write, so the bytes already in the
// archive are never truncated or rewritten. Any open, stat, read, write or
// close error is returned unwrapped.
func appendArchive(archive string, content []byte, createNew bool) (err error) {
	if createNew {
		if err := os.MkdirAll(filepath.Dir(archive), 0o755); err != nil {
			return err
		}
		return os.WriteFile(archive, content, 0o644)
	}

	// No O_CREATE: a missing archive on this path is an error, as before.
	f, err := os.OpenFile(archive, os.O_RDWR|os.O_APPEND, 0)
	if err != nil {
		return err
	}
	defer func() {
		// A failed close can mean the data never reached the file; report
		// it unless an earlier error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	info, err := f.Stat()
	if err != nil {
		return err
	}

	var addition bytes.Buffer
	if size := info.Size(); size > 0 {
		// Only the final byte matters for choosing the separator.
		var last [1]byte
		if _, err := f.ReadAt(last[:], size-1); err != nil {
			return err
		}
		if last[0] != '\n' {
			addition.WriteByte('\n')
		}
		addition.WriteByte('\n')
	}
	addition.Write(content)

	_, err = f.Write(addition.Bytes())
	return err
}