Go 405 lines
package projecttype
import (
	"context"
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strings"
)
// Confidence floors applied by the detection pipeline.
const (
	// DefaultThreshold is the minimum marker-scan confidence at which
	// Detect accepts the deterministic result without prompting the
	// operator. It backs the init_detection_threshold config key.
	DefaultThreshold = 0.7

	// minAIConfidence is the minimum confidence an AI-fallback
	// classification must report to be accepted. A lower value is
	// re-offered to the operator when a Prompter is available and is
	// otherwise degraded to generic.
	minAIConfidence = 0.5
)
// Source names the pipeline layer that produced a Result.
type Source string

// The values a Result's Source can take.
const (
	// SourceMarker marks a result taken from the deterministic
	// marker-file and conventional-directory scan.
	SourceMarker Source = "marker-scan"
	// SourceFlag marks a result forced by the operator via Options.Forced.
	SourceFlag Source = "type-flag"
	// SourceInteractive marks a result the operator picked at a prompt.
	SourceInteractive Source = "interactive-prompt"
	// SourceAI marks a result accepted from the AI fallback.
	SourceAI Source = "ai-fallback"
	// SourceFallback marks the terminal generic fallback.
	SourceFallback Source = "generic-fallback"
)
// Result is what Detect returns: the resolved category, how sure the
// pipeline is about it, and the directories to scaffold for it.
type Result struct {
	// Category is the resolved project category.
	Category Category
	// Confidence lies in [0,1]. A deterministic result carries the
	// marker-scan confidence; an operator pick or a forced --type is
	// 1.0; an AI result carries the model's reported confidence; the
	// generic fallback is 0.
	Confidence float64
	// Dirs is the knowledge-directory set to scaffold: the catalog dirs
	// for Category, plus any AI-proposed deviations when Source is
	// SourceAI.
	Dirs []string
	// Source is the pipeline layer that produced this result.
	Source Source
	// Justification carries the model's explanation for an AI result,
	// or the reason detection degraded to the generic fallback.
	Justification string
}
// Prompter lets the operator resolve an ambiguous detection. Leaving
// Options.Prompter nil makes Detect non-interactive: the interactive
// layer (layer 3) is skipped entirely.
type Prompter interface {
	// Pick shows the operator the candidate categories, ordered
	// best-first, together with the catalog (used for descriptions and
	// for the generic escape), and returns the category they chose.
	//
	// describe reports that the operator preferred to describe the
	// project in their own words; freeText then holds that description
	// and Detect hands it to the AI layer. Returning a non-nil error
	// aborts detection.
	Pick(candidates []Category, cat *Catalog) (choice Category, describe bool, freeText string, err error)
}
// AIFunc performs a single gated AI pass for the given prompt and
// returns the model's raw text. When no AIFunc is supplied (nil) there
// is no AI fallback, and Detect degrades to generic wherever the
// pipeline would otherwise have called it.
type AIFunc func(ctx context.Context, prompt string) (string, error)
// Options carries the inputs for a single Detect call.
type Options struct {
	// RepoRoot is the directory scanned by the deterministic layers and
	// listed for the AI prompt.
	RepoRoot string
	// Threshold replaces DefaultThreshold when it is greater than zero.
	Threshold float64
	// Forced is an operator-supplied --type value. When set it
	// short-circuits the whole pipeline; a value the catalog does not
	// know is an error.
	Forced Category
	// ForceAI (the --ai flag) sends detection straight to the AI layer,
	// bypassing both the deterministic accept and the interactive
	// prompt.
	ForceAI bool
	// Prompter resolves ambiguity interactively. Nil disables layer 3.
	Prompter Prompter
	// AI runs the layer-4 fallback. Nil disables layer 4.
	AI AIFunc
}
// threshold returns the acceptance threshold in effect for this call:
// o.Threshold when it is greater than zero, DefaultThreshold otherwise.
func (o Options) threshold() float64 {
	effective := DefaultThreshold
	if o.Threshold > 0 {
		effective = o.Threshold
	}
	return effective
}
// Detect runs opt.RepoRoot through the four-layer pipeline and returns
// the resolved category together with its scaffold dir-set.
//
// Order of evaluation:
//   - opt.Forced short-circuits everything (unknown value: error);
//   - opt.ForceAI goes straight to the AI layer;
//   - the marker/convention scan is accepted when its confidence meets
//     the threshold;
//   - otherwise the Prompter, if any, decides (or routes to the AI layer
//     when the operator chose to describe the project);
//   - without a Prompter the best deterministic guess is accepted, and
//     generic is the terminal fallback.
//
// An unclassifiable tree is never an error. Errors are returned only for
// a nil catalog, an unknown forced or operator-chosen category, or a
// Prompter failure.
func Detect(ctx context.Context, cat *Catalog, opt Options) (Result, error) {
	if cat == nil {
		return Result{}, fmt.Errorf("nil catalog")
	}

	// resolved builds a Result whose dirs come straight from the catalog.
	resolved := func(c Category, conf float64, src Source) Result {
		return Result{
			Category:   c,
			Confidence: conf,
			Dirs:       cat.DirsFor(c),
			Source:     src,
		}
	}

	if forced := opt.Forced; forced != "" {
		if !cat.Has(forced) {
			return Result{}, fmt.Errorf("unknown project type %q", forced)
		}
		return resolved(forced, 1.0, SourceFlag), nil
	}
	if opt.ForceAI {
		return aiLayer(ctx, cat, opt, "")
	}

	scores := scoreRepo(opt.RepoRoot)
	best, runnerUp := topTwo(scores)
	conf := confidence(scores[best], scores[runnerUp])
	haveGuess := best != ""
	if haveGuess && conf >= opt.threshold() {
		return resolved(best, conf, SourceMarker), nil
	}

	if opt.Prompter == nil {
		// Non-interactive: take the best deterministic guess when one
		// exists, even below the threshold; otherwise go generic.
		if haveGuess {
			return resolved(best, conf, SourceMarker), nil
		}
		return genericResult(cat, "no marker or convention identified the project"), nil
	}

	choice, describe, freeText, err := opt.Prompter.Pick(rankedCandidates(scores), cat)
	switch {
	case err != nil:
		return Result{}, err
	case describe:
		return aiLayer(ctx, cat, opt, freeText)
	case !cat.Has(choice):
		return Result{}, fmt.Errorf("operator chose unknown project type %q", choice)
	}
	return resolved(choice, 1.0, SourceInteractive), nil
}
// aiLayer is the layer-4 fallback. desc is the operator's free-text
// description of the project, or "" when there is none.
//
// It returns the generic result (with the reason in Justification) when
// no AIFunc is configured, the prompt cannot be built, the AI call
// fails, the response cannot be parsed or names a category the catalog
// does not know, or the reported confidence is below minAIConfidence
// and no operator pick rescues it. The only error it returns is one
// raised by the Prompter during the low-confidence re-prompt.
func aiLayer(ctx context.Context, cat *Catalog, opt Options, desc string) (Result, error) {
	// degrade ends the layer with the generic fallback and a reason.
	degrade := func(why string) (Result, error) {
		return genericResult(cat, why), nil
	}

	if opt.AI == nil {
		return degrade("AI fallback not configured")
	}

	prompt, err := buildDetectPrompt(cat, topLevelEntries(opt.RepoRoot), desc)
	if err != nil {
		return degrade("AI fallback prompt build failed: " + err.Error())
	}
	raw, err := opt.AI(ctx, prompt)
	if err != nil {
		return degrade("AI fallback unavailable: " + err.Error())
	}

	parsed, ok := parseAIDetect(raw)
	chosen := Category(parsed.Category)
	if !ok || !cat.Has(chosen) {
		return degrade("AI fallback returned no usable classification")
	}

	if parsed.Confidence < minAIConfidence {
		if opt.Prompter == nil {
			return degrade("AI fallback confidence too low")
		}
		// Re-offer the model's pick plus a few alternatives to the operator.
		choice, describe, _, perr := opt.Prompter.Pick(topThree(cat, parsed), cat)
		if perr != nil {
			return Result{}, perr
		}
		if describe || !cat.Has(choice) {
			return degrade("AI fallback confidence too low")
		}
		return Result{
			Category:   choice,
			Confidence: 1.0,
			Dirs:       cat.DirsFor(choice),
			Source:     SourceInteractive,
		}, nil
	}

	return Result{
		Category:      chosen,
		Confidence:    clamp01(parsed.Confidence),
		Dirs:          mergeDirs(cat.DirsFor(chosen), parsed.Dirs),
		Source:        SourceAI,
		Justification: strings.TrimSpace(parsed.Justification),
	}, nil
}
// genericResult builds the terminal fallback: the Generic category with
// its catalog dirs, zero confidence, and why recorded as the
// justification.
func genericResult(cat *Catalog, why string) Result {
	var r Result // Confidence stays at its zero value.
	r.Category = Generic
	r.Dirs = cat.DirsFor(Generic)
	r.Source = SourceFallback
	r.Justification = why
	return r
}
// scoreRepo accumulates per-category votes from the marker-file scan
// (layer 1) and the conventional-directory scan (layer 2). An empty
// repoRoot yields an empty score map.
//
// Rules are applied in sorted key order rather than map iteration
// order. Floating-point addition is not associative, so summing the same
// votes in a different order can change the low bits of a score; a fixed
// order keeps scores, and therefore tie-breaks and threshold
// comparisons, reproducible from run to run.
func scoreRepo(repoRoot string) map[Category]float64 {
	scores := make(map[Category]float64)
	if repoRoot == "" {
		return scores
	}

	// Layer 1: marker files.
	markers := make([]string, 0, len(markerRules))
	for marker := range markerRules {
		markers = append(markers, marker)
	}
	slices.Sort(markers)
	for _, marker := range markers {
		if !rootHas(repoRoot, marker) {
			continue
		}
		for _, v := range markerRules[marker] {
			scores[v.cat] += v.weight
		}
	}

	// Layer 2: conventional directories.
	dirs := make([]string, 0, len(signalRules))
	for dir := range signalRules {
		dirs = append(dirs, dir)
	}
	slices.Sort(dirs)
	for _, dir := range dirs {
		if !rootHasDir(repoRoot, dir) {
			continue
		}
		for _, v := range signalRules[dir] {
			scores[v.cat] += v.weight
		}
	}
	return scores
}
// confidence expresses how clearly the leader beats the runner-up, as
// the leader's share of the two scores combined. It is 1.0 when nothing
// else scored and falls as the runner-up closes in; a non-positive top
// score gives 0. Only the top two scores count, so a long tail of small
// partial matches cannot dilute a clear leader.
func confidence(top, second float64) float64 {
	if top <= 0 {
		return 0
	}
	combined := top + second
	return top / combined
}
// topTwo returns the highest- and second-highest-scoring categories.
// Only strictly positive scores qualify; a missing place is returned as
// the empty Category. Categories are visited in name order, so on equal
// scores the earlier name keeps the higher place.
func topTwo(scores map[Category]float64) (top, second Category) {
	var bestScore, nextScore float64
	for _, name := range sortedCats(scores) {
		score := scores[name]
		if score > bestScore {
			// New leader: the previous leader drops to runner-up.
			second, nextScore = top, bestScore
			top, bestScore = name, score
		} else if score > nextScore {
			second, nextScore = name, score
		}
	}
	return top, second
}
// rankedCandidates lists every category with a positive score, best
// first. Categories with equal scores stay in name order.
func rankedCandidates(scores map[Category]float64) []Category {
	names := sortedCats(scores)
	ranked := make([]Category, 0, len(names))
	for _, name := range names {
		if scores[name] > 0 {
			ranked = append(ranked, name)
		}
	}
	// The stable sort keeps the name order from sortedCats among ties.
	sort.SliceStable(ranked, func(a, b int) bool {
		return scores[ranked[a]] > scores[ranked[b]]
	})
	return ranked
}
// sortedCats lists the categories present in scores in ascending name
// order, giving callers a walk (and tie-break) order that is independent
// of map iteration order.
func sortedCats(scores map[Category]float64) []Category {
	names := make([]Category, len(scores))
	i := 0
	for name := range scores {
		names[i] = name
		i++
	}
	slices.Sort(names)
	return names
}
// rootHas reports whether marker exists under repoRoot. A marker that
// contains glob metacharacters (*, ? or [) is treated as a pattern
// relative to repoRoot and matches when at least one entry fits it; any
// other marker is a literal relative path.
//
// The pattern is evaluated against a file system rooted at repoRoot
// instead of being joined onto it, so metacharacters that happen to
// appear in repoRoot itself (for example a checkout under
// "/src/[work]/app") are never interpreted as part of the pattern.
func rootHas(repoRoot, marker string) bool {
	if strings.ContainsAny(marker, "*?[") {
		// fs.Glob patterns are slash-separated on every platform.
		matches, err := fs.Glob(os.DirFS(repoRoot), filepath.ToSlash(marker))
		return err == nil && len(matches) > 0
	}
	// Any Stat failure (not just "does not exist") counts as absent.
	_, err := os.Stat(filepath.Join(repoRoot, marker))
	return err == nil
}
// rootHasDir reports whether name exists under repoRoot and is a
// directory. os.Stat follows symbolic links; any Stat failure counts as
// "not present".
func rootHasDir(repoRoot, name string) bool {
	info, err := os.Stat(filepath.Join(repoRoot, name))
	if err != nil {
		return false
	}
	return info.IsDir()
}
// topLevelEntries lists the names directly under repoRoot in sorted
// order, skipping ".git" and marking directories with a trailing "/".
// It returns nil when repoRoot cannot be read.
func topLevelEntries(repoRoot string) []string {
	entries, err := os.ReadDir(repoRoot)
	if err != nil {
		return nil
	}
	var listing []string
	for _, entry := range entries {
		label := entry.Name()
		switch {
		case label == ".git":
			continue
		case entry.IsDir():
			label += "/"
		}
		listing = append(listing, label)
	}
	// Re-sort: the "/" suffix can change the order ReadDir produced.
	sort.Strings(listing)
	return listing
}
// clamp01 limits v to the closed interval [0, 1]: values below 0 become
// 0, values above 1 become 1, and anything else is returned unchanged.
func clamp01(v float64) float64 {
	if v < 0 {
		return 0
	}
	if v > 1 {
		return 1
	}
	return v
}
// mergeDirs returns base followed by every entry of extra that is not
// already present, preserving order. Entries of extra are trimmed of
// surrounding whitespace and skipped when empty; entries of base are
// copied as they are. It backs the AI layer's bounded "propose
// deviations to the dir-set" affordance.
func mergeDirs(base, extra []string) []string {
	seen := make(map[string]struct{}, len(base))
	out := make([]string, 0, len(base)+len(extra))
	for _, d := range base {
		seen[d] = struct{}{}
		out = append(out, d)
	}
	for _, d := range extra {
		d = strings.TrimSpace(d)
		if d == "" {
			continue
		}
		if _, dup := seen[d]; dup {
			continue
		}
		seen[d] = struct{}{}
		out = append(out, d)
	}
	return out
}

// aiDetect is the JSON shape expected from the AI fallback.
type aiDetect struct {
	// Category is the category name chosen by the model.
	Category string `json:"category"`
	// Confidence is the model's self-reported confidence.
	Confidence float64 `json:"confidence"`
	// Dirs is the dir-set proposed by the model. After parseAIDetect it
	// also contains Deviations.
	Dirs []string `json:"dirs"`
	// Justification is the model's free-text explanation.
	Justification string `json:"justification"`
	// Deviations are extra dirs proposed by the model on top of Dirs.
	Deviations []string `json:"deviations"`
}

// parseAIDetect extracts the first JSON object from the model's text,
// which may wrap it in prose or a code fence, and decodes it. The
// returned Dirs are the object's "dirs" followed by any "deviations"
// not already listed. The boolean is false when no usable object is
// found.
//
// Scanning starts at each '{' in turn and decodes exactly one value from
// there, so text after the object (a closing fence, more prose that
// contains braces, a second object) does not affect the result, and a
// brace in prose ahead of the object is skipped over. If the first
// well-formed object has a field of the wrong JSON type the response is
// rejected rather than searched further.
func parseAIDetect(raw string) (aiDetect, bool) {
	for offset := 0; offset < len(raw); {
		i := strings.IndexByte(raw[offset:], '{')
		if i < 0 {
			break
		}
		start := offset + i

		var d aiDetect
		err := json.NewDecoder(strings.NewReader(raw[start:])).Decode(&d)
		if err == nil {
			d.Dirs = mergeDirs(d.Dirs, d.Deviations)
			return d, true
		}
		// A type error means the object itself was well-formed JSON but
		// does not fit aiDetect; do not fall through to its nested objects.
		if _, badType := err.(*json.UnmarshalTypeError); badType {
			return aiDetect{}, false
		}
		// Not JSON at this brace; try the next one.
		offset = start + 1
	}
	return aiDetect{}, false
}
// topThree builds the candidate list for an operator re-prompt: the
// AI-chosen category first (when the catalog knows it), then other
// catalog categories in catalog order, at most three in total. Generic
// and a repeat of the first candidate are not added from the catalog.
func topThree(cat *Catalog, d aiDetect) []Category {
	const limit = 3
	var picks []Category
	if aiPick := Category(d.Category); cat.Has(aiPick) {
		picks = append(picks, aiPick)
	}
	for _, c := range cat.Categories() {
		if len(picks) >= limit {
			break
		}
		if c == Generic {
			continue
		}
		if len(picks) > 0 && c == picks[0] {
			continue
		}
		picks = append(picks, c)
	}
	return picks
}