ScuttleBot

feat: automatic stale agent cleanup with configurable reap_after_days

- Add Reap() to registry — removes agents not seen in N days
- Hourly reaper goroutine reads reap_after_days from agent policy
- Revoked agents are reaped too if past the cutoff
- Online agents are never reaped
- Setting exposed in Settings UI → Agent Policy → "reap after days"
- Default 0 (disabled)

Closes #49

lmata 2026-04-03 22:18 trunk
Commit cd79584f050b25cb81b8c5dcd9b6d2d905c65824fc29038bf4d3a7633a9ff72f
--- cmd/scuttlebot/main.go
+++ cmd/scuttlebot/main.go
@@ -294,10 +294,30 @@
294294
Config: b.Config,
295295
}
296296
}
297297
botMgr.Sync(ctx, specs)
298298
}
299
+
300
+ // Agent reaper — periodically removes stale agents based on policy.
301
+ go func() {
302
+ ticker := time.NewTicker(1 * time.Hour)
303
+ defer ticker.Stop()
304
+ for {
305
+ select {
306
+ case <-ctx.Done():
307
+ return
308
+ case <-ticker.C:
309
+ p := policyStore.Get()
310
+ if p.AgentPolicy.ReapAfterDays > 0 {
311
+ maxAge := time.Duration(p.AgentPolicy.ReapAfterDays) * 24 * time.Hour
312
+ if n := reg.Reap(maxAge); n > 0 {
313
+ log.Info("reaped stale agents", "count", n, "max_age_days", p.AgentPolicy.ReapAfterDays)
314
+ }
315
+ }
316
+ }
317
+ }
318
+ }()
299319
300320
// Config store — owns write-back to scuttlebot.yaml with history snapshots.
301321
cfgStore := api.NewConfigStore(*configPath, *cfg)
302322
cfgStore.OnChange(func(updated config.Config) {
303323
// Hot-reload topology on config change.
304324
--- cmd/scuttlebot/main.go
+++ cmd/scuttlebot/main.go
@@ -294,10 +294,30 @@
294 Config: b.Config,
295 }
296 }
297 botMgr.Sync(ctx, specs)
298 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
300 // Config store — owns write-back to scuttlebot.yaml with history snapshots.
301 cfgStore := api.NewConfigStore(*configPath, *cfg)
302 cfgStore.OnChange(func(updated config.Config) {
303 // Hot-reload topology on config change.
304
--- cmd/scuttlebot/main.go
+++ cmd/scuttlebot/main.go
@@ -294,10 +294,30 @@
294 Config: b.Config,
295 }
296 }
297 botMgr.Sync(ctx, specs)
298 }
299
300 // Agent reaper — periodically removes stale agents based on policy.
301 go func() {
302 ticker := time.NewTicker(1 * time.Hour)
303 defer ticker.Stop()
304 for {
305 select {
306 case <-ctx.Done():
307 return
308 case <-ticker.C:
309 p := policyStore.Get()
310 if p.AgentPolicy.ReapAfterDays > 0 {
311 maxAge := time.Duration(p.AgentPolicy.ReapAfterDays) * 24 * time.Hour
312 if n := reg.Reap(maxAge); n > 0 {
313 log.Info("reaped stale agents", "count", n, "max_age_days", p.AgentPolicy.ReapAfterDays)
314 }
315 }
316 }
317 }
318 }()
319
320 // Config store — owns write-back to scuttlebot.yaml with history snapshots.
321 cfgStore := api.NewConfigStore(*configPath, *cfg)
322 cfgStore.OnChange(func(updated config.Config) {
323 // Hot-reload topology on config change.
324
--- internal/api/policies.go
+++ internal/api/policies.go
@@ -29,10 +29,11 @@
2929
type AgentPolicy struct {
3030
RequireCheckin bool `json:"require_checkin"`
3131
CheckinChannel string `json:"checkin_channel"`
3232
RequiredChannels []string `json:"required_channels"`
3333
OnlineTimeoutSecs int `json:"online_timeout_secs,omitempty"`
34
+ ReapAfterDays int `json:"reap_after_days,omitempty"`
3435
}
3536
3637
// LoggingPolicy configures message logging.
3738
type LoggingPolicy struct {
3839
Enabled bool `json:"enabled"`
3940
--- internal/api/policies.go
+++ internal/api/policies.go
@@ -29,10 +29,11 @@
29 type AgentPolicy struct {
30 RequireCheckin bool `json:"require_checkin"`
31 CheckinChannel string `json:"checkin_channel"`
32 RequiredChannels []string `json:"required_channels"`
33 OnlineTimeoutSecs int `json:"online_timeout_secs,omitempty"`
 
34 }
35
36 // LoggingPolicy configures message logging.
37 type LoggingPolicy struct {
38 Enabled bool `json:"enabled"`
39
--- internal/api/policies.go
+++ internal/api/policies.go
@@ -29,10 +29,11 @@
29 type AgentPolicy struct {
30 RequireCheckin bool `json:"require_checkin"`
31 CheckinChannel string `json:"checkin_channel"`
32 RequiredChannels []string `json:"required_channels"`
33 OnlineTimeoutSecs int `json:"online_timeout_secs,omitempty"`
34 ReapAfterDays int `json:"reap_after_days,omitempty"`
35 }
36
37 // LoggingPolicy configures message logging.
38 type LoggingPolicy struct {
39 Enabled bool `json:"enabled"`
40
--- internal/api/ui/index.html
+++ internal/api/ui/index.html
@@ -605,10 +605,15 @@
605605
<div class="setting-row">
606606
<div class="setting-label">online timeout</div>
607607
<div class="setting-desc">Seconds since last heartbeat before an agent is considered offline. Default: 120.</div>
608608
<input type="number" id="policy-online-timeout" placeholder="120" min="10" max="3600" style="width:100px;padding:4px 8px;font-size:12px">
609609
</div>
610
+ <div class="setting-row">
611
+ <div class="setting-label">reap after days</div>
612
+ <div class="setting-desc">Remove stale agents not seen in this many days. 0 = never reap.</div>
613
+ <input type="number" id="policy-reap-days" placeholder="0" min="0" max="365" style="width:100px;padding:4px 8px;font-size:12px">
614
+ </div>
610615
</div>
611616
<div id="agentpolicy-save-result" style="display:none;margin:0 16px 12px"></div>
612617
</div>
613618
614619
<!-- bridge -->
@@ -2767,10 +2772,11 @@
27672772
function renderAgentPolicy(p) {
27682773
document.getElementById('policy-checkin-enabled').checked = !!p.require_checkin;
27692774
document.getElementById('policy-checkin-channel').value = p.checkin_channel || '';
27702775
document.getElementById('policy-required-channels').value = (p.required_channels||[]).join(', ');
27712776
document.getElementById('policy-online-timeout').value = p.online_timeout_secs || '';
2777
+ document.getElementById('policy-reap-days').value = p.reap_after_days || '';
27722778
toggleCheckinChannel();
27732779
}
27742780
function toggleCheckinChannel() {
27752781
const on = document.getElementById('policy-checkin-enabled').checked;
27762782
document.getElementById('policy-checkin-row').style.display = on ? '' : 'none';
@@ -3024,10 +3030,11 @@
30243030
agent_policy: {
30253031
require_checkin: document.getElementById('policy-checkin-enabled').checked,
30263032
checkin_channel: document.getElementById('policy-checkin-channel').value.trim(),
30273033
required_channels: document.getElementById('policy-required-channels').value.split(',').map(s=>s.trim()).filter(Boolean),
30283034
online_timeout_secs: parseInt(document.getElementById('policy-online-timeout').value) || 0,
3035
+ reap_after_days: parseInt(document.getElementById('policy-reap-days').value) || 0,
30293036
}
30303037
}, 'agentpolicy-save-result');
30313038
}
30323039
30333040
function saveBridgeConfig() {
30343041
--- internal/api/ui/index.html
+++ internal/api/ui/index.html
@@ -605,10 +605,15 @@
605 <div class="setting-row">
606 <div class="setting-label">online timeout</div>
607 <div class="setting-desc">Seconds since last heartbeat before an agent is considered offline. Default: 120.</div>
608 <input type="number" id="policy-online-timeout" placeholder="120" min="10" max="3600" style="width:100px;padding:4px 8px;font-size:12px">
609 </div>
 
 
 
 
 
610 </div>
611 <div id="agentpolicy-save-result" style="display:none;margin:0 16px 12px"></div>
612 </div>
613
614 <!-- bridge -->
@@ -2767,10 +2772,11 @@
2767 function renderAgentPolicy(p) {
2768 document.getElementById('policy-checkin-enabled').checked = !!p.require_checkin;
2769 document.getElementById('policy-checkin-channel').value = p.checkin_channel || '';
2770 document.getElementById('policy-required-channels').value = (p.required_channels||[]).join(', ');
2771 document.getElementById('policy-online-timeout').value = p.online_timeout_secs || '';
 
2772 toggleCheckinChannel();
2773 }
2774 function toggleCheckinChannel() {
2775 const on = document.getElementById('policy-checkin-enabled').checked;
2776 document.getElementById('policy-checkin-row').style.display = on ? '' : 'none';
@@ -3024,10 +3030,11 @@
3024 agent_policy: {
3025 require_checkin: document.getElementById('policy-checkin-enabled').checked,
3026 checkin_channel: document.getElementById('policy-checkin-channel').value.trim(),
3027 required_channels: document.getElementById('policy-required-channels').value.split(',').map(s=>s.trim()).filter(Boolean),
3028 online_timeout_secs: parseInt(document.getElementById('policy-online-timeout').value) || 0,
 
3029 }
3030 }, 'agentpolicy-save-result');
3031 }
3032
3033 function saveBridgeConfig() {
3034
--- internal/api/ui/index.html
+++ internal/api/ui/index.html
@@ -605,10 +605,15 @@
605 <div class="setting-row">
606 <div class="setting-label">online timeout</div>
607 <div class="setting-desc">Seconds since last heartbeat before an agent is considered offline. Default: 120.</div>
608 <input type="number" id="policy-online-timeout" placeholder="120" min="10" max="3600" style="width:100px;padding:4px 8px;font-size:12px">
609 </div>
610 <div class="setting-row">
611 <div class="setting-label">reap after days</div>
612 <div class="setting-desc">Remove stale agents not seen in this many days. 0 = never reap.</div>
613 <input type="number" id="policy-reap-days" placeholder="0" min="0" max="365" style="width:100px;padding:4px 8px;font-size:12px">
614 </div>
615 </div>
616 <div id="agentpolicy-save-result" style="display:none;margin:0 16px 12px"></div>
617 </div>
618
619 <!-- bridge -->
@@ -2767,10 +2772,11 @@
2772 function renderAgentPolicy(p) {
2773 document.getElementById('policy-checkin-enabled').checked = !!p.require_checkin;
2774 document.getElementById('policy-checkin-channel').value = p.checkin_channel || '';
2775 document.getElementById('policy-required-channels').value = (p.required_channels||[]).join(', ');
2776 document.getElementById('policy-online-timeout').value = p.online_timeout_secs || '';
2777 document.getElementById('policy-reap-days').value = p.reap_after_days || '';
2778 toggleCheckinChannel();
2779 }
2780 function toggleCheckinChannel() {
2781 const on = document.getElementById('policy-checkin-enabled').checked;
2782 document.getElementById('policy-checkin-row').style.display = on ? '' : 'none';
@@ -3024,10 +3030,11 @@
3030 agent_policy: {
3031 require_checkin: document.getElementById('policy-checkin-enabled').checked,
3032 checkin_channel: document.getElementById('policy-checkin-channel').value.trim(),
3033 required_channels: document.getElementById('policy-required-channels').value.split(',').map(s=>s.trim()).filter(Boolean),
3034 online_timeout_secs: parseInt(document.getElementById('policy-online-timeout').value) || 0,
3035 reap_after_days: parseInt(document.getElementById('policy-reap-days').value) || 0,
3036 }
3037 }, 'agentpolicy-save-result');
3038 }
3039
3040 function saveBridgeConfig() {
3041
--- internal/registry/registry.go
+++ internal/registry/registry.go
@@ -407,10 +407,43 @@
407407
if r.onlineTimeout > 0 {
408408
return r.onlineTimeout
409409
}
410410
return defaultOnlineTimeout
411411
}
412
+
413
+// Reap removes agents that haven't been seen in maxAge. Revoked agents
414
+// are always reaped if older than maxAge. Returns the number of agents removed.
415
+func (r *Registry) Reap(maxAge time.Duration) int {
416
+ if maxAge <= 0 {
417
+ return 0
418
+ }
419
+ r.mu.Lock()
420
+ defer r.mu.Unlock()
421
+ cutoff := time.Now().Add(-maxAge)
422
+ var reaped int
423
+ for nick, a := range r.agents {
424
+ if a.Online {
425
+ continue
426
+ }
427
+ // Use last_seen if available, otherwise fall back to created_at.
428
+ ref := a.CreatedAt
429
+ if a.LastSeen != nil {
430
+ ref = *a.LastSeen
431
+ }
432
+ if ref.Before(cutoff) {
433
+ delete(r.agents, nick)
434
+ if r.db != nil {
435
+ _ = r.db.AgentDelete(nick)
436
+ }
437
+ reaped++
438
+ }
439
+ }
440
+ if reaped > 0 && r.db == nil {
441
+ r.save()
442
+ }
443
+ return reaped
444
+}
412445
413446
// List returns all registered agents with computed online status.
414447
func (r *Registry) List() []*Agent {
415448
r.mu.RLock()
416449
defer r.mu.RUnlock()
417450
--- internal/registry/registry.go
+++ internal/registry/registry.go
@@ -407,10 +407,43 @@
407 if r.onlineTimeout > 0 {
408 return r.onlineTimeout
409 }
410 return defaultOnlineTimeout
411 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
413 // List returns all registered agents with computed online status.
414 func (r *Registry) List() []*Agent {
415 r.mu.RLock()
416 defer r.mu.RUnlock()
417
--- internal/registry/registry.go
+++ internal/registry/registry.go
@@ -407,10 +407,43 @@
407 if r.onlineTimeout > 0 {
408 return r.onlineTimeout
409 }
410 return defaultOnlineTimeout
411 }
412
413 // Reap removes agents that haven't been seen in maxAge. Revoked agents
414 // are always reaped if older than maxAge. Returns the number of agents removed.
415 func (r *Registry) Reap(maxAge time.Duration) int {
416 if maxAge <= 0 {
417 return 0
418 }
419 r.mu.Lock()
420 defer r.mu.Unlock()
421 cutoff := time.Now().Add(-maxAge)
422 var reaped int
423 for nick, a := range r.agents {
424 if a.Online {
425 continue
426 }
427 // Use last_seen if available, otherwise fall back to created_at.
428 ref := a.CreatedAt
429 if a.LastSeen != nil {
430 ref = *a.LastSeen
431 }
432 if ref.Before(cutoff) {
433 delete(r.agents, nick)
434 if r.db != nil {
435 _ = r.db.AgentDelete(nick)
436 }
437 reaped++
438 }
439 }
440 if reaped > 0 && r.db == nil {
441 r.save()
442 }
443 return reaped
444 }
445
446 // List returns all registered agents with computed online status.
447 func (r *Registry) List() []*Agent {
448 r.mu.RLock()
449 defer r.mu.RUnlock()
450

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button