ScuttleBot

feat: relay-watchdog sidecar for automatic IRC reconnection

The relay process's PTY/terminal handling prevents background goroutines from ticking reliably, so connection health monitoring runs as a separate sidecar process (relay-watchdog).

relay-watchdog:
- Polls /v1/status every 10s
- Detects server restarts (start time changes)
- Detects extended API outages (60s unreachable)
- Sends SIGUSR1 to all relay processes on detection
- Reads config from ~/.config/scuttlebot-relay.env

claude-relay (and other relays):
- Handles SIGUSR1 by tearing down IRC and reconnecting
- Fresh SASL credential registration on reconnect
- Claude subprocess keeps running through reconnection
- Presence loop uses pointer-swappable relay reference

Closes #51

lmata 2026-04-03 21:25 trunk
Commit 9f5df4d1183e8569d781584eef9176c5573d2669706dd05f6de7fa29b04c807b
--- cmd/claude-relay/main.go
+++ cmd/claude-relay/main.go
@@ -195,11 +195,12 @@
195195
"SCUTTLEBOT_NICK="+cfg.Nick,
196196
"SCUTTLEBOT_ACTIVITY_VIA_BROKER="+boolString(relayActive),
197197
)
198198
if relayActive {
199199
go mirrorSessionLoop(ctx, relay, cfg, startedAt)
200
- go presenceLoop(ctx, relay, cfg.HeartbeatInterval)
200
+ go presenceLoopPtr(ctx, &relay, cfg.HeartbeatInterval)
201
+ go handleReconnectSignal(ctx, &relay, cfg)
201202
}
202203
203204
if !isInteractiveTTY() {
204205
cmd.Stdin = os.Stdin
205206
cmd.Stdout = os.Stdout
@@ -634,10 +635,96 @@
634635
return
635636
}
636637
}
637638
}
638639
}
640
+
641
+// handleReconnectSignal listens for SIGUSR1 and tears down/rebuilds
642
+// the IRC connection. The relay-watchdog sidecar sends this signal
643
+// when it detects the server restarted or the network is down.
644
+func handleReconnectSignal(ctx context.Context, relayPtr *sessionrelay.Connector, cfg config) {
645
+ sigCh := make(chan os.Signal, 1)
646
+ signal.Notify(sigCh, syscall.SIGUSR1)
647
+ defer signal.Stop(sigCh)
648
+
649
+ for {
650
+ select {
651
+ case <-ctx.Done():
652
+ return
653
+ case <-sigCh:
654
+ }
655
+
656
+ fmt.Fprintf(os.Stderr, "claude-relay: received SIGUSR1, reconnecting IRC...\n")
657
+ old := *relayPtr
658
+ if old != nil {
659
+ _ = old.Close(context.Background())
660
+ }
661
+
662
+ // Retry with backoff.
663
+ wait := 2 * time.Second
664
+ for attempt := 0; attempt < 10; attempt++ {
665
+ if ctx.Err() != nil {
666
+ return
667
+ }
668
+ time.Sleep(wait)
669
+
670
+ conn, err := sessionrelay.New(sessionrelay.Config{
671
+ Transport: cfg.Transport,
672
+ URL: cfg.URL,
673
+ Token: cfg.Token,
674
+ Channel: cfg.Channel,
675
+ Channels: cfg.Channels,
676
+ Nick: cfg.Nick,
677
+ IRC: sessionrelay.IRCConfig{
678
+ Addr: cfg.IRCAddr,
679
+ Pass: "", // force re-registration
680
+ AgentType: cfg.IRCAgentType,
681
+ DeleteOnClose: cfg.IRCDeleteOnClose,
682
+ },
683
+ })
684
+ if err != nil {
685
+ wait = min(wait*2, 30*time.Second)
686
+ continue
687
+ }
688
+
689
+ connectCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
690
+ if err := conn.Connect(connectCtx); err != nil {
691
+ _ = conn.Close(context.Background())
692
+ cancel()
693
+ wait = min(wait*2, 30*time.Second)
694
+ continue
695
+ }
696
+ cancel()
697
+
698
+ *relayPtr = conn
699
+ _ = conn.Post(context.Background(), fmt.Sprintf(
700
+ "reconnected in %s; mention %s to interrupt",
701
+ filepath.Base(cfg.TargetCWD), cfg.Nick,
702
+ ))
703
+ fmt.Fprintf(os.Stderr, "claude-relay: reconnected successfully\n")
704
+ break
705
+ }
706
+ }
707
+}
708
+
709
+func presenceLoopPtr(ctx context.Context, relayPtr *sessionrelay.Connector, interval time.Duration) {
710
+ if interval <= 0 {
711
+ return
712
+ }
713
+ ticker := time.NewTicker(interval)
714
+ defer ticker.Stop()
715
+ for {
716
+ select {
717
+ case <-ctx.Done():
718
+ return
719
+ case <-ticker.C:
720
+ if r := *relayPtr; r != nil {
721
+ _ = r.Touch(ctx)
722
+ }
723
+ }
724
+ }
725
+}
639726
640727
func presenceLoop(ctx context.Context, relay sessionrelay.Connector, interval time.Duration) {
641728
if interval <= 0 {
642729
return
643730
}
644731
645732
ADDED cmd/relay-watchdog/main.go
--- cmd/claude-relay/main.go
+++ cmd/claude-relay/main.go
@@ -195,11 +195,12 @@
195 "SCUTTLEBOT_NICK="+cfg.Nick,
196 "SCUTTLEBOT_ACTIVITY_VIA_BROKER="+boolString(relayActive),
197 )
198 if relayActive {
199 go mirrorSessionLoop(ctx, relay, cfg, startedAt)
200 go presenceLoop(ctx, relay, cfg.HeartbeatInterval)
 
201 }
202
203 if !isInteractiveTTY() {
204 cmd.Stdin = os.Stdin
205 cmd.Stdout = os.Stdout
@@ -634,10 +635,96 @@
634 return
635 }
636 }
637 }
638 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
640 func presenceLoop(ctx context.Context, relay sessionrelay.Connector, interval time.Duration) {
641 if interval <= 0 {
642 return
643 }
644
645 ADDED cmd/relay-watchdog/main.go
--- cmd/claude-relay/main.go
+++ cmd/claude-relay/main.go
@@ -195,11 +195,12 @@
195 "SCUTTLEBOT_NICK="+cfg.Nick,
196 "SCUTTLEBOT_ACTIVITY_VIA_BROKER="+boolString(relayActive),
197 )
198 if relayActive {
199 go mirrorSessionLoop(ctx, relay, cfg, startedAt)
200 go presenceLoopPtr(ctx, &relay, cfg.HeartbeatInterval)
201 go handleReconnectSignal(ctx, &relay, cfg)
202 }
203
204 if !isInteractiveTTY() {
205 cmd.Stdin = os.Stdin
206 cmd.Stdout = os.Stdout
@@ -634,10 +635,96 @@
635 return
636 }
637 }
638 }
639 }
640
641 // handleReconnectSignal listens for SIGUSR1 and tears down/rebuilds
642 // the IRC connection. The relay-watchdog sidecar sends this signal
643 // when it detects the server restarted or the network is down.
644 func handleReconnectSignal(ctx context.Context, relayPtr *sessionrelay.Connector, cfg config) {
645 sigCh := make(chan os.Signal, 1)
646 signal.Notify(sigCh, syscall.SIGUSR1)
647 defer signal.Stop(sigCh)
648
649 for {
650 select {
651 case <-ctx.Done():
652 return
653 case <-sigCh:
654 }
655
656 fmt.Fprintf(os.Stderr, "claude-relay: received SIGUSR1, reconnecting IRC...\n")
657 old := *relayPtr
658 if old != nil {
659 _ = old.Close(context.Background())
660 }
661
662 // Retry with backoff.
663 wait := 2 * time.Second
664 for attempt := 0; attempt < 10; attempt++ {
665 if ctx.Err() != nil {
666 return
667 }
668 time.Sleep(wait)
669
670 conn, err := sessionrelay.New(sessionrelay.Config{
671 Transport: cfg.Transport,
672 URL: cfg.URL,
673 Token: cfg.Token,
674 Channel: cfg.Channel,
675 Channels: cfg.Channels,
676 Nick: cfg.Nick,
677 IRC: sessionrelay.IRCConfig{
678 Addr: cfg.IRCAddr,
679 Pass: "", // force re-registration
680 AgentType: cfg.IRCAgentType,
681 DeleteOnClose: cfg.IRCDeleteOnClose,
682 },
683 })
684 if err != nil {
685 wait = min(wait*2, 30*time.Second)
686 continue
687 }
688
689 connectCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
690 if err := conn.Connect(connectCtx); err != nil {
691 _ = conn.Close(context.Background())
692 cancel()
693 wait = min(wait*2, 30*time.Second)
694 continue
695 }
696 cancel()
697
698 *relayPtr = conn
699 _ = conn.Post(context.Background(), fmt.Sprintf(
700 "reconnected in %s; mention %s to interrupt",
701 filepath.Base(cfg.TargetCWD), cfg.Nick,
702 ))
703 fmt.Fprintf(os.Stderr, "claude-relay: reconnected successfully\n")
704 break
705 }
706 }
707 }
708
709 func presenceLoopPtr(ctx context.Context, relayPtr *sessionrelay.Connector, interval time.Duration) {
710 if interval <= 0 {
711 return
712 }
713 ticker := time.NewTicker(interval)
714 defer ticker.Stop()
715 for {
716 select {
717 case <-ctx.Done():
718 return
719 case <-ticker.C:
720 if r := *relayPtr; r != nil {
721 _ = r.Touch(ctx)
722 }
723 }
724 }
725 }
726
727 func presenceLoop(ctx context.Context, relay sessionrelay.Connector, interval time.Duration) {
728 if interval <= 0 {
729 return
730 }
731
732 ADDED cmd/relay-watchdog/main.go
--- a/cmd/relay-watchdog/main.go
+++ b/cmd/relay-watchdog/main.go
@@ -0,0 +1,153 @@
1
+// relay-watchdog monitors a scuttlebot server and signals relay processes
2
+// to reconnect when the server restarts or becomes unreachable.
3
+//
4
+// Usage: relay-watchdog --url https://irc.scuttlebot.net --token <token> --signal <pid>
5
+//
6
+// It polls the server's /v1/status endpoint every 10 seconds. When the
7
+// server's start time changes (restart) or the API is unreachable for 60
8
+// seconds (network issue), it sends SIGUSR1 to the specified PID (or all
9
+// relay processes if --signal 0).
10
+package main
11
+
12
+import (
13
+ "encoding/json"
14
+ "flag"
15
+ "fmt"
16
+ "net/http"
17
+ "os"
18
+ "os/exec"
19
+ "os/signal"
20
+ "strings"
21
+ "syscall"
22
+ "time"
23
+)
24
+
25
+func loadEnvFile(path string) {
26
+ data, err := os.ReadFile(path)
27
+ if err != nil {
28
+ return
29
+ }
30
+ for _, line := range strings.Split(string(data), "\n") {
31
+ line = strings.TrimSpace(line)
32
+ if line == "" || strings.HasPrefix(line, "#") {
33
+ continue
34
+ }
35
+ if k, v, ok := strings.Cut(line, "="); ok {
36
+ k = strings.TrimSpace(k)
37
+ v = strings.TrimSpace(v)
38
+ if os.Getenv(k) == "" { // don't override explicit env
39
+ os.Setenv(k, v)
40
+ }
41
+ }
42
+ }
43
+}
44
+
45
+func main() {
46
+ // Load the shared relay config.
47
+ home, _ := os.UserHomeDir()
48
+ if home != "" {
49
+ loadEnvFile(home + "/.config/scuttlebot-relay.env")
50
+ }
51
+
52
+ url := flag.String("url", os.Getenv("SCUTTLEBOT_URL"), "scuttlebot API URL")
53
+ token := flag.String("token", os.Getenv("SCUTTLEBOT_TOKEN"), "API token")
54
+ interval := flag.Duration("interval", 10*time.Second, "poll interval")
55
+ flag.Parse()
56
+
57
+ if *url == "" || *token == "" {
58
+ fmt.Fprintf(os.Stderr, "relay-watchdog: SCUTTLEBOT_URL and SCUTTLEBOT_TOKEN required\n")
59
+ os.Exit(1)
60
+ }
61
+
62
+ // Handle graceful shutdown.
63
+ sigCh := make(chan os.Signal, 1)
64
+ signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
65
+
66
+ var lastStart string
67
+ failures := 0
68
+ client := &http.Client{Timeout: 5 * time.Second}
69
+
70
+ fmt.Fprintf(os.Stderr, "relay-watchdog: monitoring %s every %s\n", *url, *interval)
71
+
72
+ ticker := time.NewTicker(*interval)
73
+ defer ticker.Stop()
74
+
75
+ for {
76
+ select {
77
+ case <-sigCh:
78
+ fmt.Fprintf(os.Stderr, "relay-watchdog: shutting down\n")
79
+ return
80
+ case <-ticker.C:
81
+ }
82
+
83
+ start := getStart(client, *url, *token)
84
+ if start == "" {
85
+ failures++
86
+ fmt.Fprintf(os.Stderr, "relay-watchdog: API unreachable (%d)\n", failures)
87
+ if failures >= 6 { // 60s at 10s interval
88
+ fmt.Fprintf(os.Stderr, "relay-watchdog: extended outage, will signal relays on recovery\n")
89
+ }
90
+ continue
91
+ }
92
+
93
+ if failures >= 6 {
94
+ // We were down for a while and just came back.
95
+ fmt.Fprintf(os.Stderr, "relay-watchdog: API recovered after %d failures, killing relays\n", failures)
96
+ killRelays()
97
+ lastStart = start
98
+ failures = 0
99
+ continue
100
+ }
101
+
102
+ if lastStart == "" {
103
+ lastStart = start
104
+ failures = 0
105
+ continue
106
+ }
107
+
108
+ if start != lastStart {
109
+ fmt.Fprintf(os.Stderr, "relay-watchdog: server restarted (was %s, now %s), killing relays\n", lastStart, start)
110
+ killRelays()
111
+ lastStart = start
112
+ }
113
+ failures = 0
114
+ }
115
+}
116
+
117
+func getStart(client *http.Client, url, token string) string {
118
+ req, err := http.NewRequest(http.MethodGet, url+"/v1/status", nil)
119
+ if err != nil {
120
+ return ""
121
+ }
122
+ req.Header.Set("Authorization", "Bearer "+token)
123
+ resp, err := client.Do(req)
124
+ if err != nil {
125
+ return ""
126
+ }
127
+ defer resp.Body.Close()
128
+ var s struct {
129
+ Started string `json:"started"`
130
+ }
131
+ _ = json.NewDecoder(resp.Body).Decode(&s)
132
+ return s.Started
133
+}
134
+
135
+func killRelays() {
136
+ // Find relay processes and send SIGUSR1 to trigger IRC reconnection.
137
+ // The relay handles SIGUSR1 by tearing down and rebuilding the IRC
138
+ // connection without killing the Claude subprocess.
139
+ out, err := exec.Command("pgrep", "-f", "(claude|codex|gemini)-relay").Output()
140
+ if err != nil {
141
+ fmt.Fprintf(os.Stderr, "relay-watchdog: no relay processes found\n")
142
+ return
143
+ }
144
+ pids := strings.Fields(strings.TrimSpace(string(out)))
145
+ myPid := fmt.Sprintf("%d", os.Getpid())
146
+ for _, pid := range pids {
147
+ if pid == myPid {
148
+ continue
149
+ }
150
+ fmt.Fprintf(os.Stderr, "relay-watchdog: signaling relay pid %s (SIGUSR1)\n", pid)
151
+ _ = exec.Command("kill", "-USR1", pid).Run()
152
+ }
153
+}
--- a/cmd/relay-watchdog/main.go
+++ b/cmd/relay-watchdog/main.go
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/cmd/relay-watchdog/main.go
+++ b/cmd/relay-watchdog/main.go
@@ -0,0 +1,153 @@
1 // relay-watchdog monitors a scuttlebot server and signals relay processes
2 // to reconnect when the server restarts or recovers from an outage.
3 //
4 // Usage: relay-watchdog --url https://irc.scuttlebot.net --token <token> [--interval 10s]
5 //
6 // It polls the server's /v1/status endpoint every 10 seconds by default.
7 // When the server's start time changes (restart), or the API comes back after
8 // an outage of roughly 60 seconds (six consecutive failed polls at the default
9 // interval), it sends SIGUSR1 to every claude-, codex- and gemini-relay process.
10 package main
11
12 import (
13 "encoding/json"
14 "flag"
15 "fmt"
16 "net/http"
17 "os"
18 "os/exec"
19 "os/signal"
20 "strings"
21 "syscall"
22 "time"
23 )
24
// loadEnvFile reads KEY=VALUE pairs from the file at path and exports each one
// into the process environment, unless a variable of that name is already
// present. A missing or unreadable file is silently ignored.
//
// Blank lines and lines starting with '#' are skipped, as are lines without an
// '=' or with an empty key. Whitespace around keys and values is trimmed. A
// leading "export " and one pair of matching surrounding quotes are removed so
// that shell-style env files load with the intended values.
func loadEnvFile(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		return
	}
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		line = strings.TrimSpace(strings.TrimPrefix(line, "export "))

		k, v, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		k = strings.TrimSpace(k)
		if k == "" {
			continue
		}
		v = strings.TrimSpace(v)
		if n := len(v); n >= 2 && (v[0] == '"' || v[0] == '\'') && v[n-1] == v[0] {
			v = v[1 : n-1]
		}

		// Don't override explicit env. LookupEnv distinguishes "unset" from
		// "set to the empty string", which os.Getenv cannot.
		if _, set := os.LookupEnv(k); set {
			continue
		}
		// Setenv only fails for malformed keys; such entries are skipped.
		_ = os.Setenv(k, v)
	}
}
44
45 func main() {
46 // Load the shared relay config.
47 home, _ := os.UserHomeDir()
48 if home != "" {
49 loadEnvFile(home + "/.config/scuttlebot-relay.env")
50 }
51
52 url := flag.String("url", os.Getenv("SCUTTLEBOT_URL"), "scuttlebot API URL")
53 token := flag.String("token", os.Getenv("SCUTTLEBOT_TOKEN"), "API token")
54 interval := flag.Duration("interval", 10*time.Second, "poll interval")
55 flag.Parse()
56
57 if *url == "" || *token == "" {
58 fmt.Fprintf(os.Stderr, "relay-watchdog: SCUTTLEBOT_URL and SCUTTLEBOT_TOKEN required\n")
59 os.Exit(1)
60 }
61
62 // Handle graceful shutdown.
63 sigCh := make(chan os.Signal, 1)
64 signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
65
66 var lastStart string
67 failures := 0
68 client := &http.Client{Timeout: 5 * time.Second}
69
70 fmt.Fprintf(os.Stderr, "relay-watchdog: monitoring %s every %s\n", *url, *interval)
71
72 ticker := time.NewTicker(*interval)
73 defer ticker.Stop()
74
75 for {
76 select {
77 case <-sigCh:
78 fmt.Fprintf(os.Stderr, "relay-watchdog: shutting down\n")
79 return
80 case <-ticker.C:
81 }
82
83 start := getStart(client, *url, *token)
84 if start == "" {
85 failures++
86 fmt.Fprintf(os.Stderr, "relay-watchdog: API unreachable (%d)\n", failures)
87 if failures >= 6 { // 60s at 10s interval
88 fmt.Fprintf(os.Stderr, "relay-watchdog: extended outage, will signal relays on recovery\n")
89 }
90 continue
91 }
92
93 if failures >= 6 {
94 // We were down for a while and just came back.
95 fmt.Fprintf(os.Stderr, "relay-watchdog: API recovered after %d failures, killing relays\n", failures)
96 killRelays()
97 lastStart = start
98 failures = 0
99 continue
100 }
101
102 if lastStart == "" {
103 lastStart = start
104 failures = 0
105 continue
106 }
107
108 if start != lastStart {
109 fmt.Fprintf(os.Stderr, "relay-watchdog: server restarted (was %s, now %s), killing relays\n", lastStart, start)
110 killRelays()
111 lastStart = start
112 }
113 failures = 0
114 }
115 }
116
// getStart fetches <url>/v1/status with a bearer token and returns the value of
// the "started" field from the JSON response.
//
// It returns "" whenever the start time cannot be determined: the request could
// not be built or sent, the server answered with a status other than 200, the
// body was not valid JSON, or the field was absent. Callers treat "" as
// "API unreachable".
func getStart(client *http.Client, url, token string) string {
	// Tolerate a base URL configured with a trailing slash.
	endpoint := strings.TrimRight(url, "/") + "/v1/status"
	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return ""
	}
	req.Header.Set("Authorization", "Bearer "+token)

	resp, err := client.Do(req)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	// An error response (auth failure, proxy error page, ...) is not a
	// trustworthy source for the start time even if it carries JSON.
	if resp.StatusCode != http.StatusOK {
		return ""
	}

	var s struct {
		Started string `json:"started"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&s); err != nil {
		return ""
	}
	return s.Started
}
134
// killRelays finds relay processes and sends each of them SIGUSR1 to trigger
// IRC reconnection. Despite the name, nothing is killed: the relay handles
// SIGUSR1 by tearing down and rebuilding the IRC connection without killing the
// Claude subprocess.
//
// Processes are located with `pgrep -f`, which matches against the full command
// line, so any process whose command line contains "claude-relay",
// "codex-relay" or "gemini-relay" is signalled. The watchdog's own PID is
// skipped. Failures are reported on stderr and never abort the loop.
func killRelays() {
	out, err := exec.Command("pgrep", "-f", "(claude|codex|gemini)-relay").Output()
	if err != nil {
		// pgrep exits with status 1 when nothing matched; anything else
		// (pgrep missing, bad pattern, ...) is a real failure and must not be
		// reported as "no relays". Output returns *exec.ExitError directly, so
		// a plain type assertion is sufficient.
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
			fmt.Fprintf(os.Stderr, "relay-watchdog: no relay processes found\n")
		} else {
			fmt.Fprintf(os.Stderr, "relay-watchdog: pgrep failed: %v\n", err)
		}
		return
	}

	self := os.Getpid()
	for _, field := range strings.Fields(string(out)) {
		var pid int
		// Reject anything that is not a positive PID: kill(2) gives 0 and
		// negative values process-group semantics.
		if _, err := fmt.Sscanf(field, "%d", &pid); err != nil || pid <= 0 {
			fmt.Fprintf(os.Stderr, "relay-watchdog: ignoring unexpected pgrep output %q\n", field)
			continue
		}
		if pid == self {
			continue
		}
		fmt.Fprintf(os.Stderr, "relay-watchdog: signaling relay pid %d (SIGUSR1)\n", pid)
		if err := syscall.Kill(pid, syscall.SIGUSR1); err != nil {
			fmt.Fprintf(os.Stderr, "relay-watchdog: signaling pid %d failed: %v\n", pid, err)
		}
	}
}
--- pkg/sessionrelay/irc.go
+++ pkg/sessionrelay/irc.go
@@ -33,10 +33,11 @@
3333
messages []Message
3434
client *girc.Client
3535
errCh chan error
3636
3737
registeredByRelay bool
38
+ connectedAt time.Time
3839
}
3940
4041
func newIRCConnector(cfg Config) (Connector, error) {
4142
if cfg.IRC.Addr == "" {
4243
return nil, fmt.Errorf("sessionrelay: irc transport requires irc addr")
@@ -87,11 +88,10 @@
8788
case err := <-c.errCh:
8889
_ = c.cleanupRegistration(context.Background())
8990
return fmt.Errorf("sessionrelay: irc connect: %w", err)
9091
case <-joined:
9192
go c.keepAlive(ctx, host, port)
92
- go c.watchdog(ctx)
9393
return nil
9494
}
9595
}
9696
9797
// dial creates a fresh girc client, wires up handlers, and starts the
@@ -108,10 +108,13 @@
108108
SASL: &girc.SASLPlain{User: c.nick, Pass: c.pass},
109109
PingDelay: 30 * time.Second,
110110
PingTimeout: 30 * time.Second,
111111
})
112112
client.Handlers.AddBg(girc.CONNECTED, func(cl *girc.Client, _ girc.Event) {
113
+ c.mu.Lock()
114
+ c.connectedAt = time.Now()
115
+ c.mu.Unlock()
113116
for _, channel := range c.Channels() {
114117
cl.Cmd.Join(channel)
115118
}
116119
})
117120
client.Handlers.AddBg(girc.JOIN, func(_ *girc.Client, e girc.Event) {
@@ -210,70 +213,10 @@
210213
fmt.Fprintf(os.Stderr, "sessionrelay: reconnected successfully\n")
211214
})
212215
}
213216
}
214217
215
-// watchdog periodically checks if the IRC client is still connected and
216
-// if the API is reachable. Forces reconnection when the connection is dead.
217
-func (c *ircConnector) watchdog(ctx context.Context) {
218
- failures := 0
219
- for {
220
- select {
221
- case <-ctx.Done():
222
- return
223
- case <-time.After(10 * time.Second):
224
- }
225
-
226
- c.mu.RLock()
227
- client := c.client
228
- c.mu.RUnlock()
229
- if client == nil {
230
- failures = 0
231
- continue
232
- }
233
-
234
- if !client.IsConnected() {
235
- client.Close()
236
- select {
237
- case c.errCh <- fmt.Errorf("watchdog: client disconnected"):
238
- default:
239
- }
240
- failures = 0
241
- continue
242
- }
243
-
244
- // Probe the API to detect server restarts.
245
- if c.apiURL != "" && c.token != "" {
246
- probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
247
- req, _ := http.NewRequestWithContext(probeCtx, http.MethodGet, c.apiURL+"/v1/status", nil)
248
- if req != nil {
249
- req.Header.Set("Authorization", "Bearer "+c.token)
250
- resp, err := http.DefaultClient.Do(req)
251
- if err != nil || resp.StatusCode != 200 {
252
- failures++
253
- if resp != nil {
254
- resp.Body.Close()
255
- }
256
- } else {
257
- resp.Body.Close()
258
- failures = 0
259
- }
260
- }
261
- cancel()
262
- }
263
-
264
- if failures >= 3 {
265
- client.Close()
266
- select {
267
- case c.errCh <- fmt.Errorf("watchdog: API unreachable"):
268
- default:
269
- }
270
- failures = 0
271
- }
272
- }
273
-}
274
-
275218
func (c *ircConnector) Post(_ context.Context, text string) error {
276219
c.mu.RLock()
277220
client := c.client
278221
c.mu.RUnlock()
279222
if client == nil {
@@ -311,11 +254,81 @@
311254
}
312255
}
313256
return out, nil
314257
}
315258
316
-func (c *ircConnector) Touch(context.Context) error {
259
+func (c *ircConnector) Touch(ctx context.Context) error {
260
+ c.mu.RLock()
261
+ client := c.client
262
+ c.mu.RUnlock()
263
+
264
+ if client == nil {
265
+ return fmt.Errorf("sessionrelay: not connected")
266
+ }
267
+
268
+ if !client.IsConnected() {
269
+ client.Close()
270
+ select {
271
+ case c.errCh <- fmt.Errorf("touch: client disconnected"):
272
+ default:
273
+ }
274
+ return fmt.Errorf("sessionrelay: disconnected")
275
+ }
276
+
277
+ // Detect server restarts by checking the server's startup time.
278
+ // If the server started after our IRC connection was established,
279
+ // the IRC connection is stale and must be recycled.
280
+ if c.apiURL != "" && c.token != "" {
281
+ probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
282
+ defer cancel()
283
+ req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, c.apiURL+"/v1/status", nil)
284
+ if err != nil {
285
+ return nil
286
+ }
287
+ req.Header.Set("Authorization", "Bearer "+c.token)
288
+ resp, err := http.DefaultClient.Do(req)
289
+ if err != nil {
290
+ return nil // API unreachable, transient
291
+ }
292
+ defer resp.Body.Close()
293
+
294
+ var status struct {
295
+ Started string `json:"started"`
296
+ }
297
+ if err := json.NewDecoder(resp.Body).Decode(&status); err == nil && status.Started != "" {
298
+ serverStart, err := time.Parse(time.RFC3339Nano, status.Started)
299
+ if err == nil {
300
+ c.mu.RLock()
301
+ connectedAt := c.connectedAt
302
+ c.mu.RUnlock()
303
+ if !connectedAt.IsZero() && serverStart.After(connectedAt) {
304
+ // Server restarted after we connected — our IRC session is dead.
305
+ client.Close()
306
+ select {
307
+ case c.errCh <- fmt.Errorf("touch: server restarted (started %s, connected %s)", serverStart.Format(time.RFC3339), connectedAt.Format(time.RFC3339)):
308
+ default:
309
+ }
310
+ return fmt.Errorf("sessionrelay: server restarted")
311
+ }
312
+ }
313
+ }
314
+
315
+ // Also touch presence so the server tracks us.
316
+ presenceReq, _ := http.NewRequestWithContext(probeCtx, http.MethodPost,
317
+ c.apiURL+"/v1/channels/"+channelSlug(c.primary)+"/presence",
318
+ bytes.NewReader([]byte(`{"nick":"`+c.nick+`"}`)))
319
+ if presenceReq != nil {
320
+ presenceReq.Header.Set("Authorization", "Bearer "+c.token)
321
+ presenceReq.Header.Set("Content-Type", "application/json")
322
+ pr, err := http.DefaultClient.Do(presenceReq)
323
+ if pr != nil {
324
+ pr.Body.Close()
325
+ }
326
+ _ = err
327
+ }
328
+ }
329
+
317330
return nil
318331
}
319332
320333
func (c *ircConnector) JoinChannel(ctx context.Context, channel string) error {
321334
channel = normalizeChannel(channel)
322335
--- pkg/sessionrelay/irc.go
+++ pkg/sessionrelay/irc.go
@@ -33,10 +33,11 @@
33 messages []Message
34 client *girc.Client
35 errCh chan error
36
37 registeredByRelay bool
 
38 }
39
40 func newIRCConnector(cfg Config) (Connector, error) {
41 if cfg.IRC.Addr == "" {
42 return nil, fmt.Errorf("sessionrelay: irc transport requires irc addr")
@@ -87,11 +88,10 @@
87 case err := <-c.errCh:
88 _ = c.cleanupRegistration(context.Background())
89 return fmt.Errorf("sessionrelay: irc connect: %w", err)
90 case <-joined:
91 go c.keepAlive(ctx, host, port)
92 go c.watchdog(ctx)
93 return nil
94 }
95 }
96
97 // dial creates a fresh girc client, wires up handlers, and starts the
@@ -108,10 +108,13 @@
108 SASL: &girc.SASLPlain{User: c.nick, Pass: c.pass},
109 PingDelay: 30 * time.Second,
110 PingTimeout: 30 * time.Second,
111 })
112 client.Handlers.AddBg(girc.CONNECTED, func(cl *girc.Client, _ girc.Event) {
 
 
 
113 for _, channel := range c.Channels() {
114 cl.Cmd.Join(channel)
115 }
116 })
117 client.Handlers.AddBg(girc.JOIN, func(_ *girc.Client, e girc.Event) {
@@ -210,70 +213,10 @@
210 fmt.Fprintf(os.Stderr, "sessionrelay: reconnected successfully\n")
211 })
212 }
213 }
214
215 // watchdog periodically checks if the IRC client is still connected and
216 // if the API is reachable. Forces reconnection when the connection is dead.
217 func (c *ircConnector) watchdog(ctx context.Context) {
218 failures := 0
219 for {
220 select {
221 case <-ctx.Done():
222 return
223 case <-time.After(10 * time.Second):
224 }
225
226 c.mu.RLock()
227 client := c.client
228 c.mu.RUnlock()
229 if client == nil {
230 failures = 0
231 continue
232 }
233
234 if !client.IsConnected() {
235 client.Close()
236 select {
237 case c.errCh <- fmt.Errorf("watchdog: client disconnected"):
238 default:
239 }
240 failures = 0
241 continue
242 }
243
244 // Probe the API to detect server restarts.
245 if c.apiURL != "" && c.token != "" {
246 probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
247 req, _ := http.NewRequestWithContext(probeCtx, http.MethodGet, c.apiURL+"/v1/status", nil)
248 if req != nil {
249 req.Header.Set("Authorization", "Bearer "+c.token)
250 resp, err := http.DefaultClient.Do(req)
251 if err != nil || resp.StatusCode != 200 {
252 failures++
253 if resp != nil {
254 resp.Body.Close()
255 }
256 } else {
257 resp.Body.Close()
258 failures = 0
259 }
260 }
261 cancel()
262 }
263
264 if failures >= 3 {
265 client.Close()
266 select {
267 case c.errCh <- fmt.Errorf("watchdog: API unreachable"):
268 default:
269 }
270 failures = 0
271 }
272 }
273 }
274
275 func (c *ircConnector) Post(_ context.Context, text string) error {
276 c.mu.RLock()
277 client := c.client
278 c.mu.RUnlock()
279 if client == nil {
@@ -311,11 +254,81 @@
311 }
312 }
313 return out, nil
314 }
315
316 func (c *ircConnector) Touch(context.Context) error {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317 return nil
318 }
319
320 func (c *ircConnector) JoinChannel(ctx context.Context, channel string) error {
321 channel = normalizeChannel(channel)
322
--- pkg/sessionrelay/irc.go
+++ pkg/sessionrelay/irc.go
@@ -33,10 +33,11 @@
33 messages []Message
34 client *girc.Client
35 errCh chan error
36
37 registeredByRelay bool
38 connectedAt time.Time
39 }
40
41 func newIRCConnector(cfg Config) (Connector, error) {
42 if cfg.IRC.Addr == "" {
43 return nil, fmt.Errorf("sessionrelay: irc transport requires irc addr")
@@ -87,11 +88,10 @@
88 case err := <-c.errCh:
89 _ = c.cleanupRegistration(context.Background())
90 return fmt.Errorf("sessionrelay: irc connect: %w", err)
91 case <-joined:
92 go c.keepAlive(ctx, host, port)
 
93 return nil
94 }
95 }
96
97 // dial creates a fresh girc client, wires up handlers, and starts the
@@ -108,10 +108,13 @@
108 SASL: &girc.SASLPlain{User: c.nick, Pass: c.pass},
109 PingDelay: 30 * time.Second,
110 PingTimeout: 30 * time.Second,
111 })
112 client.Handlers.AddBg(girc.CONNECTED, func(cl *girc.Client, _ girc.Event) {
113 c.mu.Lock()
114 c.connectedAt = time.Now()
115 c.mu.Unlock()
116 for _, channel := range c.Channels() {
117 cl.Cmd.Join(channel)
118 }
119 })
120 client.Handlers.AddBg(girc.JOIN, func(_ *girc.Client, e girc.Event) {
@@ -210,70 +213,10 @@
213 fmt.Fprintf(os.Stderr, "sessionrelay: reconnected successfully\n")
214 })
215 }
216 }
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218 func (c *ircConnector) Post(_ context.Context, text string) error {
219 c.mu.RLock()
220 client := c.client
221 c.mu.RUnlock()
222 if client == nil {
@@ -311,11 +254,81 @@
254 }
255 }
256 return out, nil
257 }
258
259 func (c *ircConnector) Touch(ctx context.Context) error {
260 c.mu.RLock()
261 client := c.client
262 c.mu.RUnlock()
263
264 if client == nil {
265 return fmt.Errorf("sessionrelay: not connected")
266 }
267
268 if !client.IsConnected() {
269 client.Close()
270 select {
271 case c.errCh <- fmt.Errorf("touch: client disconnected"):
272 default:
273 }
274 return fmt.Errorf("sessionrelay: disconnected")
275 }
276
277 // Detect server restarts by checking the server's startup time.
278 // If the server started after our IRC connection was established,
279 // the IRC connection is stale and must be recycled.
280 if c.apiURL != "" && c.token != "" {
281 probeCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
282 defer cancel()
283 req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, c.apiURL+"/v1/status", nil)
284 if err != nil {
285 return nil
286 }
287 req.Header.Set("Authorization", "Bearer "+c.token)
288 resp, err := http.DefaultClient.Do(req)
289 if err != nil {
290 return nil // API unreachable, transient
291 }
292 defer resp.Body.Close()
293
294 var status struct {
295 Started string `json:"started"`
296 }
297 if err := json.NewDecoder(resp.Body).Decode(&status); err == nil && status.Started != "" {
298 serverStart, err := time.Parse(time.RFC3339Nano, status.Started)
299 if err == nil {
300 c.mu.RLock()
301 connectedAt := c.connectedAt
302 c.mu.RUnlock()
303 if !connectedAt.IsZero() && serverStart.After(connectedAt) {
304 // Server restarted after we connected — our IRC session is dead.
305 client.Close()
306 select {
307 case c.errCh <- fmt.Errorf("touch: server restarted (started %s, connected %s)", serverStart.Format(time.RFC3339), connectedAt.Format(time.RFC3339)):
308 default:
309 }
310 return fmt.Errorf("sessionrelay: server restarted")
311 }
312 }
313 }
314
315 // Also touch presence so the server tracks us.
316 presenceReq, _ := http.NewRequestWithContext(probeCtx, http.MethodPost,
317 c.apiURL+"/v1/channels/"+channelSlug(c.primary)+"/presence",
318 bytes.NewReader([]byte(`{"nick":"`+c.nick+`"}`)))
319 if presenceReq != nil {
320 presenceReq.Header.Set("Authorization", "Bearer "+c.token)
321 presenceReq.Header.Set("Content-Type", "application/json")
322 pr, err := http.DefaultClient.Do(presenceReq)
323 if pr != nil {
324 pr.Body.Close()
325 }
326 _ = err
327 }
328 }
329
330 return nil
331 }
332
333 func (c *ircConnector) JoinChannel(ctx context.Context, channel string) error {
334 channel = normalizeChannel(channel)
335

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button