diff --git a/src/channels/chat-sdk-bridge.ts b/src/channels/chat-sdk-bridge.ts
index 18ab2cbf8..c23e9ee32 100644
--- a/src/channels/chat-sdk-bridge.ts
+++ b/src/channels/chat-sdk-bridge.ts
@@ -305,8 +305,14 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
         // Start local HTTP server to receive forwarded Gateway events (including interactions)
         const webhookUrl = await startLocalWebhookServer(gatewayAdapter, setupConfig, config.botToken);
 
+        // Exponential backoff capped at 1h. Without this, an unrecoverable
+        // failure (e.g., TokenInvalid) restarts ~10×/sec and Discord's
+        // Cloudflare layer issues a multi-hour IP block. A run that lasts
+        // longer than 5 minutes counts as healthy and resets the counter.
+        let consecutiveFailures = 0;
         const startGateway = () => {
           if (gatewayAbort?.signal.aborted) return;
+          const startedAt = Date.now();
           // Capture the long-running listener promise via waitUntil
           let listenerPromise: Promise<unknown> | undefined;
           gatewayAdapter.startGatewayListener!(
@@ -321,21 +327,30 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
           ).then(() => {
             // startGatewayListener resolves immediately with a Response;
             // the actual work is in the listenerPromise passed to waitUntil
-            if (listenerPromise) {
-              listenerPromise
-                .then(() => {
-                  if (!gatewayAbort?.signal.aborted) {
-                    log.info('Gateway listener expired, restarting', { adapter: adapter.name });
-                    startGateway();
-                  }
-                })
-                .catch((err) => {
-                  if (!gatewayAbort?.signal.aborted) {
-                    log.error('Gateway listener error, restarting in 5s', { adapter: adapter.name, err });
-                    setTimeout(startGateway, 5000);
-                  }
+            if (!listenerPromise) return;
+            const reschedule = (err?: unknown) => {
+              if (gatewayAbort?.signal.aborted) return;
+              const ranForMs = Date.now() - startedAt;
+              if (ranForMs > 5 * 60 * 1000) consecutiveFailures = 0;
+              else consecutiveFailures++;
+              const delayMs = Math.min(60 * 60 * 1000, 2 ** consecutiveFailures * 1000);
+              if (err) {
+                log.error('Gateway listener error, retrying', {
+                  adapter: adapter.name,
+                  err,
+                  consecutiveFailures,
+                  delayMs,
                 });
-            }
+              } else {
+                log.info('Gateway listener expired, restarting', {
+                  adapter: adapter.name,
+                  consecutiveFailures,
+                  delayMs,
+                });
+              }
+              setTimeout(startGateway, delayMs);
+            };
+            listenerPromise.then(() => reschedule()).catch(reschedule);
           });
         };
         startGateway();