mirror of
https://github.com/qwibitai/nanoclaw.git
synced 2026-06-04 10:14:47 +08:00
fix(opencode): kill server process group + configurable idle timeout
Two bugs in the upstream OpenCode provider that fire together when a
local backend (Ollama, llama.cpp) is slower than the hardcoded 90s
event timeout:
1. proc.kill('SIGKILL') only kills the wrapper process that spawn()
returned, not the opencode-linux-*/bin/opencode child it execs into.
The child keeps holding port 4096, so the next spawnOpencodeServer()
fails with "Failed to start server on port 4096" / EADDRINUSE.
Fix: spawn detached and signal the whole process group via
process.kill(-pid, 'SIGKILL') in a new killProcessTree() helper.
2. IDLE_TIMEOUT_MS = 90_000 is hardcoded. For a local 31B model the
first prompt's time-to-first-token routinely exceeds that, tripping
the timeout. Fix: read OPENCODE_IDLE_TIMEOUT_MS from env, default
300_000 (5 min) — generous for cloud APIs, just enough for local.
Per-group override goes in container.json env (e.g. "600000" for a
slow local box), no rebuild needed since src/ is bind-mounted.
Same bugs exist on origin/providers — should be ported upstream.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,19 @@ const SESSION_STATUS_RETRY_ERROR_AFTER = 3;
|
||||
const STALE_SESSION_RE =
|
||||
/no conversation found|ENOENT.*\.jsonl|session.*not found|NotFoundError|connection reset|ECONNRESET|404|event timeout/i;
|
||||
|
||||
/**
 * Best-effort SIGKILL for a spawned child and everything in its process group.
 *
 * First signals the negated pid so the whole group is targeted (the commit
 * pairs this with spawning the server `detached`). If that call throws, falls
 * back to killing only the process handle itself. Any error from the fallback
 * is swallowed on purpose: this runs on teardown paths where there is nothing
 * useful left to do with a failure.
 *
 * @param proc - Child process handle to terminate. A handle without a pid is
 *   left untouched.
 */
function killProcessTree(proc: ChildProcess): void {
  const pid = proc.pid;
  if (!pid) return;

  try {
    // Negative pid addresses the process group rather than a single process.
    process.kill(-pid, 'SIGKILL');
    return;
  } catch {
    // Group signal failed; fall through and try the single process instead.
  }

  try {
    proc.kill('SIGKILL');
  } catch {
    /* ignore */
  }
}
|
||||
|
||||
function spawnOpencodeServer(config: Record<string, unknown>, timeoutMs = 10_000): Promise<{ url: string; proc: ChildProcess }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const hostname = '127.0.0.1';
|
||||
@@ -26,10 +39,11 @@ function spawnOpencodeServer(config: Record<string, unknown>, timeoutMs = 10_000
|
||||
...process.env,
|
||||
OPENCODE_CONFIG_CONTENT: JSON.stringify(config),
|
||||
},
|
||||
detached: true,
|
||||
});
|
||||
|
||||
const id = setTimeout(() => {
|
||||
- proc.kill('SIGKILL');
|
||||
+ killProcessTree(proc);
|
||||
reject(new Error(`Timeout waiting for OpenCode server to start after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
@@ -189,11 +203,7 @@ export function destroySharedRuntime(): void {
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
- try {
|
||||
- sharedRuntime.proc.kill('SIGKILL');
|
||||
- } catch {
|
||||
- /* ignore */
|
||||
- }
|
||||
+ killProcessTree(sharedRuntime.proc);
|
||||
sharedRuntime = null;
|
||||
sharedConfigKey = null;
|
||||
}
|
||||
@@ -243,7 +253,7 @@ export class OpenCodeProvider implements AgentProvider {
|
||||
};
|
||||
|
||||
const self = this;
|
||||
- const IDLE_TIMEOUT_MS = 90_000;
|
||||
+ const IDLE_TIMEOUT_MS = Number(process.env.OPENCODE_IDLE_TIMEOUT_MS) || 300_000;
|
||||
|
||||
async function* gen(): AsyncGenerator<ProviderEvent> {
|
||||
let initYielded = false;
|
||||
|
||||
Reference in New Issue
Block a user