mirror of
https://github.com/qwibitai/nanoclaw.git
synced 2026-06-27 18:34:58 +08:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8d3eca7027 | |||
| 1d6bba4d3f | |||
| 9bb69c0e50 | |||
| add6145f1c | |||
| 4e14d08173 | |||
| 8f2f788b6e | |||
| e96d7fd961 | |||
| 15292ae76c |
@@ -46,7 +46,7 @@ import './discord.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/discord@4.27.0
|
||||
pnpm install @chat-adapter/discord@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -46,7 +46,7 @@ import './gchat.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/gchat@4.27.0
|
||||
pnpm install @chat-adapter/gchat@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -50,7 +50,7 @@ import './github.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/github@4.27.0
|
||||
pnpm install @chat-adapter/github@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -59,7 +59,7 @@ import './linear.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/linear@4.27.0
|
||||
pnpm install @chat-adapter/linear@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -46,7 +46,7 @@ import './slack.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/slack@4.27.0
|
||||
pnpm install @chat-adapter/slack@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -46,7 +46,7 @@ import './teams.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/teams@4.27.0
|
||||
pnpm install @chat-adapter/teams@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -60,7 +60,7 @@ In `setup/index.ts`, add this entry to the `STEPS` map (right after the `registe
|
||||
### 5. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/telegram@4.27.0
|
||||
pnpm install @chat-adapter/telegram@4.29.0
|
||||
```
|
||||
|
||||
### 6. Build and validate
|
||||
|
||||
@@ -46,7 +46,7 @@ import './whatsapp-cloud.js';
|
||||
### 4. Install the adapter package (pinned)
|
||||
|
||||
```bash
|
||||
pnpm install @chat-adapter/whatsapp@4.27.0
|
||||
pnpm install @chat-adapter/whatsapp@4.29.0
|
||||
```
|
||||
|
||||
### 5. Build and validate
|
||||
|
||||
@@ -4,6 +4,8 @@ All notable changes to NanoClaw will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
- **Optional per-container resource caps.** `CONTAINER_CPU_LIMIT` and `CONTAINER_MEMORY_LIMIT` pass through to `docker run` as `--cpus` / `--memory` (`container-runner.ts`). Both are empty by default — no flag is added and the spawn args are byte-identical to today — so existing installs are unaffected. Set them to cap an agent container's CPU/memory so one agent can't monopolize the host (e.g. `CONTAINER_CPU_LIMIT=2`, `CONTAINER_MEMORY_LIMIT=8g`). Swap is intentionally not managed here (no `--memory-swap` is passed): `--memory` is a hard cap only on a swapless host.
|
||||
- [BREAKING] **Chat SDK pinned to `4.29.0` (was `4.26.0` via `^4.24.0`).** `chat` and the `@chat-adapter/*` channel adapters are version-locked — the adapter's `ChatInstance` must match the bridge's, so a mismatched pair fails to typecheck at `createChatSdkBridge(...)`. `chat` is therefore pinned exactly, and the channel-adapter install pins move with it — the `/add-<channel>` SKILL.md steps and `setup/*.sh` scripts on `main`, plus the adapter code on the `channels` branch. Core installs with no channel (only `cli`) are unaffected. **Migration:** if any channel is installed (Slack, Discord, Telegram, Teams, …), re-run its `/add-<channel>` skill to pull the matching `4.29.0` adapter.
|
||||
- **Budget/billing-exhausted LLM turns now reach the user instead of being silently dropped.** When a turn ends in a non-retryable provider error (e.g. an Anthropic `403 billing_error`) with no `<message>` wrapping, the agent-runner delivers the provider's notice to the originating channel and stops re-nudging the failing gateway. `providers/claude.ts` now surfaces the SDK's `is_error` flag (and the error subtype's `errors[]` text); `poll-loop.ts` delivers that text and skips the re-wrap retry. Fixes the case where a spend-limit notice produced silence plus a turn-after-turn retry loop.
|
||||
- [BREAKING] **`@onecli-sh/sdk` 0.5.0 -> 2.2.1 — requires a OneCLI server with the `/v1` API** (older servers 404 every SDK call). The sanctioned gateway and CLI versions are pinned in `versions.json`. **The gateway is a separate component — updating NanoClaw does not upgrade it for you:** `/update-nanoclaw` upgrades it when the pin moves, otherwise upgrade manually. **Migration:** [docs/onecli-upgrades.md](docs/onecli-upgrades.md).
|
||||
- **New agent provider: Codex (OpenAI) — run `/add-codex`.** Full runtime via `codex app-server` (planning, MCP tools, server-side history, resume). Trunk ships the seams and the skill; the payload installs from the `providers` branch (the skill, the setup picker, or `--step provider-auth codex`). Auth is vault-only — no credential ever enters a container.
|
||||
|
||||
@@ -341,6 +341,12 @@ export const CONTAINER_IMAGE = process.env.CONTAINER_IMAGE || 'nanoclaw-agent:la
|
||||
export const CONTAINER_TIMEOUT = parseInt(process.env.CONTAINER_TIMEOUT || '1800000', 10); // 30min default
|
||||
export const IDLE_TIMEOUT = parseInt(process.env.IDLE_TIMEOUT || '1800000', 10); // 30min — keep container alive after last result
|
||||
export const MAX_CONCURRENT_CONTAINERS = Math.max(1, parseInt(process.env.MAX_CONCURRENT_CONTAINERS || '5', 10) || 5);
|
||||
// Per-container resource caps → `docker run --cpus/--memory`. Empty default =
|
||||
// no flag = unbounded (today's behavior). Opt in to bound a fleet sharing one
|
||||
// host: CONTAINER_CPU_LIMIT=2, CONTAINER_MEMORY_LIMIT=8g. Swap is a host concern
|
||||
// (run the host swapless to make --memory a hard cap); not managed here.
|
||||
export const CONTAINER_CPU_LIMIT = process.env.CONTAINER_CPU_LIMIT || '';
|
||||
export const CONTAINER_MEMORY_LIMIT = process.env.CONTAINER_MEMORY_LIMIT || '';
|
||||
|
||||
export const TRIGGER_PATTERN = new RegExp(`^@${ASSISTANT_NAME}\\b`, 'i');
|
||||
```
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@
|
||||
"@clack/prompts": "^1.2.0",
|
||||
"@onecli-sh/sdk": "2.2.1",
|
||||
"better-sqlite3": "11.10.0",
|
||||
"chat": "^4.24.0",
|
||||
"chat": "4.29.0",
|
||||
"cron-parser": "5.5.0",
|
||||
"kleur": "^4.1.5"
|
||||
},
|
||||
|
||||
Generated
+14
-5
@@ -21,8 +21,8 @@ importers:
|
||||
specifier: 11.10.0
|
||||
version: 11.10.0
|
||||
chat:
|
||||
specifier: ^4.24.0
|
||||
version: 4.26.0
|
||||
specifier: 4.29.0
|
||||
version: 4.29.0
|
||||
cron-parser:
|
||||
specifier: 5.5.0
|
||||
version: 5.5.0
|
||||
@@ -609,8 +609,17 @@ packages:
|
||||
character-entities@2.0.2:
|
||||
resolution: {integrity: sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==}
|
||||
|
||||
chat@4.26.0:
|
||||
resolution: {integrity: sha512-QToDnIEGpyb8yQA6YLMHOSRK30YVk4RtsyFyuWFYyB2c4jQlyIrSWtwVK7qyvmvqzQp9uDwCdJRAhS8GtCHAGQ==}
|
||||
chat@4.29.0:
|
||||
resolution: {integrity: sha512-KdPfzaie5ivYytyRICTERg5xT+LeCbYefokvNAqTHe92eqkFaoTMXXkSitikxJVWhZIb2YoXF1b9UZHyzSzKzw==}
|
||||
engines: {node: '>=20'}
|
||||
peerDependencies:
|
||||
ai: ^6.0.182
|
||||
zod: ^3.0.0 || ^4.0.0
|
||||
peerDependenciesMeta:
|
||||
ai:
|
||||
optional: true
|
||||
zod:
|
||||
optional: true
|
||||
|
||||
chownr@1.1.4:
|
||||
resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==}
|
||||
@@ -1963,7 +1972,7 @@ snapshots:
|
||||
|
||||
character-entities@2.0.2: {}
|
||||
|
||||
chat@4.26.0:
|
||||
chat@4.29.0:
|
||||
dependencies:
|
||||
'@workflow/serde': 4.1.0-beta.2
|
||||
mdast-util-to-string: 4.0.0
|
||||
|
||||
@@ -15,7 +15,7 @@ PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Keep in sync with .claude/skills/add-discord/SKILL.md.
|
||||
ADAPTER_VERSION="@chat-adapter/discord@4.26.0"
|
||||
ADAPTER_VERSION="@chat-adapter/discord@4.29.0"
|
||||
|
||||
# Resolve which remote carries the channels branch — handles forks where
|
||||
# upstream lives on a different remote than `origin`.
|
||||
|
||||
+1
-1
@@ -15,7 +15,7 @@ PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Keep in sync with .claude/skills/add-slack/SKILL.md.
|
||||
ADAPTER_VERSION="@chat-adapter/slack@4.26.0"
|
||||
ADAPTER_VERSION="@chat-adapter/slack@4.29.0"
|
||||
|
||||
# Resolve which remote carries the channels branch — handles forks where
|
||||
# upstream lives on a different remote than `origin`.
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Keep in sync with .claude/skills/add-teams/SKILL.md.
|
||||
ADAPTER_VERSION="@chat-adapter/teams@4.26.0"
|
||||
ADAPTER_VERSION="@chat-adapter/teams@4.29.0"
|
||||
|
||||
# Resolve which remote carries the channels branch — handles forks where
|
||||
# upstream lives on a different remote than `origin`.
|
||||
|
||||
@@ -15,7 +15,7 @@ PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Keep in sync with .claude/skills/add-telegram/SKILL.md.
|
||||
ADAPTER_VERSION="@chat-adapter/telegram@4.26.0"
|
||||
ADAPTER_VERSION="@chat-adapter/telegram@4.29.0"
|
||||
|
||||
# Resolve which remote carries the channels branch — handles forks where
|
||||
# upstream lives on a different remote than `origin`.
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './discord.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/discord@4.26.0
|
||||
pnpm install @chat-adapter/discord@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './gchat.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/gchat@4.26.0
|
||||
pnpm install @chat-adapter/gchat@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './github.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/github@4.26.0
|
||||
pnpm install @chat-adapter/github@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -86,7 +86,7 @@ if ! grep -q 'if (config.catchAll) {' src/channels/chat-sdk-bridge.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/linear@4.26.0
|
||||
pnpm install @chat-adapter/linear@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './slack.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/slack@4.26.0
|
||||
pnpm install @chat-adapter/slack@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './teams.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/teams@4.26.0
|
||||
pnpm install @chat-adapter/teams@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -63,7 +63,7 @@ if ! grep -q "'pair-telegram':" setup/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/telegram@4.26.0
|
||||
pnpm install @chat-adapter/telegram@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -37,7 +37,7 @@ if ! grep -q "import './whatsapp-cloud.js';" src/channels/index.ts; then
|
||||
fi
|
||||
|
||||
echo "STEP: pnpm-install"
|
||||
pnpm install @chat-adapter/whatsapp@4.26.0
|
||||
pnpm install @chat-adapter/whatsapp@4.29.0
|
||||
|
||||
echo "STEP: pnpm-build"
|
||||
pnpm run build
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { getLaunchdLabel, getSystemdUnit } from '../src/install-slug.js';
|
||||
import { cleanupUnhealthyPeers } from './peer-cleanup.js';
|
||||
|
||||
// The reaper deletes config files from ~/Library/LaunchAgents (or the systemd
|
||||
// user dir). We point HOME at a throwaway temp dir so real registrations are
|
||||
// never touched, and force os.platform() so the launchd/systemd branch runs
|
||||
// regardless of the host running the suite. The best-effort unload inside the
|
||||
// reaper (launchctl/systemctl) is swallowed when the binary is absent, so these
|
||||
// tests are deterministic on both macOS and Linux CI.
|
||||
|
||||
/**
 * Creates a fresh, uniquely named directory under the OS temp dir (prefix
 * `peer-cleanup-`) and returns its path. Tests use it as a throwaway HOME.
 */
function tempHome(): string {
  const prefix = path.join(os.tmpdir(), 'peer-cleanup-');
  return fs.mkdtempSync(prefix);
}
|
||||
|
||||
function writePlist(filePath: string, target: string): void {
|
||||
fs.writeFileSync(
|
||||
filePath,
|
||||
`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<plist version="1.0"><dict>
|
||||
<key>ProgramArguments</key>
|
||||
<array><string>/usr/bin/node</string><string>${target}</string></array>
|
||||
</dict></plist>`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Writes a minimal systemd unit fixture to `filePath`: a `[Service]` section
 * whose `ExecStart` runs `/usr/bin/node` with `target` as its only argument.
 */
function writeUnit(filePath: string, target: string): void {
  const lines = ['[Service]', `ExecStart=/usr/bin/node ${target}`, ''];
  fs.writeFileSync(filePath, lines.join('\n'));
}
|
||||
|
||||
const created: string[] = [];
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
for (const dir of created.splice(0)) {
|
||||
fs.rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
describe('cleanupUnhealthyPeers — dead launchd registrations', () => {
|
||||
function setup(): { home: string; agentsDir: string; projectRoot: string } {
|
||||
const home = tempHome();
|
||||
created.push(home);
|
||||
const agentsDir = path.join(home, 'Library', 'LaunchAgents');
|
||||
fs.mkdirSync(agentsDir, { recursive: true });
|
||||
vi.spyOn(os, 'homedir').mockReturnValue(home);
|
||||
vi.spyOn(os, 'platform').mockReturnValue('darwin');
|
||||
return { home, agentsDir, projectRoot: path.join(home, 'install') };
|
||||
}
|
||||
|
||||
it('removes a plist whose target binary is gone', () => {
|
||||
const { agentsDir, projectRoot } = setup();
|
||||
const dead = path.join(agentsDir, 'com.nanoclaw-v2-dead.plist');
|
||||
writePlist(dead, path.join(agentsDir, 'gone', 'dist', 'index.js'));
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(dead)).toBe(false);
|
||||
expect(result.removed.map((r) => r.label)).toContain('com.nanoclaw-v2-dead');
|
||||
});
|
||||
|
||||
it('leaves a plist whose target still exists', () => {
|
||||
const { agentsDir, projectRoot } = setup();
|
||||
const liveTarget = path.join(agentsDir, 'live', 'dist', 'index.js');
|
||||
fs.mkdirSync(path.dirname(liveTarget), { recursive: true });
|
||||
fs.writeFileSync(liveTarget, '// host entry');
|
||||
const live = path.join(agentsDir, 'com.nanoclaw-v2-live.plist');
|
||||
writePlist(live, liveTarget);
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(live)).toBe(true);
|
||||
expect(result.removed).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("never reaps this install's own plist, even with a missing target", () => {
|
||||
const { agentsDir, projectRoot } = setup();
|
||||
const ownLabel = getLaunchdLabel(projectRoot);
|
||||
const own = path.join(agentsDir, `${ownLabel}.plist`);
|
||||
writePlist(own, path.join(agentsDir, 'gone', 'dist', 'index.js'));
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(own)).toBe(true);
|
||||
expect(result.removed).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('ignores an unrecognized plist (no dist/index.js target)', () => {
|
||||
const { agentsDir, projectRoot } = setup();
|
||||
const weird = path.join(agentsDir, 'com.nanoclaw-v2-weird.plist');
|
||||
fs.writeFileSync(weird, '<plist><dict></dict></plist>');
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(weird)).toBe(true);
|
||||
expect(result.removed).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanupUnhealthyPeers — dead systemd registrations', () => {
|
||||
function setup(): { unitDir: string; projectRoot: string } {
|
||||
const home = tempHome();
|
||||
created.push(home);
|
||||
const unitDir = path.join(home, '.config', 'systemd', 'user');
|
||||
fs.mkdirSync(unitDir, { recursive: true });
|
||||
vi.spyOn(os, 'homedir').mockReturnValue(home);
|
||||
vi.spyOn(os, 'platform').mockReturnValue('linux');
|
||||
return { unitDir, projectRoot: path.join(home, 'install') };
|
||||
}
|
||||
|
||||
it('removes a unit whose target binary is gone', () => {
|
||||
const { unitDir, projectRoot } = setup();
|
||||
const dead = path.join(unitDir, 'nanoclaw-v2-dead.service');
|
||||
writeUnit(dead, path.join(unitDir, 'gone', 'dist', 'index.js'));
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(dead)).toBe(false);
|
||||
expect(result.removed.map((r) => r.label)).toContain('nanoclaw-v2-dead');
|
||||
});
|
||||
|
||||
it("never reaps this install's own unit", () => {
|
||||
const { unitDir, projectRoot } = setup();
|
||||
const ownUnit = getSystemdUnit(projectRoot);
|
||||
const own = path.join(unitDir, `${ownUnit}.service`);
|
||||
writeUnit(own, path.join(unitDir, 'gone', 'dist', 'index.js'));
|
||||
|
||||
const result = cleanupUnhealthyPeers(projectRoot);
|
||||
|
||||
expect(fs.existsSync(own)).toBe(true);
|
||||
expect(result.removed).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
+112
-3
@@ -11,6 +11,14 @@
|
||||
* - launchd: `state != running` AND `runs > UNHEALTHY_RUNS_THRESHOLD`
|
||||
* - systemd: unit is in `failed` state, OR `activating` with many restarts
|
||||
*
|
||||
* Separately, a peer registration is "dead" when the program it launches no
|
||||
* longer exists on disk — almost always a deleted test checkout or worktree.
|
||||
* The service manager keeps retrying the missing binary forever, and the
|
||||
* health probes can't see it because an unloaded/inactive job doesn't report
|
||||
* via `launchctl print` / `systemctl show`. Deleting an install's folder
|
||||
* without running the uninstaller leaves these behind, so they accumulate. We
|
||||
* unload and delete the orphaned config file outright.
|
||||
*
|
||||
* Healthy peers are left alone — multiple installs can coexist fine now that
|
||||
* container-reaper is label-scoped.
|
||||
*/
|
||||
@@ -35,6 +43,7 @@ export interface PeerStatus {
|
||||
export interface PeerCleanupResult {
|
||||
checked: PeerStatus[];
|
||||
unloaded: PeerStatus[];
|
||||
removed: Array<{ label: string; configPath: string }>;
|
||||
failures: Array<{ label: string; err: string }>;
|
||||
}
|
||||
|
||||
@@ -50,7 +59,39 @@ export function cleanupUnhealthyPeers(projectRoot: string = process.cwd()): Peer
|
||||
if (platform === 'linux') {
|
||||
return cleanupSystemdPeers(projectRoot);
|
||||
}
|
||||
return { checked: [], unloaded: [], failures: [] };
|
||||
return { checked: [], unloaded: [], removed: [], failures: [] };
|
||||
}
|
||||
|
||||
/**
 * Reaps one dead peer registration: attempts to unload its job, then deletes
 * its config file and records the outcome on `result`.
 *
 * @param result        Accumulator; the peer is appended to `removed` on
 *                      success, or `{ label, err }` to `failures` otherwise.
 * @param peer          Label and config-file path of the registration.
 * @param unload        Best-effort service-manager unload. Anything it throws
 *                      is discarded (job not loaded, or the service-manager
 *                      binary is absent on this host).
 * @param kind          Human-readable noun used in log messages.
 * @param missingTarget The program path that no longer exists (logged only).
 */
function reapDeadPeer(
  result: PeerCleanupResult,
  peer: { label: string; configPath: string },
  unload: () => void,
  kind: string,
  missingTarget: string,
): void {
  const { label, configPath } = peer;

  // Step 1: unload. Failure here must never block removal of the config file.
  try {
    unload();
  } catch {
    /* job not loaded — nothing to unload */
  }

  // Step 2: delete the orphaned config file and record what happened.
  try {
    fs.rmSync(configPath, { force: true });
    log.info(`Removed dead peer ${kind}`, { label, configPath, missingTarget });
    result.removed.push(peer);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    log.warn(`Failed to remove dead peer ${kind}`, { label, err: reason });
    result.failures.push({ label, err: reason });
  }
}
|
||||
|
||||
// ---- launchd (macOS) --------------------------------------------------------
|
||||
@@ -58,7 +99,7 @@ export function cleanupUnhealthyPeers(projectRoot: string = process.cwd()): Peer
|
||||
function cleanupLaunchdPeers(projectRoot: string): PeerCleanupResult {
|
||||
const ownLabel = getLaunchdLabel(projectRoot);
|
||||
const agentsDir = path.join(os.homedir(), 'Library', 'LaunchAgents');
|
||||
const result: PeerCleanupResult = { checked: [], unloaded: [], failures: [] };
|
||||
const result: PeerCleanupResult = { checked: [], unloaded: [], removed: [], failures: [] };
|
||||
|
||||
let plists: string[];
|
||||
try {
|
||||
@@ -76,6 +117,20 @@ function cleanupLaunchdPeers(projectRoot: string): PeerCleanupResult {
|
||||
const label = path.basename(plistPath, '.plist');
|
||||
if (label === ownLabel) continue;
|
||||
|
||||
const missingTarget = deadLaunchdTarget(plistPath);
|
||||
if (missingTarget) {
|
||||
reapDeadPeer(
|
||||
result,
|
||||
{ label, configPath: plistPath },
|
||||
// Best-effort unload in case launchd still has it registered; throwing
|
||||
// (not loaded, or launchctl absent off-macOS) is expected and ignored.
|
||||
() => execFileSync('launchctl', ['unload', plistPath], { stdio: 'pipe' }),
|
||||
'launchd plist',
|
||||
missingTarget,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const status = probeLaunchdPeer(label, plistPath, uid);
|
||||
if (!status) continue;
|
||||
result.checked.push(status);
|
||||
@@ -121,12 +176,32 @@ function probeLaunchdPeer(label: string, plistPath: string, uid: number): PeerSt
|
||||
return { label, configPath: plistPath, state, runs, unhealthy };
|
||||
}
|
||||
|
||||
/**
 * Returns the program path a launchd plist launches when that program no longer
 * exists on disk (a dead registration), or undefined when the plist is
 * unreadable, has an unrecognized shape, or its target still exists — in which
 * case the plist must not be touched.
 *
 * A plist is XML, so a path containing `&`, `<`, `>` or quotes is stored
 * entity-escaped (e.g. `&amp;`). The captured text is therefore decoded before
 * the existence check. Because a positive answer leads to the plist being
 * deleted, the target is reported dead only when neither the raw nor the
 * decoded form exists on disk.
 *
 * @param plistPath Absolute path of the `.plist` file to inspect.
 * @returns The (entity-decoded) missing target path, or undefined.
 */
function deadLaunchdTarget(plistPath: string): string | undefined {
  let xml: string;
  try {
    xml = fs.readFileSync(plistPath, 'utf-8');
  } catch {
    return undefined;
  }
  // ProgramArguments is [nodePath, "<projectRoot>/dist/index.js"]; the host
  // entry point is the stable marker to match on.
  const raw = /<string>([^<]*\/dist\/index\.js)<\/string>/.exec(xml)?.[1];
  if (!raw) return undefined;

  // Decode the five predefined XML entities in a single pass so an escaped
  // ampersand (`&amp;lt;`) is not decoded twice.
  const entities: Record<string, string> = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  const target = raw.replace(/&(amp|lt|gt|quot|apos);/g, (whole, name: string) => entities[name] ?? whole);

  if (fs.existsSync(target) || fs.existsSync(raw)) return undefined;
  return target;
}
|
||||
|
||||
// ---- systemd (Linux) --------------------------------------------------------
|
||||
|
||||
function cleanupSystemdPeers(projectRoot: string): PeerCleanupResult {
|
||||
const ownUnit = getSystemdUnit(projectRoot);
|
||||
const unitDir = path.join(os.homedir(), '.config', 'systemd', 'user');
|
||||
const result: PeerCleanupResult = { checked: [], unloaded: [], failures: [] };
|
||||
const result: PeerCleanupResult = { checked: [], unloaded: [], removed: [], failures: [] };
|
||||
|
||||
let units: string[];
|
||||
try {
|
||||
@@ -141,6 +216,22 @@ function cleanupSystemdPeers(projectRoot: string): PeerCleanupResult {
|
||||
for (const unit of units) {
|
||||
if (unit === ownUnit) continue;
|
||||
|
||||
const unitPath = path.join(unitDir, `${unit}.service`);
|
||||
const missingTarget = deadSystemdTarget(unitPath);
|
||||
if (missingTarget) {
|
||||
reapDeadPeer(
|
||||
result,
|
||||
{ label: unit, configPath: unitPath },
|
||||
() => {
|
||||
execFileSync('systemctl', ['--user', 'disable', '--now', `${unit}.service`], { stdio: 'pipe' });
|
||||
execFileSync('systemctl', ['--user', 'daemon-reload'], { stdio: 'pipe' });
|
||||
},
|
||||
'systemd unit',
|
||||
missingTarget,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const status = probeSystemdPeer(unit);
|
||||
if (!status) continue;
|
||||
result.checked.push(status);
|
||||
@@ -184,3 +275,21 @@ function probeSystemdPeer(unit: string): PeerStatus | null {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Inspects a systemd unit file and reports a dead registration.
 *
 * @param unitPath Path of the `.service` file to read.
 * @returns The `<projectRoot>/dist/index.js` path named on the unit's
 *          `ExecStart=` line when that file is missing from disk; undefined
 *          when the unit cannot be read, no `ExecStart=<exe> <…/dist/index.js>`
 *          line is found, or the target still exists.
 */
function deadSystemdTarget(unitPath: string): string | undefined {
  let contents: string;
  try {
    contents = fs.readFileSync(unitPath, 'utf-8');
  } catch {
    return undefined;
  }

  // Expected line shape: ExecStart=<nodePath> <projectRoot>/dist/index.js
  const match = /^ExecStart=\S+\s+(\S+\/dist\/index\.js)\s*$/m.exec(contents);
  const target = match?.[1];
  if (!target || fs.existsSync(target)) return undefined;
  return target;
}
|
||||
|
||||
@@ -72,6 +72,12 @@ export async function run(_args: string[]): Promise<void> {
|
||||
labels: peerReport.unloaded.map((p) => p.label),
|
||||
});
|
||||
}
|
||||
if (peerReport.removed.length > 0) {
|
||||
log.warn('Removed dead peer NanoClaw registrations (target binary missing)', {
|
||||
count: peerReport.removed.length,
|
||||
labels: peerReport.removed.map((p) => p.label),
|
||||
});
|
||||
}
|
||||
|
||||
if (platform === 'macos') {
|
||||
setupLaunchd(projectRoot, nodePath, homeDir);
|
||||
|
||||
@@ -38,6 +38,11 @@ export const ONECLI_API_KEY = process.env.ONECLI_API_KEY || envConfig.ONECLI_API
|
||||
export const MAX_MESSAGES_PER_PROMPT = Math.max(1, parseInt(process.env.MAX_MESSAGES_PER_PROMPT || '10', 10) || 10);
|
||||
export const IDLE_TIMEOUT = parseInt(process.env.IDLE_TIMEOUT || '1800000', 10); // 30min default — how long to keep container alive after last result
|
||||
export const MAX_CONCURRENT_CONTAINERS = Math.max(1, parseInt(process.env.MAX_CONCURRENT_CONTAINERS || '5', 10) || 5);
|
||||
// Per-container resource caps, passed through to `docker run`. Default empty =
|
||||
// no flag added = today's unbounded behavior (don't OOM existing OSS workloads).
|
||||
// Operators opt in: CONTAINER_CPU_LIMIT=2, CONTAINER_MEMORY_LIMIT=8g.
|
||||
export const CONTAINER_CPU_LIMIT = process.env.CONTAINER_CPU_LIMIT || '';
|
||||
export const CONTAINER_MEMORY_LIMIT = process.env.CONTAINER_MEMORY_LIMIT || '';
|
||||
|
||||
function escapeRegex(str: string): string {
|
||||
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
|
||||
@@ -47,6 +47,37 @@ describe('buildContainerArgs ordering invariant (structural)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('per-container resource limits (structural)', () => {
  // CONTAINER_CPU_LIMIT / CONTAINER_MEMORY_LIMIT pass through to `docker run` as
  // --cpus / --memory, but only when set. The default is empty string → no flag →
  // today's unbounded behavior (don't OOM existing OSS workloads). Swap is not
  // managed here (a swapless host makes --memory a hard cap). buildContainerArgs
  // needs a live gateway to drive, so guard the wiring structurally: the flags
  // must be pushed, and each must be guarded by its env knob so empty emits nothing.

  // Reads a file under <cwd>/src as UTF-8 text.
  const readSrc = (file: string): string => fs.readFileSync(path.join(process.cwd(), 'src', file), 'utf-8');

  it('reads both limit knobs from config', () => {
    const src = readSrc('container-runner.ts');
    expect(src).toContain('CONTAINER_CPU_LIMIT');
    expect(src).toContain('CONTAINER_MEMORY_LIMIT');
  });

  // Both guard checks require the push to follow its `if (...)` directly
  // (optionally inside a brace block). A lazy `[\s\S]*?` between the two would
  // also accept an unguarded push that merely appears later in the file.
  it('guards --cpus behind a truthy CONTAINER_CPU_LIMIT', () => {
    const src = readSrc('container-runner.ts');
    expect(src).toMatch(/if \(CONTAINER_CPU_LIMIT\)\s*\{?\s*args\.push\('--cpus', CONTAINER_CPU_LIMIT\)/);
  });

  it('guards --memory behind a truthy CONTAINER_MEMORY_LIMIT (and sets no swap flag)', () => {
    const src = readSrc('container-runner.ts');
    expect(src).toMatch(/if \(CONTAINER_MEMORY_LIMIT\)\s*\{?\s*args\.push\('--memory', CONTAINER_MEMORY_LIMIT\)/);
    expect(src).not.toContain('--memory-swap');
  });

  it('defaults both knobs to empty string in config (no flag = unbounded)', () => {
    const cfg = readSrc('config.ts');
    expect(cfg).toContain("CONTAINER_CPU_LIMIT = process.env.CONTAINER_CPU_LIMIT || ''");
    expect(cfg).toContain("CONTAINER_MEMORY_LIMIT = process.env.CONTAINER_MEMORY_LIMIT || ''");
  });
});
|
||||
|
||||
describe('container boot-failure tripwire (structural)', () => {
|
||||
// A container that dies at boot (unknown provider, missing CLI binary, bad
|
||||
// config) explains itself only on stderr — which logs at debug, below the
|
||||
|
||||
@@ -10,9 +10,11 @@ import path from 'path';
|
||||
import { OneCLI } from '@onecli-sh/sdk';
|
||||
|
||||
import {
|
||||
CONTAINER_CPU_LIMIT,
|
||||
CONTAINER_IMAGE,
|
||||
CONTAINER_IMAGE_BASE,
|
||||
CONTAINER_INSTALL_LABEL,
|
||||
CONTAINER_MEMORY_LIMIT,
|
||||
DATA_DIR,
|
||||
GROUPS_DIR,
|
||||
ONECLI_API_KEY,
|
||||
@@ -434,6 +436,13 @@ async function buildContainerArgs(
|
||||
): Promise<string[]> {
|
||||
const args: string[] = ['run', '--rm', '--name', containerName, '--label', CONTAINER_INSTALL_LABEL];
|
||||
|
||||
// Per-container resource caps (opt-in; empty = unbounded, today's behavior).
|
||||
// For memory, only --memory is set (never --memory-swap). Whether that's a hard cap depends on the host having no
|
||||
// swap (a deployment concern) — on a swapless host --memory is hard and a runaway
|
||||
// is OOM-killed; we don't manage swap from here.
|
||||
if (CONTAINER_CPU_LIMIT) args.push('--cpus', CONTAINER_CPU_LIMIT);
|
||||
if (CONTAINER_MEMORY_LIMIT) args.push('--memory', CONTAINER_MEMORY_LIMIT);
|
||||
|
||||
// Environment — only vars read by code we don't own.
|
||||
// Everything NanoClaw-specific is in container.json (read by runner at startup).
|
||||
args.push('-e', `TZ=${TIMEZONE}`);
|
||||
|
||||
Reference in New Issue
Block a user