mirror of
https://github.com/qwibitai/nanoclaw.git
synced 2026-06-15 18:21:47 +08:00
Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 904871aaa7 | |||
| 5e76b9d7e8 | |||
| b429ab37b8 | |||
| 09ddde33e1 | |||
| c0c46c14d6 | |||
| c993527e25 | |||
| 0367dbb6f0 | |||
| c7a7a709ed | |||
| 7c8d220115 | |||
| 2d2c3204bc | |||
| 4836cb59df | |||
| c20213a133 | |||
| d9ed98fd65 | |||
| 5ae1c33fff | |||
| af542adad5 | |||
| 894b154e41 | |||
| 831ef88f16 | |||
| d05923f274 | |||
| aba618215d | |||
| 138c277fae | |||
| 1e7cb8b8c8 | |||
| eb0055a0b0 | |||
| ef6ea87628 | |||
| b29df213ad | |||
| dd53875574 | |||
| a1ce73c376 | |||
| 48e4172899 |
@@ -1,161 +0,0 @@
|
||||
---
|
||||
name: add-codex
|
||||
description: Use Codex (CLI + AppServer) as the full agent provider — planning, tool orchestration, native compaction, MCP tools, session resume — in place of the Claude Agent SDK. ChatGPT subscription or OPENAI_API_KEY. Per-group via agent_provider. Distinct from using OpenAI as an MCP tool (where Claude remains the planner).
|
||||
---
|
||||
|
||||
# Codex agent provider
|
||||
|
||||
NanoClaw runs agents in a long-lived **poll loop** inside the container. The backend is selected with **`AGENT_PROVIDER`** (`claude` | `opencode` | `codex` | `mock`).
|
||||
|
||||
Trunk ships with only the `claude` provider baked in. This skill copies the Codex provider files in from the `providers` branch, wires them into the host and container barrels, updates the Dockerfile to install the Codex CLI, and rebuilds the image.
|
||||
|
||||
The Codex provider runs `codex app-server` as a child process and speaks JSON-RPC over stdio. That gives it native session resume, streaming events, MCP tool access, and `thread/compact/start` compaction — same feature bar as the Claude Agent SDK, without the Anthropic-only lock-in.
|
||||
|
||||
## Install
|
||||
|
||||
### Pre-flight
|
||||
|
||||
If all of the following are already present, skip to **Configuration**:
|
||||
|
||||
- `src/providers/codex.ts`
|
||||
- `container/agent-runner/src/providers/codex.ts`
|
||||
- `container/agent-runner/src/providers/codex-app-server.ts`
|
||||
- `container/agent-runner/src/providers/codex.factory.test.ts`
|
||||
- `import './codex.js';` line in `src/providers/index.ts`
|
||||
- `import './codex.js';` line in `container/agent-runner/src/providers/index.ts`
|
||||
- `ARG CODEX_VERSION` and `"@openai/codex@${CODEX_VERSION}"` in the pnpm global-install block in `container/Dockerfile`
|
||||
|
||||
Missing pieces — continue below. All steps are idempotent; re-running is safe.
|
||||
|
||||
### 1. Fetch the providers branch
|
||||
|
||||
```bash
|
||||
git fetch origin providers
|
||||
```
|
||||
|
||||
### 2. Copy the Codex source files
|
||||
|
||||
Wholesale copies (owned entirely by this skill — user edits to these files won't survive a re-run, as designed):
|
||||
|
||||
```bash
|
||||
git show origin/providers:src/providers/codex.ts > src/providers/codex.ts
|
||||
git show origin/providers:container/agent-runner/src/providers/codex.ts > container/agent-runner/src/providers/codex.ts
|
||||
git show origin/providers:container/agent-runner/src/providers/codex-app-server.ts > container/agent-runner/src/providers/codex-app-server.ts
|
||||
git show origin/providers:container/agent-runner/src/providers/codex.factory.test.ts > container/agent-runner/src/providers/codex.factory.test.ts
|
||||
```
|
||||
|
||||
### 3. Append the self-registration imports
|
||||
|
||||
Each barrel gets one line — alphabetical placement keeps diffs small.
|
||||
|
||||
`src/providers/index.ts`:
|
||||
|
||||
```typescript
|
||||
import './codex.js';
|
||||
```
|
||||
|
||||
`container/agent-runner/src/providers/index.ts`:
|
||||
|
||||
```typescript
|
||||
import './codex.js';
|
||||
```
|
||||
|
||||
### 4. Add the Codex CLI to the container Dockerfile
|
||||
|
||||
Two edits to `container/Dockerfile`, both idempotent (skip if already present):
|
||||
|
||||
**(a)** In the "Pin CLI versions" ARG block (around line 18), add after `ARG CLAUDE_CODE_VERSION=...`:
|
||||
|
||||
```dockerfile
|
||||
ARG CODEX_VERSION=0.124.0
|
||||
```
|
||||
|
||||
**(b)** Add a new standalone `RUN` block for the Codex CLI, after the existing per-CLI install blocks (around line 106, right after the `@anthropic-ai/claude-code` block). The Dockerfile splits each global CLI into its own layer for cache granularity — keep that pattern; do not collapse them into a single combined `pnpm install -g` call:
|
||||
|
||||
```dockerfile
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "@openai/codex@${CODEX_VERSION}"
|
||||
```
|
||||
|
||||
Note: **no agent-runner package dependency** — Codex is a CLI binary, not a library. Unlike OpenCode, there's nothing to add to `container/agent-runner/package.json`.
|
||||
|
||||
### 5. Build
|
||||
|
||||
```bash
|
||||
pnpm run build # host
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit # container typecheck
|
||||
./container/build.sh # agent image
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Codex supports two primary auth paths and one experimental BYO-endpoint path. Pick the one that matches your setup.
|
||||
|
||||
### Option A — ChatGPT subscription (recommended for individuals)
|
||||
|
||||
On the host (not inside the container), run Codex's OAuth login:
|
||||
|
||||
```bash
|
||||
codex login
|
||||
```
|
||||
|
||||
This writes `~/.codex/auth.json` with a subscription token. The host-side Codex provider ([src/providers/codex.ts](../../../src/providers/codex.ts)) copies `auth.json` into a per-session `~/.codex` directory mounted into the container — your host's own Codex CLI is never touched.
|
||||
|
||||
No `.env` variables required for this mode.
|
||||
|
||||
### Option B — API key (recommended for CI or API billing)
|
||||
|
||||
```env
|
||||
OPENAI_API_KEY=sk-...
|
||||
CODEX_MODEL=gpt-5.4-mini
|
||||
```
|
||||
|
||||
The host forwards both variables into the container. If both subscription (`auth.json`) and `OPENAI_API_KEY` are present, Codex prefers the subscription.
|
||||
|
||||
### Option C — BYO OpenAI-compatible endpoint (experimental)
|
||||
|
||||
Codex's built-in `openai` provider honors the `OPENAI_BASE_URL` env var directly. Point it at any OpenAI-compatible endpoint — Groq, Together, self-hosted vLLM, an OpenAI proxy, etc.
|
||||
|
||||
```env
|
||||
OPENAI_API_KEY=...
|
||||
OPENAI_BASE_URL=https://api.groq.com/openai/v1
|
||||
CODEX_MODEL=llama-3.3-70b-versatile
|
||||
```
|
||||
|
||||
Codex also ships first-class local-runner flags — `codex --oss --local-provider ollama` or `--local-provider lmstudio` — that auto-detect a local server. To use those inside NanoClaw, set `CODEX_MODEL` to a model your local runner serves and add the corresponding base URL; see the Codex CLI docs for the full `model_provider = oss` configuration.
|
||||
|
||||
**Experimental caveat:** tool-calling quality depends on the model and endpoint. Not every OpenAI-compat provider implements the full function-calling spec, and smaller models (< 30B) often struggle with multi-step tool orchestration. Test before committing.
|
||||
|
||||
### Per group / per session
|
||||
|
||||
Set `"provider": "codex"` in the group's **`container.json`** (`groups/<folder>/container.json`) — the in-container runner reads `provider` from there, not from the DB. The DB columns **`agent_groups.agent_provider`** and **`sessions.agent_provider`** (session overrides group) only drive host-side provider contribution — per-session `~/.codex` mount, `OPENAI_*` / `CODEX_MODEL` env passthrough — and do not propagate into `container.json` at spawn time. Set both, or just edit `container.json`; if they disagree, the runner uses `container.json` and the host-side resolver falls back through session → group → `container.json` → `'claude'`.
|
||||
|
||||
`CODEX_MODEL` applies process-wide via `.env`; if you need different models for different groups, set them via `container_config.env` on the group.
|
||||
|
||||
Extra MCP servers still come from **`NANOCLAW_MCP_SERVERS`** / `container_config.mcpServers` on the host. The runner merges them into the same `mcpServers` object passed to all providers.
|
||||
|
||||
## Operational notes
|
||||
|
||||
- **Spawn-per-query:** Codex's app-server is spawned fresh per query invocation, matching the OpenCode pattern. No long-lived daemon to keep healthy across sessions.
|
||||
- **Per-session `~/.codex` isolation:** each group gets its own copy of the host's `auth.json`. The container can rewrite `config.toml` freely on every wake without touching the host's Codex config.
|
||||
- **Native compaction:** kicks in automatically at 40K cumulative input tokens between turns, via `thread/compact/start`. If compaction fails, the provider logs and continues uncompacted — no fatal error.
|
||||
- **Approvals:** auto-accepted inside the container (the container is the sandbox; same posture as Claude/OpenCode).
|
||||
- **Mid-turn input:** Codex turns don't accept mid-turn messages. Follow-up `push()` calls queue and drain between turns, matching the OpenCode pattern. The poll-loop only pushes between turns anyway, so no messages are dropped.
|
||||
- **Stale thread recovery:** `isSessionInvalid` matches on stale-thread-ID errors (`thread not found`, `unknown thread`, etc.) so a cold-started app-server can recover cleanly when it sees a stored continuation it no longer has.
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
grep -q "./codex.js" container/agent-runner/src/providers/index.ts && echo "container barrel: OK"
|
||||
grep -q "./codex.js" src/providers/index.ts && echo "host barrel: OK"
|
||||
grep -q "@openai/codex@" container/Dockerfile && echo "Dockerfile install: OK"
|
||||
cd container/agent-runner && bun test src/providers/codex.factory.test.ts && cd -
|
||||
```
|
||||
|
||||
After the image rebuild, set `agent_provider = 'codex'` on a test group and send a message. Successful round-trip looks like:
|
||||
|
||||
- `init` event with a stable thread ID as continuation
|
||||
- One or more `activity` / `progress` events during the turn
|
||||
- `result` event with the model's reply
|
||||
|
||||
If the agent hangs or errors, check `~/.codex/auth.json` exists on the host (Option A) or that `OPENAI_API_KEY` is forwarding correctly (Option B) — `docker exec` into a running container and `env | grep -i openai` to confirm.
|
||||
@@ -0,0 +1,15 @@
|
||||
You are a NanoClaw agent. Your name, destinations, and message-sending rules are provided in the runtime system prompt at the top of each turn.
|
||||
|
||||
## Communication
|
||||
|
||||
Be concise. Prefer outcomes over play-by-play; when the work is done, the final message should be about the result.
|
||||
|
||||
When you produce a file for the user in the workspace — a document, export, or asset — deliver it with `send_file` in the same turn; announcing without sending is an unfinished reply.
|
||||
|
||||
## Workspace
|
||||
|
||||
Files you create are saved in `/workspace/agent/`. Use this for notes, research, artifacts, and anything that should persist across turns in this group.
|
||||
|
||||
## Conversation History
|
||||
|
||||
The `conversations/` folder holds searchable past conversation transcripts or exchange archives for this group. Use it to recall prior context when a request references something that happened before.
|
||||
@@ -20,6 +20,7 @@ ARG INSTALL_CJK_FONTS=false
|
||||
# mean every rebuild silently picks up the latest and can break in lockstep
|
||||
# across all users.
|
||||
ARG CLAUDE_CODE_VERSION=2.1.116
|
||||
ARG CODEX_VERSION=0.138.0
|
||||
ARG AGENT_BROWSER_VERSION=latest
|
||||
ARG VERCEL_VERSION=latest
|
||||
ARG BUN_VERSION=1.3.12
|
||||
@@ -101,6 +102,9 @@ RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "agent-browser@${AGENT_BROWSER_VERSION}"
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "@openai/codex@${CODEX_VERSION}"
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.2.116",
|
||||
"@modelcontextprotocol/sdk": "^1.12.1",
|
||||
"@opencode-ai/sdk": "^1.4.3",
|
||||
"cron-parser": "^5.0.0",
|
||||
"zod": "^4.0.0",
|
||||
},
|
||||
@@ -44,6 +45,8 @@
|
||||
|
||||
"@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="],
|
||||
|
||||
"@opencode-ai/sdk": ["@opencode-ai/sdk@1.4.11", "", { "dependencies": { "cross-spawn": "7.0.6" } }, "sha512-EJxSfc7D/dda/vrw8zQe4g7yVTxERktvb5SvIBlGBnKYQJGOgo9RyA/1EL3l208rHeo6jm1sdrAF0E6o/k94ug=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.3.12", "", { "dependencies": { "bun-types": "1.3.12" } }, "sha512-DBv81elK+/VSwXHDlnH3Qduw+KxkTIWi7TXkAeh24zpi5l0B2kUg9Ga3tb4nJaPcOFswflgi/yAvMVBPrxMB+A=="],
|
||||
|
||||
"@types/node": ["@types/node@22.19.17", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q=="],
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.2.116",
|
||||
"@modelcontextprotocol/sdk": "^1.12.1",
|
||||
"@opencode-ai/sdk": "^1.4.3",
|
||||
"cron-parser": "^5.0.0",
|
||||
"zod": "^4.0.0"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
import { describe, expect, it, afterEach } from 'bun:test';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import {
|
||||
type AppServer,
|
||||
attachCodexAutoApproval,
|
||||
buildCodexProcessEnv,
|
||||
tomlBasicString,
|
||||
writeCodexConfigToml,
|
||||
} from './codex-app-server.js';
|
||||
|
||||
let tmpHome: string | null = null;
|
||||
const originalHome = process.env.HOME;
|
||||
|
||||
afterEach(() => {
|
||||
process.env.HOME = originalHome;
|
||||
if (tmpHome) {
|
||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
||||
tmpHome = null;
|
||||
}
|
||||
});
|
||||
|
||||
describe('Codex config TOML', () => {
|
||||
it('escapes basic strings', () => {
|
||||
expect(tomlBasicString('a "quoted" \\\\ value')).toBe('"a \\"quoted\\" \\\\\\\\ value"');
|
||||
});
|
||||
|
||||
it('rejects newlines', () => {
|
||||
expect(() => tomlBasicString('bad\nvalue')).toThrow(/newline/);
|
||||
});
|
||||
|
||||
it('hardcodes danger-full-access + never and writes model, effort, and MCP servers', () => {
|
||||
tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-home-'));
|
||||
process.env.HOME = tmpHome;
|
||||
|
||||
writeCodexConfigToml(
|
||||
{
|
||||
nanoclaw: {
|
||||
command: 'bun',
|
||||
args: ['run', '/app/src/mcp-tools/index.ts'],
|
||||
env: { FOO: 'bar' },
|
||||
},
|
||||
},
|
||||
{ model: 'gpt-5', effort: 'medium' },
|
||||
);
|
||||
|
||||
const content = fs.readFileSync(path.join(tmpHome, '.codex', 'config.toml'), 'utf-8');
|
||||
expect(content).toContain('sandbox_mode = "danger-full-access"');
|
||||
expect(content).toContain('approval_policy = "never"');
|
||||
expect(content).toContain('project_doc_max_bytes = 32768');
|
||||
expect(content).toContain('model = "gpt-5"');
|
||||
expect(content).toContain('model_reasoning_effort = "medium"');
|
||||
expect(content).not.toContain('[sandbox_workspace_write]');
|
||||
expect(content).not.toContain('writable_roots =');
|
||||
expect(content).toContain('[mcp_servers.nanoclaw]');
|
||||
expect(content).toContain('command = "bun"');
|
||||
expect(content).toContain('args = ["run", "/app/src/mcp-tools/index.ts"]');
|
||||
expect(content).toContain('[mcp_servers.nanoclaw.env]');
|
||||
expect(content).toContain('FOO = "bar"');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Codex auto-approval', () => {
|
||||
// NanoClaw (container isolation + OneCLI) is the boundary, so the handler accepts
|
||||
// every request unconditionally — even paths/commands a sandbox policy would refuse.
|
||||
it('grants full filesystem + network for permission requests', () => {
|
||||
const { server, writes } = fakeServer();
|
||||
attachCodexAutoApproval(server);
|
||||
|
||||
server.serverRequestHandlers[0]({
|
||||
id: 1,
|
||||
method: 'item/permissions/requestApproval',
|
||||
params: { permissions: { fileSystem: { read: ['/workspace/agent'], write: ['/workspace/agent'] } } },
|
||||
});
|
||||
|
||||
const result = JSON.parse(writes[0]).result as {
|
||||
permissions: { fileSystem: { read: string[]; write: string[] }; network: { enabled: boolean } };
|
||||
scope: string;
|
||||
};
|
||||
expect(result.scope).toBe('turn');
|
||||
expect(result.permissions.fileSystem.read).toEqual(['/']);
|
||||
expect(result.permissions.fileSystem.write).toEqual(['/']);
|
||||
expect(result.permissions.network.enabled).toBe(true);
|
||||
});
|
||||
|
||||
it('accepts file-change and command-exec approvals regardless of path', () => {
|
||||
const { server, writes } = fakeServer();
|
||||
attachCodexAutoApproval(server);
|
||||
|
||||
server.serverRequestHandlers[0]({ id: 2, method: 'item/fileChange/requestApproval', params: { grantRoot: '/etc' } });
|
||||
server.serverRequestHandlers[0]({
|
||||
id: 3,
|
||||
method: 'item/commandExecution/requestApproval',
|
||||
params: { command: 'rm -rf /', cwd: '/' },
|
||||
});
|
||||
|
||||
expect(JSON.parse(writes[0]).result).toEqual({ decision: 'accept' });
|
||||
expect(JSON.parse(writes[1]).result).toEqual({ decision: 'accept' });
|
||||
});
|
||||
|
||||
it('approves legacy patch and command-exec approvals regardless of path', () => {
|
||||
const { server, writes } = fakeServer();
|
||||
attachCodexAutoApproval(server);
|
||||
|
||||
server.serverRequestHandlers[0]({
|
||||
id: 4,
|
||||
method: 'applyPatchApproval',
|
||||
params: { fileChanges: { '/etc/passwd': {} } },
|
||||
});
|
||||
server.serverRequestHandlers[0]({ id: 5, method: 'execCommandApproval', params: { command: 'rm -rf /', cwd: '/' } });
|
||||
|
||||
expect(JSON.parse(writes[0]).result).toEqual({ decision: 'approved' });
|
||||
expect(JSON.parse(writes[1]).result).toEqual({ decision: 'approved' });
|
||||
});
|
||||
|
||||
it('fails closed for unknown server requests', () => {
|
||||
const { server, writes } = fakeServer();
|
||||
attachCodexAutoApproval(server);
|
||||
|
||||
server.serverRequestHandlers[0]({ id: 6, method: 'new/unknown/request' });
|
||||
|
||||
const response = JSON.parse(writes[0]);
|
||||
expect(response.error.message).toContain('Unhandled Codex app-server request');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Codex process env', () => {
|
||||
it('forwards proxy/runtime env without leaking secret-like host env', () => {
|
||||
const env = buildCodexProcessEnv({
|
||||
PATH: '/bin',
|
||||
HOME: '/home/node',
|
||||
CODEX_HOME: '/home/node/.codex',
|
||||
HTTPS_PROXY: 'http://proxy',
|
||||
OPENAI_API_KEY: 'sk-test',
|
||||
ONECLI_API_KEY: 'onecli-secret',
|
||||
SOME_TOKEN: 'token',
|
||||
});
|
||||
|
||||
expect(env.PATH).toBe('/bin');
|
||||
expect(env.HOME).toBe('/home/node');
|
||||
expect(env.CODEX_HOME).toBe('/home/node/.codex');
|
||||
expect(env.HTTPS_PROXY).toBe('http://proxy');
|
||||
expect(env.OPENAI_API_KEY).toBeUndefined();
|
||||
expect(env.ONECLI_API_KEY).toBeUndefined();
|
||||
expect(env.SOME_TOKEN).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
function fakeServer(): { server: AppServer; writes: string[] } {
|
||||
const writes: string[] = [];
|
||||
const server = {
|
||||
process: { stdin: { write: (line: string) => writes.push(line) } },
|
||||
readline: { close: () => {} },
|
||||
pending: new Map(),
|
||||
notificationHandlers: [],
|
||||
exitHandlers: [],
|
||||
serverRequestHandlers: [],
|
||||
} as unknown as AppServer;
|
||||
return { server, writes };
|
||||
}
|
||||
@@ -0,0 +1,441 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { spawn, type ChildProcess } from 'child_process';
|
||||
import { createInterface, type Interface as ReadlineInterface } from 'readline';
|
||||
|
||||
// Cap Codex's project-doc loading (AGENTS.md). The host-side composer
|
||||
// (src/providers/codex-agents-md.ts) enforces the same cap at compose time —
|
||||
// host and container share no modules, so the constant lives in both.
|
||||
const CODEX_PROJECT_DOC_MAX_BYTES = 32 * 1024;
|
||||
|
||||
function log(msg: string): void {
|
||||
console.error(`[codex-app-server] ${msg}`);
|
||||
}
|
||||
|
||||
const INIT_TIMEOUT_MS = 30_000;
|
||||
|
||||
export const STALE_THREAD_RE = /thread\s+not\s+found|unknown\s+thread|thread[_\s]id|no such thread/i;
|
||||
|
||||
let nextRequestId = 1;
|
||||
|
||||
export interface JsonRpcResponse {
|
||||
id: number | string;
|
||||
result?: unknown;
|
||||
error?: { code: number; message: string; data?: unknown };
|
||||
}
|
||||
|
||||
export interface JsonRpcNotification {
|
||||
method: string;
|
||||
params?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface JsonRpcServerRequest {
|
||||
id: number | string;
|
||||
method: string;
|
||||
params?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
type JsonRpcMessage = JsonRpcResponse | JsonRpcNotification | JsonRpcServerRequest;
|
||||
|
||||
export interface AppServer {
|
||||
process: ChildProcess;
|
||||
readline: ReadlineInterface;
|
||||
pending: Map<number | string, { resolve: (r: JsonRpcResponse) => void; reject: (e: Error) => void }>;
|
||||
notificationHandlers: Array<(n: JsonRpcNotification) => void>;
|
||||
serverRequestHandlers: Array<(r: JsonRpcServerRequest) => void>;
|
||||
/**
|
||||
* Fired when the app-server process dies (exit or spawn error). Pending
|
||||
* request/response pairs are rejected separately via failPending — but a
|
||||
* turn in flight has NO pending request (turn/start already resolved); it
|
||||
* is parked on a notification waker that a dead process will never kick.
|
||||
* Without these handlers a mid-turn crash surfaces as a 10-minute turn
|
||||
* timeout instead of the real exit code, after the --rm container has
|
||||
* already taken the server's stderr with it.
|
||||
*/
|
||||
exitHandlers: Array<(err: Error) => void>;
|
||||
}
|
||||
|
||||
export interface CodexMcpServer {
|
||||
command: string;
|
||||
args?: string[];
|
||||
env?: Record<string, string>;
|
||||
}
|
||||
|
||||
export type CodexReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
|
||||
|
||||
// Codex runs unrestricted inside the container. NanoClaw's container isolation and
|
||||
// the OneCLI allow-list are the security boundary — not Codex's own sandbox/approval
|
||||
// primitives (which can't run here anyway: workspace-write/read-only need user
|
||||
// namespaces, which the agent containers deny). Both are hardcoded as instance-level
|
||||
// defaults in config.toml; threads and turns inherit them, never override them.
|
||||
const CODEX_SANDBOX_MODE = 'danger-full-access';
|
||||
const CODEX_APPROVAL_POLICY = 'never';
|
||||
|
||||
const CODEX_ENV_ALLOWLIST = new Set([
|
||||
'ALL_PROXY',
|
||||
'CURL_CA_BUNDLE',
|
||||
'GIT_SSL_CAINFO',
|
||||
'HOME',
|
||||
'HTTP_PROXY',
|
||||
'HTTPS_PROXY',
|
||||
'LANG',
|
||||
'LC_ALL',
|
||||
'NODE_EXTRA_CA_CERTS',
|
||||
'NO_PROXY',
|
||||
'PATH',
|
||||
'PNPM_HOME',
|
||||
'REQUESTS_CA_BUNDLE',
|
||||
'SSL_CERT_DIR',
|
||||
'SSL_CERT_FILE',
|
||||
'TEMP',
|
||||
'TERM',
|
||||
'TMP',
|
||||
'TMPDIR',
|
||||
'TZ',
|
||||
'USER',
|
||||
'all_proxy',
|
||||
'http_proxy',
|
||||
'https_proxy',
|
||||
'no_proxy',
|
||||
'CODEX_HOME',
|
||||
]);
|
||||
|
||||
export interface ThreadParams {
|
||||
model?: string;
|
||||
cwd: string;
|
||||
baseInstructions?: string;
|
||||
developerInstructions?: string;
|
||||
}
|
||||
|
||||
export interface TurnParams {
|
||||
threadId: string;
|
||||
inputText: string;
|
||||
model?: string;
|
||||
effort?: string;
|
||||
cwd?: string;
|
||||
}
|
||||
|
||||
export function spawnCodexAppServer(): AppServer {
|
||||
const args = ['app-server', '--listen', 'stdio://'];
|
||||
log(`Spawning: codex ${args.join(' ')}`);
|
||||
|
||||
const proc = spawn('codex', args, {
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
env: buildCodexProcessEnv(process.env),
|
||||
});
|
||||
const rl = createInterface({ input: proc.stdout! });
|
||||
|
||||
const server: AppServer = {
|
||||
process: proc,
|
||||
readline: rl,
|
||||
pending: new Map(),
|
||||
notificationHandlers: [],
|
||||
exitHandlers: [],
|
||||
serverRequestHandlers: [],
|
||||
};
|
||||
|
||||
proc.stderr?.on('data', (chunk: Buffer) => {
|
||||
const text = chunk.toString().trim();
|
||||
if (text) log(`[stderr] ${text}`);
|
||||
});
|
||||
|
||||
rl.on('line', (line: string) => {
|
||||
if (!line.trim()) return;
|
||||
let msg: JsonRpcMessage;
|
||||
try {
|
||||
msg = JSON.parse(line) as JsonRpcMessage;
|
||||
} catch {
|
||||
log(`[parse-error] ${line.slice(0, 200)}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (isResponse(msg)) {
|
||||
const handler = server.pending.get(msg.id);
|
||||
if (handler) {
|
||||
server.pending.delete(msg.id);
|
||||
handler.resolve(msg);
|
||||
}
|
||||
} else if (isServerRequest(msg)) {
|
||||
for (const h of server.serverRequestHandlers) h(msg);
|
||||
} else if ('method' in msg) {
|
||||
for (const h of server.notificationHandlers) h(msg as JsonRpcNotification);
|
||||
}
|
||||
});
|
||||
|
||||
const failPending = (err: Error): void => {
|
||||
for (const [, handler] of server.pending) handler.reject(err);
|
||||
server.pending.clear();
|
||||
};
|
||||
|
||||
proc.on('error', (err) => {
|
||||
log(`[process-error] ${err.message}`);
|
||||
failPending(err);
|
||||
for (const h of [...server.exitHandlers]) h(err);
|
||||
});
|
||||
|
||||
proc.on('exit', (code, signal) => {
|
||||
log(`[exit] code=${code} signal=${signal}`);
|
||||
const err = new Error(`Codex app-server exited: code=${code} signal=${signal}`);
|
||||
failPending(err);
|
||||
for (const h of [...server.exitHandlers]) h(err);
|
||||
});
|
||||
|
||||
return server;
|
||||
}
|
||||
|
||||
export function sendCodexRequest(
|
||||
server: AppServer,
|
||||
method: string,
|
||||
params?: Record<string, unknown>,
|
||||
timeoutMs = 60_000,
|
||||
): Promise<JsonRpcResponse> {
|
||||
const id = nextRequestId++;
|
||||
const req = params === undefined ? { id, method } : { id, method, params };
|
||||
const line = JSON.stringify(req) + '\n';
|
||||
|
||||
return new Promise<JsonRpcResponse>((resolve, reject) => {
|
||||
const timer = setTimeout(() => {
|
||||
server.pending.delete(id);
|
||||
reject(new Error(`Timeout waiting for ${method} response (${timeoutMs}ms)`));
|
||||
}, timeoutMs);
|
||||
|
||||
server.pending.set(id, {
|
||||
resolve: (r) => {
|
||||
clearTimeout(timer);
|
||||
resolve(r);
|
||||
},
|
||||
reject: (e) => {
|
||||
clearTimeout(timer);
|
||||
reject(e);
|
||||
},
|
||||
});
|
||||
|
||||
try {
|
||||
server.process.stdin!.write(line);
|
||||
} catch (err) {
|
||||
clearTimeout(timer);
|
||||
server.pending.delete(id);
|
||||
reject(err instanceof Error ? err : new Error(String(err)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function sendCodexNotification(server: AppServer, method: string, params?: Record<string, unknown>): void {
|
||||
const line = JSON.stringify(params === undefined ? { method } : { method, params }) + '\n';
|
||||
server.process.stdin!.write(line);
|
||||
}
|
||||
|
||||
export function sendCodexResponse(server: AppServer, id: number | string, result: unknown): void {
|
||||
try {
|
||||
server.process.stdin!.write(JSON.stringify({ id, result }) + '\n');
|
||||
} catch (err) {
|
||||
log(`[send-error] response id=${id}: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
export function killCodexAppServer(server: AppServer): void {
|
||||
try {
|
||||
server.readline.close();
|
||||
server.process.kill('SIGTERM');
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
|
||||
export async function initializeCodexAppServer(server: AppServer): Promise<void> {
|
||||
const resp = await sendCodexRequest(
|
||||
server,
|
||||
'initialize',
|
||||
{
|
||||
clientInfo: { name: 'nanoclaw', title: 'NanoClaw', version: '2.0' },
|
||||
capabilities: { experimentalApi: true },
|
||||
},
|
||||
INIT_TIMEOUT_MS,
|
||||
);
|
||||
if (resp.error) throw new Error(`initialize failed: ${resp.error.message}`);
|
||||
sendCodexNotification(server, 'initialized');
|
||||
}
|
||||
|
||||
export async function startOrResumeCodexThread(
|
||||
server: AppServer,
|
||||
threadId: string | undefined,
|
||||
params: ThreadParams,
|
||||
): Promise<string> {
|
||||
const baseParams = {
|
||||
model: params.model,
|
||||
cwd: params.cwd,
|
||||
approvalPolicy: CODEX_APPROVAL_POLICY,
|
||||
sandbox: CODEX_SANDBOX_MODE,
|
||||
baseInstructions: params.baseInstructions,
|
||||
developerInstructions: params.developerInstructions,
|
||||
personality: 'friendly',
|
||||
sessionStartSource: 'startup',
|
||||
persistExtendedHistory: false,
|
||||
};
|
||||
|
||||
if (threadId) {
|
||||
const resp = await sendCodexRequest(server, 'thread/resume', {
|
||||
threadId,
|
||||
...baseParams,
|
||||
excludeTurns: true,
|
||||
});
|
||||
if (!resp.error) return threadId;
|
||||
if (!STALE_THREAD_RE.test(resp.error.message)) {
|
||||
throw new Error(`thread/resume failed: ${resp.error.message}`);
|
||||
}
|
||||
log(`Stale thread ${threadId}; starting fresh thread.`);
|
||||
}
|
||||
|
||||
const resp = await sendCodexRequest(server, 'thread/start', {
|
||||
...baseParams,
|
||||
experimentalRawEvents: false,
|
||||
});
|
||||
if (resp.error) throw new Error(`thread/start failed: ${resp.error.message}`);
|
||||
|
||||
const result = resp.result as { thread?: { id?: string } } | undefined;
|
||||
const newThreadId = result?.thread?.id;
|
||||
if (!newThreadId) throw new Error('thread/start response missing thread ID');
|
||||
return newThreadId;
|
||||
}
|
||||
|
||||
export async function startCodexTurn(server: AppServer, params: TurnParams): Promise<string> {
|
||||
const resp = await sendCodexRequest(server, 'turn/start', {
|
||||
threadId: params.threadId,
|
||||
input: [{ type: 'text', text: params.inputText, text_elements: [] }],
|
||||
model: params.model,
|
||||
effort: params.effort,
|
||||
cwd: params.cwd,
|
||||
});
|
||||
if (resp.error) throw new Error(`turn/start failed: ${resp.error.message}`);
|
||||
const result = resp.result as { turn?: { id?: string } } | undefined;
|
||||
const turnId = result?.turn?.id;
|
||||
if (!turnId) throw new Error('turn/start response missing turn ID');
|
||||
return turnId;
|
||||
}
|
||||
|
||||
export async function steerCodexTurn(
|
||||
server: AppServer,
|
||||
threadId: string,
|
||||
turnId: string,
|
||||
inputText: string,
|
||||
): Promise<void> {
|
||||
const resp = await sendCodexRequest(server, 'turn/steer', {
|
||||
threadId,
|
||||
expectedTurnId: turnId,
|
||||
input: [{ type: 'text', text: inputText, text_elements: [] }],
|
||||
});
|
||||
if (resp.error) throw new Error(`turn/steer failed: ${resp.error.message}`);
|
||||
}
|
||||
|
||||
export async function interruptCodexTurn(server: AppServer, threadId: string, turnId: string): Promise<void> {
|
||||
const resp = await sendCodexRequest(server, 'turn/interrupt', { threadId, turnId }, 10_000);
|
||||
if (resp.error) throw new Error(`turn/interrupt failed: ${resp.error.message}`);
|
||||
}
|
||||
|
||||
// With approval_policy=never the command/patch approval requests don't fire, but the
|
||||
// app-server still sends a few non-approval server→client requests (permission
|
||||
// negotiation, MCP elicitations, tool calls) that must be answered or the turn hangs.
|
||||
// NanoClaw is the boundary, so accept/grant everything.
|
||||
export function attachCodexAutoApproval(server: AppServer): void {
|
||||
server.serverRequestHandlers.push((req) => {
|
||||
switch (req.method) {
|
||||
case 'item/commandExecution/requestApproval':
|
||||
case 'item/fileChange/requestApproval':
|
||||
sendCodexResponse(server, req.id, { decision: 'accept' });
|
||||
break;
|
||||
case 'applyPatchApproval':
|
||||
case 'execCommandApproval':
|
||||
sendCodexResponse(server, req.id, { decision: 'approved' });
|
||||
break;
|
||||
case 'item/permissions/requestApproval':
|
||||
sendCodexResponse(server, req.id, {
|
||||
permissions: { fileSystem: { read: ['/'], write: ['/'] }, network: { enabled: true } },
|
||||
scope: 'turn',
|
||||
strictAutoReview: true,
|
||||
});
|
||||
break;
|
||||
case 'item/tool/requestUserInput':
|
||||
sendCodexResponse(server, req.id, { answers: {} });
|
||||
break;
|
||||
case 'mcpServer/elicitation/request':
|
||||
sendCodexResponse(server, req.id, { action: 'cancel', content: null, _meta: null });
|
||||
break;
|
||||
case 'item/tool/call':
|
||||
sendCodexResponse(server, req.id, { success: false, contentItems: [] });
|
||||
break;
|
||||
default:
|
||||
sendCodexError(server, req.id, `Unhandled Codex app-server request: ${req.method}`);
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function writeCodexConfigToml(
|
||||
servers: Record<string, CodexMcpServer>,
|
||||
opts: { model?: string; effort?: string } = {},
|
||||
): void {
|
||||
const codexConfigDir = path.join(process.env.HOME || '/home/node', '.codex');
|
||||
fs.mkdirSync(codexConfigDir, { recursive: true });
|
||||
const configTomlPath = path.join(codexConfigDir, 'config.toml');
|
||||
|
||||
// Instance-level defaults the app-server reads on startup; threads/turns inherit them.
|
||||
const lines: string[] = [
|
||||
`sandbox_mode = ${tomlBasicString(CODEX_SANDBOX_MODE)}`,
|
||||
`approval_policy = ${tomlBasicString(CODEX_APPROVAL_POLICY)}`,
|
||||
`project_doc_max_bytes = ${CODEX_PROJECT_DOC_MAX_BYTES}`,
|
||||
];
|
||||
if (opts.model) lines.push(`model = ${tomlBasicString(opts.model)}`);
|
||||
if (opts.effort) lines.push(`model_reasoning_effort = ${tomlBasicString(opts.effort)}`);
|
||||
lines.push('');
|
||||
|
||||
for (const [name, config] of Object.entries(servers)) {
|
||||
lines.push(`[mcp_servers.${name}]`);
|
||||
lines.push(`command = ${tomlBasicString(config.command)}`);
|
||||
if (config.args && config.args.length > 0) {
|
||||
lines.push(`args = [${config.args.map(tomlBasicString).join(', ')}]`);
|
||||
}
|
||||
if (config.env && Object.keys(config.env).length > 0) {
|
||||
lines.push(`[mcp_servers.${name}.env]`);
|
||||
for (const [key, value] of Object.entries(config.env)) {
|
||||
lines.push(`${key} = ${tomlBasicString(value)}`);
|
||||
}
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
fs.writeFileSync(configTomlPath, lines.join('\n'));
|
||||
}
|
||||
|
||||
export function buildCodexProcessEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
|
||||
const next: NodeJS.ProcessEnv = {};
|
||||
for (const key of CODEX_ENV_ALLOWLIST) {
|
||||
const value = env[key];
|
||||
if (value !== undefined) next[key] = value;
|
||||
}
|
||||
if (!next.CODEX_HOME) next.CODEX_HOME = next.HOME ? path.join(next.HOME, '.codex') : '/home/node/.codex';
|
||||
if (!next.HOME) next.HOME = '/home/node';
|
||||
return next;
|
||||
}
|
||||
|
||||
export function tomlBasicString(value: string): string {
|
||||
if (value.includes('\n') || value.includes('\r')) {
|
||||
throw new Error(`MCP config value contains newline: ${JSON.stringify(value.slice(0, 40))}`);
|
||||
}
|
||||
return `"${value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
|
||||
}
|
||||
|
||||
function sendCodexError(server: AppServer, id: number | string, message: string, data?: unknown): void {
|
||||
try {
|
||||
server.process.stdin!.write(JSON.stringify({ id, error: { code: -32000, message, data } }) + '\n');
|
||||
} catch (err) {
|
||||
log(`[send-error] error id=${id}: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
function isResponse(msg: JsonRpcMessage): msg is JsonRpcResponse {
|
||||
return 'id' in msg && ('result' in msg || 'error' in msg) && !('method' in msg);
|
||||
}
|
||||
|
||||
function isServerRequest(msg: JsonRpcMessage): msg is JsonRpcServerRequest {
|
||||
return 'id' in msg && 'method' in msg;
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Structural guard for the Codex CLI install in container/Dockerfile.
|
||||
//
|
||||
// @openai/codex is a CLI *binary* installed via the Dockerfile, not an
|
||||
// importable package, so the barrel-driven registration tests cannot see it.
|
||||
// This test reads the real Dockerfile and asserts the version ARG and the
|
||||
// `pnpm install -g` line for @openai/codex are both present. It goes red if
|
||||
// either Dockerfile edit is dropped or drifts.
|
||||
//
|
||||
// Runs under bun (same suite as the container registration test):
|
||||
// cd container/agent-runner && bun test src/providers/codex-dockerfile.test.ts
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
// container/agent-runner/src/providers/ -> container/Dockerfile
|
||||
const DOCKERFILE = path.join(import.meta.dir, '..', '..', '..', 'Dockerfile');
|
||||
|
||||
describe('container/Dockerfile codex CLI install', () => {
|
||||
const dockerfile = readFileSync(DOCKERFILE, 'utf8');
|
||||
|
||||
it('declares the CODEX_VERSION ARG', () => {
|
||||
expect(dockerfile).toMatch(/ARG\s+CODEX_VERSION=/);
|
||||
});
|
||||
|
||||
it('installs the @openai/codex CLI pinned to that ARG', () => {
|
||||
expect(dockerfile).toMatch(/pnpm install -g\s+"@openai\/codex@\$\{CODEX_VERSION\}"/);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* Integration test for the codex provider's CONTAINER-side reach-in: the self-registration
|
||||
* import in container/agent-runner/src/providers/index.ts. Importing the barrel runs
|
||||
* codex.ts's top-level registerProvider('codex', …); without that import line
|
||||
* createProvider('codex') throws 'Unknown provider' at runtime.
|
||||
*
|
||||
* Behavior, not structural, and BARREL-ONLY: it imports the real barrel (./index.js),
|
||||
* never ./codex.js directly, then asserts listProviderNames() contains the provider. The
|
||||
* existing codex.factory.test.ts imports ./codex.js directly, so it self-registers and
|
||||
* stays GREEN when the barrel line is deleted — a unit test, not a registration guard.
|
||||
* This goes red if the barrel import is deleted/drifts or the barrel fails to evaluate. codex uses the @openai/codex CLI *binary* (not an importable package), so this test does not guard that dependency — the Dockerfile install line is guarded structurally + by the container build (see the skill validate step).
|
||||
*/
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { listProviderNames } from './provider-registry.js';
|
||||
import './index.js'; // the real container provider barrel — triggers each provider's registerProvider()
|
||||
|
||||
describe('codex provider registration', () => {
|
||||
it('registers codex via the provider barrel', () => {
|
||||
expect(listProviderNames()).toContain('codex');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,17 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
|
||||
import { CodexProvider } from './codex.js';
|
||||
|
||||
describe('CodexProvider', () => {
|
||||
it('rejects unsupported reasoning effort values', () => {
|
||||
expect(() => new CodexProvider({ effort: 'max' })).toThrow(/Unsupported Codex reasoning effort/);
|
||||
});
|
||||
|
||||
it('normalizes supported reasoning effort values', () => {
|
||||
expect(new CodexProvider({ effort: 'HIGH' })).toBeInstanceOf(CodexProvider);
|
||||
});
|
||||
|
||||
it('accepts supported reasoning effort values', () => {
|
||||
expect(new CodexProvider({ effort: 'xhigh' })).toBeInstanceOf(CodexProvider);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,419 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { registerProvider } from './provider-registry.js';
|
||||
import type {
|
||||
AgentProvider,
|
||||
AgentQuery,
|
||||
McpServerConfig,
|
||||
ProviderEvent,
|
||||
ProviderExchange,
|
||||
ProviderOptions,
|
||||
QueryInput,
|
||||
} from './types.js';
|
||||
import { archiveProviderExchange } from './exchange-archive.js';
|
||||
import {
|
||||
type AppServer,
|
||||
type CodexReasoningEffort,
|
||||
type JsonRpcNotification,
|
||||
STALE_THREAD_RE,
|
||||
attachCodexAutoApproval,
|
||||
initializeCodexAppServer,
|
||||
interruptCodexTurn,
|
||||
killCodexAppServer,
|
||||
spawnCodexAppServer,
|
||||
startCodexTurn,
|
||||
startOrResumeCodexThread,
|
||||
steerCodexTurn,
|
||||
writeCodexConfigToml,
|
||||
} from './codex-app-server.js';
|
||||
|
||||
const TURN_TIMEOUT_MS = 10 * 60 * 1000;
|
||||
const SUPPORTED_EFFORTS = new Set<CodexReasoningEffort>(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
|
||||
|
||||
export interface CodexRuntimeDeps {
|
||||
writeCodexConfigToml: typeof writeCodexConfigToml;
|
||||
spawnCodexAppServer: typeof spawnCodexAppServer;
|
||||
attachCodexAutoApproval: typeof attachCodexAutoApproval;
|
||||
initializeCodexAppServer: typeof initializeCodexAppServer;
|
||||
startOrResumeCodexThread: typeof startOrResumeCodexThread;
|
||||
startCodexTurn: typeof startCodexTurn;
|
||||
steerCodexTurn: typeof steerCodexTurn;
|
||||
interruptCodexTurn: typeof interruptCodexTurn;
|
||||
killCodexAppServer: typeof killCodexAppServer;
|
||||
}
|
||||
|
||||
const defaultCodexRuntimeDeps: CodexRuntimeDeps = {
|
||||
writeCodexConfigToml,
|
||||
spawnCodexAppServer,
|
||||
attachCodexAutoApproval,
|
||||
initializeCodexAppServer,
|
||||
startOrResumeCodexThread,
|
||||
startCodexTurn,
|
||||
steerCodexTurn,
|
||||
interruptCodexTurn,
|
||||
killCodexAppServer,
|
||||
};
|
||||
|
||||
function classifyError(message: string): string | undefined {
|
||||
if (/auth|api key|unauthorized|login|credential/i.test(message)) return 'auth';
|
||||
if (/quota|rate limit|insufficient|billing|credit/i.test(message)) return 'quota';
|
||||
if (/sandbox|permission|denied/i.test(message)) return 'sandbox';
|
||||
if (/thread|conversation|session/i.test(message)) return 'stale-session';
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normalizeEffort(effort: string | undefined): CodexReasoningEffort | undefined {
|
||||
const normalized = effort?.trim().toLowerCase();
|
||||
if (!normalized) return undefined;
|
||||
if (!SUPPORTED_EFFORTS.has(normalized as CodexReasoningEffort)) {
|
||||
throw new Error(`Unsupported Codex reasoning effort: ${effort}`);
|
||||
}
|
||||
return normalized as CodexReasoningEffort;
|
||||
}
|
||||
|
||||
export class CodexProvider implements AgentProvider {
|
||||
readonly supportsNativeSlashCommands = false;
|
||||
// Codex has no native NanoClaw memory — opt in to the runner's persistent
|
||||
// memory/ scaffold (see memory-scaffold.ts).
|
||||
readonly usesMemoryScaffold = true;
|
||||
// The app-server keeps history server-side; there is no on-disk transcript,
|
||||
// so the provider persists each exchange itself into `conversations/`
|
||||
// (see exchange-archive.ts). The poll-loop reports exchanges through this
|
||||
// hook and does nothing else — archiving is payload code, not runner code.
|
||||
onExchangeComplete(exchange: ProviderExchange): void {
|
||||
archiveProviderExchange({
|
||||
provider: 'codex',
|
||||
prompt: exchange.prompt,
|
||||
result: exchange.result,
|
||||
continuation: exchange.continuation,
|
||||
status: exchange.status,
|
||||
});
|
||||
}
|
||||
|
||||
private readonly mcpServers: Record<string, McpServerConfig>;
|
||||
private readonly model?: string;
|
||||
private readonly effort?: CodexReasoningEffort;
|
||||
private readonly runtime: CodexRuntimeDeps;
|
||||
|
||||
constructor(options: ProviderOptions = {}, runtime: CodexRuntimeDeps = defaultCodexRuntimeDeps) {
|
||||
this.mcpServers = options.mcpServers ?? {};
|
||||
this.model = options.model;
|
||||
this.runtime = runtime;
|
||||
this.effort = normalizeEffort(options.effort);
|
||||
}
|
||||
|
||||
isSessionInvalid(err: unknown): boolean {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
return STALE_THREAD_RE.test(msg);
|
||||
}
|
||||
|
||||
query(input: QueryInput): AgentQuery {
|
||||
const pending: string[] = [input.prompt];
|
||||
let waiting: (() => void) | null = null;
|
||||
let ended = false;
|
||||
let aborted = false;
|
||||
let activeServer: AppServer | null = null;
|
||||
let activeThreadId: string | null = null;
|
||||
let activeTurnId: string | null = null;
|
||||
let wakeActiveTurn: (() => void) | null = null;
|
||||
|
||||
const wake = (): void => {
|
||||
waiting?.();
|
||||
waiting = null;
|
||||
};
|
||||
|
||||
const pushOrSteer = (message: string): void => {
|
||||
if (activeServer && activeThreadId && activeTurnId) {
|
||||
void this.runtime.steerCodexTurn(activeServer, activeThreadId, activeTurnId, message).catch(() => {
|
||||
pending.push(message);
|
||||
wake();
|
||||
});
|
||||
return;
|
||||
}
|
||||
pending.push(message);
|
||||
wake();
|
||||
};
|
||||
|
||||
const self = this;
|
||||
|
||||
async function* gen(): AsyncGenerator<ProviderEvent> {
|
||||
self.runtime.writeCodexConfigToml(self.mcpServers, { model: self.model, effort: self.effort });
|
||||
const server = self.runtime.spawnCodexAppServer();
|
||||
activeServer = server;
|
||||
self.runtime.attachCodexAutoApproval(server);
|
||||
|
||||
let threadId: string | undefined = input.continuation;
|
||||
let initYielded = false;
|
||||
|
||||
try {
|
||||
await self.runtime.initializeCodexAppServer(server);
|
||||
threadId = await self.runtime.startOrResumeCodexThread(server, threadId, {
|
||||
model: self.model,
|
||||
cwd: input.cwd,
|
||||
baseInstructions: input.systemContext?.instructions,
|
||||
});
|
||||
activeThreadId = threadId;
|
||||
|
||||
while (!aborted) {
|
||||
while (pending.length === 0 && !ended && !aborted) {
|
||||
await new Promise<void>((resolve) => {
|
||||
waiting = resolve;
|
||||
});
|
||||
}
|
||||
if (aborted) return;
|
||||
if (pending.length === 0 && ended) return;
|
||||
|
||||
const text = pending.shift()!;
|
||||
yield* runOneTurn(
|
||||
server,
|
||||
threadId,
|
||||
text,
|
||||
self.model,
|
||||
self.effort,
|
||||
input.cwd,
|
||||
(turnId) => {
|
||||
activeTurnId = turnId;
|
||||
},
|
||||
() => {
|
||||
activeTurnId = null;
|
||||
},
|
||||
() => initYielded,
|
||||
() => {
|
||||
initYielded = true;
|
||||
},
|
||||
() => aborted,
|
||||
(waker) => {
|
||||
wakeActiveTurn = waker;
|
||||
},
|
||||
self.runtime.startCodexTurn,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
activeTurnId = null;
|
||||
activeThreadId = null;
|
||||
activeServer = null;
|
||||
wakeActiveTurn = null;
|
||||
self.runtime.killCodexAppServer(server);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
push: pushOrSteer,
|
||||
end: () => {
|
||||
ended = true;
|
||||
wake();
|
||||
},
|
||||
abort: () => {
|
||||
aborted = true;
|
||||
if (activeServer && activeThreadId && activeTurnId) {
|
||||
void this.runtime.interruptCodexTurn(activeServer, activeThreadId, activeTurnId).catch(() => {});
|
||||
}
|
||||
wakeActiveTurn?.();
|
||||
wake();
|
||||
},
|
||||
events: gen(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async function* runOneTurn(
|
||||
server: AppServer,
|
||||
threadId: string,
|
||||
inputText: string,
|
||||
model: string | undefined,
|
||||
effort: string | undefined,
|
||||
cwd: string,
|
||||
setActiveTurn: (turnId: string) => void,
|
||||
clearActiveTurn: () => void,
|
||||
hasInit: () => boolean,
|
||||
markInit: () => void,
|
||||
isAborted: () => boolean,
|
||||
setAbortWaker: (waker: (() => void) | null) => void,
|
||||
startTurn: typeof startCodexTurn,
|
||||
): AsyncGenerator<ProviderEvent> {
|
||||
const state: { error: Error | null } = { error: null };
|
||||
let resultText = '';
|
||||
let turnDone = false;
|
||||
let turnId: string | null = null;
|
||||
|
||||
// A finished turn can no longer absorb steered input: codex's turn/steer
|
||||
// against a completed turn resolves as a no-op, so a follow-up routed there
|
||||
// is lost silently. Clear the active-turn marker the moment the turn ends —
|
||||
// before the generator drains and tears down in its `finally` — so
|
||||
// pushOrSteer queues any racing follow-up into a fresh turn instead.
|
||||
const finishTurn = (): void => {
|
||||
turnDone = true;
|
||||
clearActiveTurn();
|
||||
};
|
||||
|
||||
const buffer: ProviderEvent[] = [];
|
||||
let waker: (() => void) | null = null;
|
||||
const kick = (): void => {
|
||||
waker?.();
|
||||
waker = null;
|
||||
};
|
||||
setAbortWaker(kick);
|
||||
|
||||
const handler = (n: JsonRpcNotification): void => {
|
||||
const method = n.method;
|
||||
const params = n.params ?? {};
|
||||
buffer.push({ type: 'activity' });
|
||||
|
||||
switch (method) {
|
||||
case 'thread/started': {
|
||||
const thread = params.thread as { id?: string } | undefined;
|
||||
if (thread?.id && !hasInit()) {
|
||||
markInit();
|
||||
buffer.push({ type: 'init', continuation: thread.id });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'turn/started': {
|
||||
const turn = params.turn as { id?: string } | undefined;
|
||||
if (turn?.id) {
|
||||
turnId = turn.id;
|
||||
setActiveTurn(turn.id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'item/agentMessage/delta': {
|
||||
const delta = params.delta as string | undefined;
|
||||
if (delta) resultText += delta;
|
||||
break;
|
||||
}
|
||||
case 'item/completed': {
|
||||
const item = params.item as { type?: string; text?: string } | undefined;
|
||||
if (item?.type === 'agentMessage' && item.text) resultText = item.text;
|
||||
break;
|
||||
}
|
||||
case 'thread/status/changed': {
|
||||
const status = params.status as string | undefined;
|
||||
if (status) buffer.push({ type: 'progress', message: `status: ${status}` });
|
||||
break;
|
||||
}
|
||||
case 'error': {
|
||||
const err = params.error as { message?: string; additionalDetails?: string | null } | undefined;
|
||||
const msg = [err?.message, err?.additionalDetails].filter(Boolean).join(': ') || 'Codex turn failed';
|
||||
state.error = new Error(msg);
|
||||
finishTurn();
|
||||
break;
|
||||
}
|
||||
case 'turn/completed': {
|
||||
const turn = params.turn as
|
||||
| { error?: { message?: string; additionalDetails?: string | null } | null; items?: unknown[] }
|
||||
| undefined;
|
||||
const agentMessage = turn?.items
|
||||
?.filter((item): item is { type: string; text?: string } => typeof item === 'object' && item !== null)
|
||||
.find((item) => item.type === 'agentMessage' && item.text);
|
||||
if (agentMessage?.text) resultText = agentMessage.text;
|
||||
if (turn?.error) {
|
||||
const msg =
|
||||
[turn.error.message, turn.error.additionalDetails].filter(Boolean).join(': ') || 'Codex turn failed';
|
||||
state.error = new Error(msg);
|
||||
}
|
||||
finishTurn();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
kick();
|
||||
};
|
||||
|
||||
server.notificationHandlers.push(handler);
|
||||
|
||||
// A dead app-server can't send the notification this turn is parked on —
|
||||
// end the turn immediately with the real cause instead of the 10-min timeout.
|
||||
const onServerExit = (err: Error): void => {
|
||||
if (turnDone) return;
|
||||
state.error = err;
|
||||
finishTurn();
|
||||
kick();
|
||||
};
|
||||
server.exitHandlers.push(onServerExit);
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
state.error = new Error(`Turn timed out after ${TURN_TIMEOUT_MS}ms`);
|
||||
finishTurn();
|
||||
kick();
|
||||
}, TURN_TIMEOUT_MS);
|
||||
|
||||
try {
|
||||
if (!hasInit()) {
|
||||
markInit();
|
||||
buffer.push({ type: 'init', continuation: threadId });
|
||||
}
|
||||
|
||||
turnId = await startTurn(server, {
|
||||
threadId,
|
||||
inputText,
|
||||
model,
|
||||
effort,
|
||||
cwd,
|
||||
});
|
||||
setActiveTurn(turnId);
|
||||
const imagesBefore = listGeneratedImages(threadId);
|
||||
if (isAborted()) return;
|
||||
|
||||
while (true) {
|
||||
while (buffer.length > 0) {
|
||||
yield buffer.shift()!;
|
||||
}
|
||||
if (turnDone || isAborted()) break;
|
||||
await new Promise<void>((resolve) => {
|
||||
waker = resolve;
|
||||
});
|
||||
waker = null;
|
||||
}
|
||||
|
||||
while (buffer.length > 0) yield buffer.shift()!;
|
||||
|
||||
if (isAborted()) return;
|
||||
|
||||
if (state.error) {
|
||||
yield {
|
||||
type: 'error',
|
||||
message: state.error.message,
|
||||
retryable: false,
|
||||
classification: classifyError(state.error.message),
|
||||
};
|
||||
throw state.error;
|
||||
}
|
||||
|
||||
for (const imagePath of listGeneratedImages(threadId)) {
|
||||
if (!imagesBefore.has(imagePath)) {
|
||||
yield { type: 'file', path: imagePath };
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: 'result', text: resultText || null };
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
clearActiveTurn();
|
||||
setAbortWaker(null);
|
||||
const idx = server.notificationHandlers.indexOf(handler);
|
||||
if (idx >= 0) server.notificationHandlers.splice(idx, 1);
|
||||
const exitIdx = server.exitHandlers.indexOf(onServerExit);
|
||||
if (exitIdx >= 0) server.exitHandlers.splice(exitIdx, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Codex's built-in image generation saves into CODEX_HOME/generated_images/
|
||||
* <threadId>/ — its native client renders those to the user, so the model
|
||||
* believes delivery already happened and won't send_file them. The runner
|
||||
* must deliver them itself: snapshot the dir at turn start, emit a `file`
|
||||
* event for anything new at turn end.
|
||||
*/
|
||||
function listGeneratedImages(threadId: string): Set<string> {
|
||||
const dir = path.join(process.env.CODEX_HOME || '/home/node/.codex', 'generated_images', threadId);
|
||||
try {
|
||||
return new Set(fs.readdirSync(dir).map((f) => path.join(dir, f)));
|
||||
} catch {
|
||||
return new Set();
|
||||
}
|
||||
}
|
||||
|
||||
registerProvider('codex', (opts) => new CodexProvider(opts));
|
||||
@@ -0,0 +1,267 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { CodexProvider, type CodexRuntimeDeps } from './codex.js';
|
||||
import type { AppServer, JsonRpcNotification, TurnParams } from './codex-app-server.js';
|
||||
import type { ProviderEvent } from './types.js';
|
||||
|
||||
describe('CodexProvider active turns', () => {
|
||||
it('steers follow-ups into the active turn and yields liveness activity', async () => {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'first prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
query.push('follow-up prompt');
|
||||
await waitFor(() => fake.steerCalls.length === 1);
|
||||
query.end();
|
||||
fake.completeTurn('final answer');
|
||||
|
||||
await collect;
|
||||
|
||||
expect(fake.startCalls).toHaveLength(1);
|
||||
expect(fake.startCalls[0].inputText).toBe('first prompt');
|
||||
expect(fake.steerCalls).toEqual([{ threadId: 'thread-1', turnId: 'turn-1', inputText: 'follow-up prompt' }]);
|
||||
expect(events.filter((event) => event.type === 'activity').length).toBeGreaterThanOrEqual(2);
|
||||
expect(events.filter((event) => event.type === 'result')).toEqual([{ type: 'result', text: 'final answer' }]);
|
||||
expect(fake.killed).toBe(true);
|
||||
});
|
||||
|
||||
it('queues follow-ups for the next turn when steering is rejected', async () => {
|
||||
const fake = createFakeCodexRuntime({ rejectSteer: true });
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'first prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
query.push('queued follow-up');
|
||||
await waitFor(() => fake.steerCalls.length === 1);
|
||||
await sleep(0);
|
||||
|
||||
fake.completeTurn('first answer');
|
||||
await waitFor(() => fake.startCalls.length === 2);
|
||||
query.end();
|
||||
fake.completeTurn('second answer');
|
||||
|
||||
await collect;
|
||||
|
||||
expect(fake.startCalls.map((call) => call.inputText)).toEqual(['first prompt', 'queued follow-up']);
|
||||
expect(fake.steerCalls).toHaveLength(1);
|
||||
expect(events.filter((event) => event.type === 'result')).toEqual([
|
||||
{ type: 'result', text: 'first answer' },
|
||||
{ type: 'result', text: 'second answer' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('queues a follow-up that races turn completion into a new turn, never steering the finished turn', async () => {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'first prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
|
||||
// The turn completes, then a follow-up lands in the same tick — before the
|
||||
// generator has drained and torn the turn down. codex's turn/steer no-ops
|
||||
// on a finished turn (resolves without error), so steering here would drop
|
||||
// the message silently. It must start a fresh turn instead.
|
||||
fake.completeTurn('first answer');
|
||||
query.push('racing follow-up');
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 2);
|
||||
query.end();
|
||||
fake.completeTurn('second answer');
|
||||
|
||||
await collect;
|
||||
|
||||
expect(fake.steerCalls).toHaveLength(0);
|
||||
expect(fake.startCalls.map((call) => call.inputText)).toEqual(['first prompt', 'racing follow-up']);
|
||||
expect(events.filter((event) => event.type === 'result')).toEqual([
|
||||
{ type: 'result', text: 'first answer' },
|
||||
{ type: 'result', text: 'second answer' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('interrupts the active turn and closes the stream on abort', async () => {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'first prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
query.abort();
|
||||
|
||||
await collect;
|
||||
|
||||
expect(fake.interruptCalls).toEqual([{ threadId: 'thread-1', turnId: 'turn-1' }]);
|
||||
expect(events.some((event) => event.type === 'result')).toBe(false);
|
||||
expect(fake.killed).toBe(true);
|
||||
});
|
||||
|
||||
it('threads the configured model and effort into the turn', async () => {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({ model: 'gpt-5.5', effort: 'high' }, fake.runtime);
|
||||
const query = provider.query({ prompt: 'first prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
query.end();
|
||||
fake.completeTurn('final answer');
|
||||
|
||||
await collect;
|
||||
|
||||
expect(fake.startCalls[0].model).toBe('gpt-5.5');
|
||||
expect(fake.startCalls[0].effort).toBe('high');
|
||||
expect(events.filter((event) => event.type === 'result')).toEqual([{ type: 'result', text: 'final answer' }]);
|
||||
});
|
||||
|
||||
it('delivers harness-generated images as file events — the model never sends them itself', async () => {
|
||||
const codexHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-home-'));
|
||||
const prevHome = process.env.CODEX_HOME;
|
||||
process.env.CODEX_HOME = codexHome;
|
||||
try {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'make an image', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
const collect = collectEvents(query.events, events);
|
||||
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
// Codex's built-in image_gen writes into CODEX_HOME mid-turn.
|
||||
const imagesDir = path.join(codexHome, 'generated_images', 'thread-1');
|
||||
fs.mkdirSync(imagesDir, { recursive: true });
|
||||
fs.writeFileSync(path.join(imagesDir, 'ig_abc.png'), 'png-bytes');
|
||||
|
||||
query.end();
|
||||
fake.completeTurn('Here you go — created the image.');
|
||||
await collect;
|
||||
|
||||
const files = events.filter((event) => event.type === 'file') as Array<{ type: 'file'; path: string }>;
|
||||
expect(files).toHaveLength(1);
|
||||
expect(files[0].path).toBe(path.join(imagesDir, 'ig_abc.png'));
|
||||
// file events arrive before the result so delivery shares the turn.
|
||||
expect(events.findIndex((e) => e.type === 'file')).toBeLessThan(events.findIndex((e) => e.type === 'result'));
|
||||
} finally {
|
||||
if (prevHome === undefined) delete process.env.CODEX_HOME;
|
||||
else process.env.CODEX_HOME = prevHome;
|
||||
fs.rmSync(codexHome, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('ends the turn immediately with the real cause when the app-server dies mid-turn', async () => {
|
||||
const fake = createFakeCodexRuntime();
|
||||
const provider = new CodexProvider({}, fake.runtime);
|
||||
const query = provider.query({ prompt: 'prompt', cwd: '/workspace/agent' });
|
||||
const events: ProviderEvent[] = [];
|
||||
|
||||
const collect = collectEvents(query.events, events);
|
||||
await waitFor(() => fake.startCalls.length === 1);
|
||||
|
||||
// No pending request exists mid-turn (turn/start already resolved), so
|
||||
// only the exitHandlers seam can end the turn — without it this parks
|
||||
// on the waker until the 10-minute turn timeout.
|
||||
fake.crashServer(new Error('Codex app-server exited: code=1 signal=null'));
|
||||
|
||||
// The generator yields the error event, then rethrows to its consumer.
|
||||
await collect.catch(() => {});
|
||||
|
||||
const errors = events.filter((event) => event.type === 'error');
|
||||
expect(errors).toHaveLength(1);
|
||||
expect((errors[0] as { message: string }).message).toContain('app-server exited');
|
||||
});
|
||||
});
|
||||
|
||||
function createFakeCodexRuntime(opts: { rejectSteer?: boolean } = {}) {
|
||||
const server = fakeServer();
|
||||
const startCalls: TurnParams[] = [];
|
||||
const steerCalls: Array<{ threadId: string; turnId: string; inputText: string }> = [];
|
||||
const interruptCalls: Array<{ threadId: string; turnId: string }> = [];
|
||||
let killed = false;
|
||||
|
||||
const notify = (method: string, params?: Record<string, unknown>): void => {
|
||||
const notification: JsonRpcNotification = { method, params };
|
||||
for (const handler of [...server.notificationHandlers]) handler(notification);
|
||||
};
|
||||
|
||||
const runtime: CodexRuntimeDeps = {
|
||||
writeCodexConfigToml: () => {},
|
||||
spawnCodexAppServer: () => server,
|
||||
attachCodexAutoApproval: () => {},
|
||||
initializeCodexAppServer: async () => {},
|
||||
startOrResumeCodexThread: async (_server, threadId) => threadId ?? 'thread-1',
|
||||
startCodexTurn: async (_server, params) => {
|
||||
startCalls.push(params);
|
||||
const turnId = `turn-${startCalls.length}`;
|
||||
notify('turn/started', { turn: { id: turnId } });
|
||||
return turnId;
|
||||
},
|
||||
steerCodexTurn: async (_server, threadId, turnId, inputText) => {
|
||||
steerCalls.push({ threadId, turnId, inputText });
|
||||
if (opts.rejectSteer) throw new Error('steer rejected');
|
||||
},
|
||||
interruptCodexTurn: async (_server, threadId, turnId) => {
|
||||
interruptCalls.push({ threadId, turnId });
|
||||
},
|
||||
killCodexAppServer: () => {
|
||||
killed = true;
|
||||
},
|
||||
};
|
||||
|
||||
return {
|
||||
runtime,
|
||||
startCalls,
|
||||
steerCalls,
|
||||
interruptCalls,
|
||||
get killed() {
|
||||
return killed;
|
||||
},
|
||||
completeTurn(text: string) {
|
||||
notify('turn/completed', { turn: { items: [{ type: 'agentMessage', text }] } });
|
||||
},
|
||||
crashServer(err: Error) {
|
||||
for (const h of [...server.exitHandlers]) h(err);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function fakeServer(): AppServer {
|
||||
return {
|
||||
process: { stdin: { write: () => true }, kill: () => true },
|
||||
readline: { close: () => {} },
|
||||
pending: new Map(),
|
||||
notificationHandlers: [],
|
||||
exitHandlers: [],
|
||||
serverRequestHandlers: [],
|
||||
} as unknown as AppServer;
|
||||
}
|
||||
|
||||
async function collectEvents(events: AsyncIterable<ProviderEvent>, sink: ProviderEvent[]): Promise<void> {
|
||||
for await (const event of events) {
|
||||
sink.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
async function waitFor(condition: () => boolean, timeoutMs = 1000): Promise<void> {
|
||||
const start = Date.now();
|
||||
while (!condition()) {
|
||||
if (Date.now() - start > timeoutMs) throw new Error('waitFor timeout');
|
||||
await sleep(10);
|
||||
}
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
import { afterEach, describe, expect, it } from 'bun:test';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { archiveProviderExchange } from './exchange-archive.js';
|
||||
|
||||
let tmpDir: string | null = null;
|
||||
|
||||
afterEach(() => {
|
||||
if (tmpDir) {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
tmpDir = null;
|
||||
}
|
||||
});
|
||||
|
||||
function makeTmpDir(): string {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nanoclaw-archive-'));
|
||||
return tmpDir;
|
||||
}
|
||||
|
||||
describe('provider exchange archive', () => {
|
||||
it('writes unique exchange-level archives with provider metadata', () => {
|
||||
const conversationsDir = makeTmpDir();
|
||||
const timestamp = new Date('2026-06-03T12:34:56.789Z');
|
||||
|
||||
const first = archiveProviderExchange({
|
||||
conversationsDir,
|
||||
provider: 'codex',
|
||||
prompt: 'hello',
|
||||
result: 'world',
|
||||
continuation: 'thread-123',
|
||||
status: 'completed',
|
||||
timestamp,
|
||||
});
|
||||
const second = archiveProviderExchange({
|
||||
conversationsDir,
|
||||
provider: 'codex',
|
||||
prompt: 'hello again',
|
||||
result: 'world again',
|
||||
continuation: 'thread-123',
|
||||
status: 'completed',
|
||||
timestamp,
|
||||
});
|
||||
|
||||
expect(first).not.toBeNull();
|
||||
expect(second).not.toBeNull();
|
||||
expect(first).not.toBe(second);
|
||||
|
||||
const content = fs.readFileSync(path.join(conversationsDir, first!), 'utf-8');
|
||||
expect(content).toContain('# Codex Exchange');
|
||||
expect(content).toContain('Provider: codex');
|
||||
expect(content).toContain('Continuation/thread id: thread-123');
|
||||
expect(content).toContain('Status: completed');
|
||||
expect(content).toContain('**User**: hello');
|
||||
expect(content).toContain('**Assistant**: world');
|
||||
});
|
||||
|
||||
it('skips empty result text', () => {
|
||||
const conversationsDir = makeTmpDir();
|
||||
const filename = archiveProviderExchange({
|
||||
conversationsDir,
|
||||
provider: 'codex',
|
||||
prompt: 'hello',
|
||||
result: ' ',
|
||||
continuation: 'thread-123',
|
||||
status: 'completed',
|
||||
});
|
||||
|
||||
expect(filename).toBeNull();
|
||||
expect(fs.readdirSync(conversationsDir)).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,88 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
/**
|
||||
* Per-exchange markdown archive for providers with no on-disk transcript —
|
||||
* payload code, shipped with the provider that needs it. The provider's
|
||||
* `onExchangeComplete` hook (see types.ts) calls this with each completed
|
||||
* exchange; the runner never archives on a provider's behalf.
|
||||
*/
|
||||
|
||||
const DEFAULT_CONVERSATIONS_DIR = '/workspace/agent/conversations';
|
||||
|
||||
export interface ProviderExchangeArchiveOptions {
|
||||
provider: string;
|
||||
prompt: string;
|
||||
result: string | null | undefined;
|
||||
continuation?: string;
|
||||
status: string;
|
||||
timestamp?: Date;
|
||||
conversationsDir?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Archive a single prompt/result exchange. Returns the written filename, or
|
||||
* null when there is nothing to archive (empty result).
|
||||
*/
|
||||
export function archiveProviderExchange(options: ProviderExchangeArchiveOptions): string | null {
|
||||
const result = options.result?.trim();
|
||||
if (!result) return null;
|
||||
|
||||
const timestamp = options.timestamp ?? new Date();
|
||||
const conversationsDir =
|
||||
options.conversationsDir || process.env.NANOCLAW_CONVERSATIONS_DIR || DEFAULT_CONVERSATIONS_DIR;
|
||||
fs.mkdirSync(conversationsDir, { recursive: true });
|
||||
|
||||
const filename = uniqueArchiveFilename(conversationsDir, options.provider, options.continuation, timestamp);
|
||||
const lines = [
|
||||
`# ${titleCase(options.provider)} Exchange`,
|
||||
'',
|
||||
`Archived: ${timestamp.toISOString()}`,
|
||||
`Provider: ${options.provider}`,
|
||||
`Continuation/thread id: ${options.continuation || '(none)'}`,
|
||||
`Status: ${options.status}`,
|
||||
'',
|
||||
'---',
|
||||
'',
|
||||
`**User**: ${truncate(options.prompt)}`,
|
||||
'',
|
||||
`**Assistant**: ${truncate(result)}`,
|
||||
'',
|
||||
];
|
||||
fs.writeFileSync(path.join(conversationsDir, filename), lines.join('\n'));
|
||||
return filename;
|
||||
}
|
||||
|
||||
function uniqueArchiveFilename(
|
||||
dir: string,
|
||||
provider: string,
|
||||
continuation: string | undefined,
|
||||
timestamp: Date,
|
||||
): string {
|
||||
const date = timestamp.toISOString().split('T')[0];
|
||||
const time = timestamp.toISOString().replace(/[-:.TZ]/g, '').slice(8, 17);
|
||||
const thread = sanitizeSlug(continuation || 'no-thread').slice(0, 24) || 'no-thread';
|
||||
const base = `${date}-${sanitizeSlug(provider)}-${time}-${thread}`;
|
||||
let filename = `${base}.md`;
|
||||
let counter = 2;
|
||||
while (fs.existsSync(path.join(dir, filename))) {
|
||||
filename = `${base}-${counter}.md`;
|
||||
counter += 1;
|
||||
}
|
||||
return filename;
|
||||
}
|
||||
|
||||
function sanitizeSlug(value: string): string {
|
||||
return value
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '-')
|
||||
.replace(/^-+|-+$/g, '');
|
||||
}
|
||||
|
||||
function titleCase(value: string): string {
|
||||
return value ? value[0].toUpperCase() + value.slice(1) : 'Provider';
|
||||
}
|
||||
|
||||
function truncate(value: string): string {
|
||||
return value.length > 2000 ? value.slice(0, 2000) + '...' : value;
|
||||
}
|
||||
@@ -2,6 +2,7 @@ import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { createProvider, type ProviderName } from './factory.js';
|
||||
import { ClaudeProvider } from './claude.js';
|
||||
import { CodexProvider } from './codex.js';
|
||||
import { MockProvider } from './mock.js';
|
||||
|
||||
describe('createProvider', () => {
|
||||
@@ -9,6 +10,10 @@ describe('createProvider', () => {
|
||||
expect(createProvider('claude')).toBeInstanceOf(ClaudeProvider);
|
||||
});
|
||||
|
||||
it('returns CodexProvider for codex', () => {
|
||||
expect(createProvider('codex')).toBeInstanceOf(CodexProvider);
|
||||
});
|
||||
|
||||
it('returns MockProvider for mock', () => {
|
||||
expect(createProvider('mock')).toBeInstanceOf(MockProvider);
|
||||
});
|
||||
|
||||
@@ -3,4 +3,6 @@
|
||||
// level. Skills add a new provider by appending one import line below.
|
||||
|
||||
import './claude.js';
|
||||
import './codex.js';
|
||||
import './mock.js';
|
||||
import './opencode.js';
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { mcpServersToOpenCodeConfig } from './mcp-to-opencode.js';
|
||||
|
||||
describe('mcpServersToOpenCodeConfig', () => {
|
||||
it('maps nanoclaw + extra server like v2 index.ts merge', () => {
|
||||
const servers = {
|
||||
nanoclaw: {
|
||||
command: 'node',
|
||||
args: ['/app/src/mcp-tools/index.js'],
|
||||
env: {
|
||||
SESSION_INBOUND_DB_PATH: '/workspace/inbound.db',
|
||||
SESSION_OUTBOUND_DB_PATH: '/workspace/outbound.db',
|
||||
SESSION_HEARTBEAT_PATH: '/workspace/.heartbeat',
|
||||
},
|
||||
},
|
||||
extra: {
|
||||
command: 'npx',
|
||||
args: ['-y', 'some-mcp'],
|
||||
env: { FOO: 'bar' },
|
||||
},
|
||||
};
|
||||
|
||||
const mcp = mcpServersToOpenCodeConfig(servers);
|
||||
|
||||
expect(mcp.nanoclaw).toEqual({
|
||||
type: 'local',
|
||||
command: ['node', '/app/src/mcp-tools/index.js'],
|
||||
environment: {
|
||||
SESSION_INBOUND_DB_PATH: '/workspace/inbound.db',
|
||||
SESSION_OUTBOUND_DB_PATH: '/workspace/outbound.db',
|
||||
SESSION_HEARTBEAT_PATH: '/workspace/.heartbeat',
|
||||
},
|
||||
enabled: true,
|
||||
});
|
||||
|
||||
expect(mcp.extra).toEqual({
|
||||
type: 'local',
|
||||
command: ['npx', '-y', 'some-mcp'],
|
||||
environment: { FOO: 'bar' },
|
||||
enabled: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('omits environment when env is empty', () => {
|
||||
const mcp = mcpServersToOpenCodeConfig({
|
||||
x: { command: 'true', args: [], env: {} },
|
||||
});
|
||||
expect(mcp.x).toEqual({
|
||||
type: 'local',
|
||||
command: ['true'],
|
||||
enabled: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('returns empty record for undefined', () => {
|
||||
expect(mcpServersToOpenCodeConfig(undefined)).toEqual({});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,39 @@
|
||||
import type { McpServerConfig } from './types.js';
|
||||
|
||||
/** OpenCode `mcp` entry shape (local stdio server). */
|
||||
export type OpenCodeMcpLocal = {
|
||||
type: 'local';
|
||||
command: string[];
|
||||
environment?: Record<string, string>;
|
||||
enabled: true;
|
||||
};
|
||||
|
||||
/** OpenCode `mcp` entry shape (remote HTTP server). */
|
||||
export type OpenCodeMcpRemote = {
|
||||
type: 'remote';
|
||||
url: string;
|
||||
headers?: Record<string, string>;
|
||||
enabled: true;
|
||||
};
|
||||
|
||||
export type OpenCodeMcpEntry = OpenCodeMcpLocal | OpenCodeMcpRemote;
|
||||
|
||||
/**
|
||||
* Map NanoClaw v2 MCP definitions (same shape as Claude Agent SDK) into
|
||||
* OpenCode config `mcp` field. Stdio-only until `McpServerConfig` gains remote.
|
||||
*/
|
||||
export function mcpServersToOpenCodeConfig(
|
||||
servers: Record<string, McpServerConfig> | undefined,
|
||||
): Record<string, OpenCodeMcpEntry> {
|
||||
const out: Record<string, OpenCodeMcpEntry> = {};
|
||||
if (!servers) return out;
|
||||
for (const [name, cfg] of Object.entries(servers)) {
|
||||
out[name] = {
|
||||
type: 'local',
|
||||
command: [cfg.command, ...cfg.args],
|
||||
...(Object.keys(cfg.env).length > 0 ? { environment: cfg.env } : {}),
|
||||
enabled: true,
|
||||
};
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* Integration test for the opencode provider's CONTAINER-side reach-in: the self-registration
|
||||
* import in container/agent-runner/src/providers/index.ts. Importing the barrel runs
|
||||
* opencode.ts's top-level registerProvider('opencode', …); without that import line
|
||||
* createProvider('opencode') throws 'Unknown provider' at runtime.
|
||||
*
|
||||
* Behavior, not structural, and BARREL-ONLY: it imports the real barrel (./index.js),
|
||||
* never ./opencode.js directly, then asserts listProviderNames() contains the provider. The
|
||||
* existing opencode.factory.test.ts imports ./opencode.js directly, so it self-registers and
|
||||
* stays GREEN when the barrel line is deleted — a unit test, not a registration guard.
|
||||
* This goes red if the barrel import is deleted/drifts or the barrel fails to evaluate, or if @opencode-ai/sdk is not installed (the unmocked barrel import throws) — so it also implicitly guards that dependency.
|
||||
*/
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { listProviderNames } from './provider-registry.js';
|
||||
import './index.js'; // the real container provider barrel — triggers each provider's registerProvider()
|
||||
|
||||
describe('opencode provider registration', () => {
|
||||
it('registers opencode via the provider barrel', () => {
|
||||
expect(listProviderNames()).toContain('opencode');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,10 @@
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { createProvider } from './factory.js';
|
||||
import { OpenCodeProvider } from './opencode.js';
|
||||
|
||||
describe('createProvider (opencode)', () => {
|
||||
it('returns OpenCodeProvider for opencode', () => {
|
||||
expect(createProvider('opencode')).toBeInstanceOf(OpenCodeProvider);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,423 @@
|
||||
import { spawn, type ChildProcess } from 'child_process';
|
||||
|
||||
import { createOpencodeClient, type OpencodeClient } from '@opencode-ai/sdk';
|
||||
|
||||
import { registerProvider } from './provider-registry.js';
|
||||
import type { AgentProvider, AgentQuery, ProviderEvent, ProviderOptions, QueryInput } from './types.js';
|
||||
import { mcpServersToOpenCodeConfig } from './mcp-to-opencode.js';
|
||||
|
||||
function log(msg: string): void {
|
||||
console.error(`[opencode-provider] ${msg}`);
|
||||
}
|
||||
|
||||
const SESSION_STATUS_RETRY_ERROR_AFTER = 3;
|
||||
|
||||
/** Stale / dead OpenCode session heuristics (complement Claude-centric host patterns). */
|
||||
const STALE_SESSION_RE =
|
||||
/no conversation found|ENOENT.*\.jsonl|session.*not found|NotFoundError|connection reset|ECONNRESET|404|event timeout/i;
|
||||
|
||||
function killProcessTree(proc: ChildProcess): void {
|
||||
if (!proc.pid) return;
|
||||
try {
|
||||
process.kill(-proc.pid, 'SIGKILL');
|
||||
} catch {
|
||||
try {
|
||||
proc.kill('SIGKILL');
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function spawnOpencodeServer(config: Record<string, unknown>, timeoutMs = 10_000): Promise<{ url: string; proc: ChildProcess }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const hostname = '127.0.0.1';
|
||||
const port = 4096;
|
||||
const proc = spawn('opencode', ['serve', `--hostname=${hostname}`, `--port=${port}`], {
|
||||
env: {
|
||||
...process.env,
|
||||
OPENCODE_CONFIG_CONTENT: JSON.stringify(config),
|
||||
},
|
||||
detached: true,
|
||||
});
|
||||
|
||||
const id = setTimeout(() => {
|
||||
killProcessTree(proc);
|
||||
reject(new Error(`Timeout waiting for OpenCode server to start after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
let output = '';
|
||||
proc.stdout?.on('data', (chunk: Buffer) => {
|
||||
output += chunk.toString();
|
||||
for (const line of output.split('\n')) {
|
||||
if (line.startsWith('opencode server listening')) {
|
||||
const match = line.match(/on\s+(https?:\/\/[^\s]+)/);
|
||||
if (match) {
|
||||
clearTimeout(id);
|
||||
resolve({ url: match[1], proc });
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
proc.stderr?.on('data', (chunk: Buffer) => {
|
||||
output += chunk.toString();
|
||||
});
|
||||
proc.on('exit', (code) => {
|
||||
clearTimeout(id);
|
||||
let msg = `OpenCode server exited with code ${code}`;
|
||||
if (output.trim()) msg += `\nServer output: ${output}`;
|
||||
reject(new Error(msg));
|
||||
});
|
||||
proc.on('error', (err) => {
|
||||
clearTimeout(id);
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function wrapPromptWithContext(text: string, systemInstructions?: string): string {
|
||||
let out = text;
|
||||
if (systemInstructions) {
|
||||
out = `<system>\n${systemInstructions}\n</system>\n\n${out}`;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function buildOpenCodeConfig(options: ProviderOptions): Record<string, unknown> {
|
||||
const provider = process.env.OPENCODE_PROVIDER || 'anthropic';
|
||||
const model = process.env.OPENCODE_MODEL;
|
||||
const smallModel = process.env.OPENCODE_SMALL_MODEL;
|
||||
const proxyUrl = process.env.ANTHROPIC_BASE_URL;
|
||||
|
||||
const providerModelId = model ? model.replace(new RegExp(`^${provider}/`), '') : undefined;
|
||||
const providerSmallModelId = smallModel ? smallModel.replace(new RegExp(`^${provider}/`), '') : undefined;
|
||||
const modelsToRegister = [providerModelId, providerSmallModelId]
|
||||
.filter(Boolean)
|
||||
.filter((mid, i, a) => a.indexOf(mid as string) === i);
|
||||
|
||||
const providerOptions: Record<string, unknown> =
|
||||
provider === 'anthropic'
|
||||
? {}
|
||||
: {
|
||||
[provider]: {
|
||||
options: { apiKey: 'placeholder', baseURL: proxyUrl },
|
||||
...(modelsToRegister.length > 0
|
||||
? {
|
||||
models: Object.fromEntries(
|
||||
modelsToRegister.map((mid) => [mid, { id: mid, name: mid, tool_call: true }]),
|
||||
),
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
};
|
||||
|
||||
const mcp = mcpServersToOpenCodeConfig(options.mcpServers);
|
||||
|
||||
// Load shared base + per-group fragments + per-group memory through OpenCode's
|
||||
// native instructions pipeline (session/instruction.ts). Absolute paths with
|
||||
// globs are supported. Files are read raw — `@./...` includes are NOT expanded
|
||||
// by OpenCode, so point at the concrete files, not at composed CLAUDE.md.
|
||||
const instructions = [
|
||||
'/app/CLAUDE.md',
|
||||
'/workspace/agent/.claude-fragments/*.md',
|
||||
'/workspace/agent/CLAUDE.local.md',
|
||||
];
|
||||
|
||||
return {
|
||||
...(model ? { model } : {}),
|
||||
...(smallModel ? { small_model: smallModel } : {}),
|
||||
enabled_providers: [provider],
|
||||
permission: 'allow',
|
||||
autoupdate: false,
|
||||
snapshot: false,
|
||||
provider: providerOptions,
|
||||
instructions,
|
||||
mcp,
|
||||
};
|
||||
}
|
||||
|
||||
type SharedRuntime = {
|
||||
proc: ChildProcess;
|
||||
client: OpencodeClient;
|
||||
stream: AsyncGenerator<{ type: string; properties: Record<string, unknown> }, void, void>;
|
||||
streamRelease: () => void;
|
||||
};
|
||||
|
||||
let sharedRuntime: SharedRuntime | null = null;
|
||||
let sharedConfigKey: string | null = null;
|
||||
let sharedInit: Promise<SharedRuntime> | null = null;
|
||||
|
||||
function runtimeConfigKey(options: ProviderOptions): string {
|
||||
return JSON.stringify({
|
||||
mcp: mcpServersToOpenCodeConfig(options.mcpServers),
|
||||
model: process.env.OPENCODE_MODEL,
|
||||
small: process.env.OPENCODE_SMALL_MODEL,
|
||||
op: process.env.OPENCODE_PROVIDER,
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureSharedRuntime(options: ProviderOptions): Promise<SharedRuntime> {
|
||||
const key = runtimeConfigKey(options);
|
||||
if (sharedRuntime && sharedConfigKey === key) return sharedRuntime;
|
||||
|
||||
if (sharedInit) return sharedInit;
|
||||
|
||||
sharedInit = (async () => {
|
||||
if (sharedRuntime) {
|
||||
destroySharedRuntime();
|
||||
}
|
||||
const config = buildOpenCodeConfig(options);
|
||||
const { url, proc } = await spawnOpencodeServer(config);
|
||||
const client = createOpencodeClient({ baseUrl: url });
|
||||
const sub = await client.event.subscribe();
|
||||
const stream = sub.stream as AsyncGenerator<{ type: string; properties: Record<string, unknown> }, void, void>;
|
||||
sharedRuntime = {
|
||||
proc,
|
||||
client,
|
||||
stream,
|
||||
streamRelease: () => {
|
||||
void stream.return?.(undefined);
|
||||
},
|
||||
};
|
||||
sharedConfigKey = key;
|
||||
sharedInit = null;
|
||||
return sharedRuntime;
|
||||
})();
|
||||
|
||||
return sharedInit;
|
||||
}
|
||||
|
||||
export function destroySharedRuntime(): void {
|
||||
if (sharedRuntime) {
|
||||
try {
|
||||
sharedRuntime.streamRelease();
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
killProcessTree(sharedRuntime.proc);
|
||||
sharedRuntime = null;
|
||||
sharedConfigKey = null;
|
||||
}
|
||||
sharedInit = null;
|
||||
}
|
||||
|
||||
function sessionErrorMessage(props: { error?: unknown }): string {
|
||||
const err = props.error as { data?: { message?: string } } | undefined;
|
||||
if (err && typeof err === 'object' && err.data && typeof err.data.message === 'string') {
|
||||
return err.data.message;
|
||||
}
|
||||
return JSON.stringify(props.error) || 'OpenCode session error';
|
||||
}
|
||||
|
||||
export class OpenCodeProvider implements AgentProvider {
|
||||
readonly supportsNativeSlashCommands = false;
|
||||
|
||||
private readonly options: ProviderOptions;
|
||||
private activeSessionId: string | undefined;
|
||||
|
||||
constructor(options: ProviderOptions = {}) {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
isSessionInvalid(err: unknown): boolean {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
return STALE_SESSION_RE.test(msg);
|
||||
}
|
||||
|
||||
query(input: QueryInput): AgentQuery {
|
||||
if (input.continuation) {
|
||||
this.activeSessionId = input.continuation;
|
||||
} else {
|
||||
this.activeSessionId = undefined;
|
||||
}
|
||||
|
||||
const pending: string[] = [];
|
||||
let waiting: (() => void) | null = null;
|
||||
let ended = false;
|
||||
let aborted = false;
|
||||
|
||||
const systemInstructions = input.systemContext?.instructions;
|
||||
pending.push(wrapPromptWithContext(input.prompt, systemInstructions));
|
||||
|
||||
const kick = (): void => {
|
||||
waiting?.();
|
||||
};
|
||||
|
||||
const self = this;
|
||||
const IDLE_TIMEOUT_MS = Number(process.env.OPENCODE_IDLE_TIMEOUT_MS) || 300_000;
|
||||
|
||||
async function* gen(): AsyncGenerator<ProviderEvent> {
|
||||
let initYielded = false;
|
||||
const rt = await ensureSharedRuntime(self.options);
|
||||
const { client, stream } = rt;
|
||||
|
||||
while (!aborted) {
|
||||
while (pending.length === 0 && !ended && !aborted) {
|
||||
await new Promise<void>((resolve) => {
|
||||
waiting = resolve;
|
||||
});
|
||||
waiting = null;
|
||||
}
|
||||
|
||||
if (aborted) return;
|
||||
if (pending.length === 0 && ended) return;
|
||||
|
||||
const text = pending.shift()!;
|
||||
let sessionId = self.activeSessionId;
|
||||
|
||||
if (!sessionId) {
|
||||
const created = await client.session.create();
|
||||
if (created.error) {
|
||||
throw new Error(`OpenCode: failed to create session: ${JSON.stringify(created.error)}`);
|
||||
}
|
||||
sessionId = created.data?.id;
|
||||
if (!sessionId) throw new Error('OpenCode: failed to create session (no id)');
|
||||
self.activeSessionId = sessionId;
|
||||
}
|
||||
|
||||
if (!initYielded) {
|
||||
yield { type: 'init', continuation: sessionId };
|
||||
initYielded = true;
|
||||
}
|
||||
|
||||
const promptRes = await client.session.promptAsync({
|
||||
path: { id: sessionId },
|
||||
body: { parts: [{ type: 'text', text }] },
|
||||
});
|
||||
if (promptRes.error) {
|
||||
self.activeSessionId = undefined;
|
||||
throw new Error(`OpenCode promptAsync: ${JSON.stringify(promptRes.error)}`);
|
||||
}
|
||||
|
||||
const partTextByMessageId = new Map<string, string>();
|
||||
const roleByMessageId = new Map<string, string>();
|
||||
let lastEventAt = Date.now();
|
||||
let eventTimedOut = false;
|
||||
const timeoutCheck = setInterval(() => {
|
||||
if (Date.now() - lastEventAt > IDLE_TIMEOUT_MS) {
|
||||
log(`OpenCode event timeout (${IDLE_TIMEOUT_MS}ms) — clearing session ${sessionId}`);
|
||||
eventTimedOut = true;
|
||||
self.activeSessionId = undefined;
|
||||
destroySharedRuntime();
|
||||
kick();
|
||||
}
|
||||
}, 5000);
|
||||
|
||||
try {
|
||||
turn: while (true) {
|
||||
if (aborted) return;
|
||||
if (eventTimedOut) {
|
||||
throw new Error(`OpenCode event timeout (${IDLE_TIMEOUT_MS}ms)`);
|
||||
}
|
||||
|
||||
const { value: ev, done } = await stream.next();
|
||||
if (done) {
|
||||
throw new Error('OpenCode SSE stream ended unexpectedly');
|
||||
}
|
||||
|
||||
if (!ev?.type || ev.type === 'server.connected' || ev.type === 'server.heartbeat') continue;
|
||||
|
||||
lastEventAt = Date.now();
|
||||
yield { type: 'activity' };
|
||||
|
||||
switch (ev.type) {
|
||||
case 'message.updated': {
|
||||
const info = ev.properties.info as { id?: string; role?: string } | undefined;
|
||||
if (info?.id && info?.role) {
|
||||
roleByMessageId.set(info.id, info.role);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'message.part.updated': {
|
||||
const part = ev.properties.part as { type?: string; messageID?: string; text?: string } | undefined;
|
||||
if (part?.type === 'text' && part.messageID && part.text) {
|
||||
partTextByMessageId.set(part.messageID, part.text);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'permission.updated': {
|
||||
const perm = ev.properties as { id?: string; sessionID?: string };
|
||||
if (perm.sessionID === sessionId && perm.id) {
|
||||
try {
|
||||
await client.postSessionIdPermissionsPermissionId({
|
||||
path: { id: sessionId, permissionID: perm.id },
|
||||
body: { response: 'always' },
|
||||
});
|
||||
} catch (err) {
|
||||
log(`Failed to auto-reply permission: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'session.status': {
|
||||
const props = ev.properties as {
|
||||
sessionID?: string;
|
||||
status?: { type?: string; attempt?: number; message?: string };
|
||||
};
|
||||
if (props.sessionID !== sessionId) break;
|
||||
const st = props.status;
|
||||
if (
|
||||
st?.type === 'retry' &&
|
||||
typeof st.attempt === 'number' &&
|
||||
st.attempt >= SESSION_STATUS_RETRY_ERROR_AFTER &&
|
||||
st.message
|
||||
) {
|
||||
self.activeSessionId = undefined;
|
||||
throw new Error(`OpenCode retry limit (${st.attempt}): ${st.message}`);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'session.error': {
|
||||
const props = ev.properties as { sessionID?: string; error?: unknown };
|
||||
if (props.sessionID === sessionId || props.sessionID === undefined) {
|
||||
self.activeSessionId = undefined;
|
||||
throw new Error(sessionErrorMessage(props));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'session.idle': {
|
||||
const sid = (ev.properties as { sessionID?: string }).sessionID;
|
||||
if (sid === sessionId) {
|
||||
break turn;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
clearInterval(timeoutCheck);
|
||||
}
|
||||
|
||||
let resultText = '';
|
||||
for (const [msgId, role] of roleByMessageId) {
|
||||
if (role === 'assistant') {
|
||||
resultText = partTextByMessageId.get(msgId) ?? resultText;
|
||||
}
|
||||
}
|
||||
yield { type: 'result', text: resultText || null };
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
push: (message: string) => {
|
||||
pending.push(wrapPromptWithContext(message, systemInstructions));
|
||||
kick();
|
||||
},
|
||||
end: () => {
|
||||
ended = true;
|
||||
kick();
|
||||
},
|
||||
events: gen(),
|
||||
abort: () => {
|
||||
aborted = true;
|
||||
this.activeSessionId = undefined;
|
||||
kick();
|
||||
destroySharedRuntime();
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
registerProvider('opencode', (opts) => new OpenCodeProvider(opts));
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* Setup-side registration guard for the codex provider (the third barrel of
|
||||
* the multi-point archetype): imports the REAL setup/providers barrel and
|
||||
* asserts the registry carries codex with its auth + install check. Red if
|
||||
* the barrel line is deleted, the barrel fails to evaluate, or the payload
|
||||
* module breaks. (Importing ./codex.js directly would self-register and stay
|
||||
* green when the barrel line is deleted.)
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { getSetupProvider } from './registry.js';
|
||||
import './index.js'; // the real setup provider barrel
|
||||
|
||||
describe('codex setup registration', () => {
|
||||
it('registers codex with auth + install check via the barrel', () => {
|
||||
const codex = getSetupProvider('codex');
|
||||
expect(codex).toBeDefined();
|
||||
expect(typeof codex!.runAuth).toBe('function');
|
||||
expect(typeof codex!.runInstallCheck).toBe('function');
|
||||
expect(typeof codex!.offerFailureAssist).toBe('function');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,101 @@
|
||||
import { EventEmitter } from 'events';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Mock child_process so runCodexLoginAuth never spawns a real codex CLI; the
|
||||
// spawn stand-in plays `codex login` writing auth.json into whatever
|
||||
// CODEX_HOME it was handed.
|
||||
const mockSpawn = vi.fn();
|
||||
const mockSpawnSync = vi.fn();
|
||||
const mockExecFileSync = vi.fn();
|
||||
vi.mock('child_process', () => ({
|
||||
spawn: (...args: unknown[]) => mockSpawn(...args),
|
||||
spawnSync: (...args: unknown[]) => mockSpawnSync(...args),
|
||||
execFileSync: (...args: unknown[]) => mockExecFileSync(...args),
|
||||
}));
|
||||
|
||||
// Keep the auth flow's structured logging out of logs/setup.log.
|
||||
vi.mock('../logs.js', () => ({ step: vi.fn(), userInput: vi.fn() }));
|
||||
|
||||
import { buildCodexFailurePrompt, runCodexLoginAuth, verifyCodexInstall } from './codex.js';
|
||||
|
||||
// Structural guard for the codex payload wiring: provider files, both barrel
|
||||
// imports, and the pinned Dockerfile install. Goes red if any of them is
|
||||
// removed without going through the /add-codex (or its REMOVE.md) path.
|
||||
describe('verifyCodexInstall', () => {
|
||||
it('passes on a tree with the codex payload wired', () => {
|
||||
const { ok, problems } = verifyCodexInstall();
|
||||
expect(problems).toEqual([]);
|
||||
expect(ok).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// Pure prompt builder for the failure-assist hook — no spawning involved.
|
||||
describe('buildCodexFailurePrompt', () => {
|
||||
it('carries the failure context and the de-duped reference list', () => {
|
||||
const projectRoot = '/repo';
|
||||
const prompt = buildCodexFailurePrompt(
|
||||
{
|
||||
stepName: 'verify',
|
||||
msg: 'first-chat ping timed out',
|
||||
hint: 'check the container logs',
|
||||
rawLogPath: '/repo/logs/setup-steps/verify.log',
|
||||
},
|
||||
projectRoot,
|
||||
);
|
||||
|
||||
expect(prompt).toContain('Failed step: verify');
|
||||
expect(prompt).toContain('Error: first-chat ping timed out');
|
||||
expect(prompt).toContain('Hint: check the container logs');
|
||||
expect(prompt).toContain('README.md'); // BIG_PICTURE_FILES
|
||||
expect(prompt).toContain('setup/verify.ts'); // STEP_FILES['verify']
|
||||
expect(prompt).toContain('logs/setup.log');
|
||||
expect(prompt).toContain('logs/setup-steps/verify.log'); // relativized rawLogPath
|
||||
});
|
||||
|
||||
it('falls back to the step-log directory when no raw log path is given', () => {
|
||||
const prompt = buildCodexFailurePrompt({ stepName: 'verify', msg: 'boom' }, '/repo');
|
||||
expect(prompt).toContain('logs/setup-steps/');
|
||||
expect(prompt).not.toContain('Hint:');
|
||||
});
|
||||
});
|
||||
|
||||
// Session-isolation invariant: the ChatGPT session vaulted for the gateway
|
||||
// must never be the user's personal ~/.codex session — sharing one OAuth
|
||||
// session across two consumers gets the whole family invalidated server-side
|
||||
// when refresh tokens rotate (see the header of codex.ts).
|
||||
describe('runCodexLoginAuth', () => {
|
||||
it('logs in under an isolated CODEX_HOME, vaults from it, and deletes it', async () => {
|
||||
mockSpawnSync.mockReturnValue({ status: 0, stdout: '', stderr: '' });
|
||||
mockExecFileSync.mockReturnValue('');
|
||||
|
||||
let loginEnv: NodeJS.ProcessEnv | undefined;
|
||||
mockSpawn.mockImplementation((...args: unknown[]) => {
|
||||
const opts = args[2] as { env?: NodeJS.ProcessEnv };
|
||||
loginEnv = opts.env;
|
||||
fs.writeFileSync(path.join(opts.env!.CODEX_HOME!, 'auth.json'), '{"tokens":{}}');
|
||||
const child = new EventEmitter();
|
||||
setImmediate(() => child.emit('close', 0));
|
||||
return child;
|
||||
});
|
||||
|
||||
await runCodexLoginAuth('browser');
|
||||
|
||||
// The login spawn ran under a CODEX_HOME that is not the personal one.
|
||||
const codexHome = loginEnv?.CODEX_HOME;
|
||||
expect(codexHome).toBeDefined();
|
||||
expect(codexHome).not.toBe(path.join(os.homedir(), '.codex'));
|
||||
|
||||
// The vault snapshot was read from the isolated dir, not ~/.codex.
|
||||
const vaultCall = mockExecFileSync.mock.calls.find((c) => c[0] === 'onecli');
|
||||
expect(vaultCall).toBeDefined();
|
||||
const vaultArgs = vaultCall![1] as string[];
|
||||
expect(vaultArgs[vaultArgs.indexOf('--file') + 1]).toBe(path.join(codexHome!, 'auth.json'));
|
||||
|
||||
// The isolated dir holds a live credential — gone once vaulted.
|
||||
expect(fs.existsSync(codexHome!)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,441 @@
|
||||
/**
|
||||
* Codex provider setup — auth walk-through + install verification.
|
||||
*
|
||||
* Codex-owned payload code: when the codex provider moves to the `providers`
|
||||
* branch, this file travels with it and `/add-codex` copies it back in. The
|
||||
* only trunk reach-in is one import + one picker entry in setup/auto.ts.
|
||||
*
|
||||
* Auth honors the v2 credential invariant — everything lands in the OneCLI
|
||||
* vault, nothing in .env, nothing in the container:
|
||||
* - ChatGPT subscription (the common case): `codex login` (browser) or
|
||||
* `codex login --device-auth` (URL + pairing code) runs with CODEX_HOME
|
||||
* pointed at a throwaway dir; the auth.json written there is stored
|
||||
* WHOLE in the vault (`--file … --host-pattern chatgpt.com`) and the dir
|
||||
* is deleted. The gateway injects it in flight; the container only ever
|
||||
* sees the `onecli-managed` placeholder.
|
||||
* - API key: pasted once, stored as an `openai` secret for api.openai.com.
|
||||
*
|
||||
* Session-isolation invariant: the vaulted ChatGPT session must be DEDICATED
|
||||
* to the gateway. Never vault a copy of the user's live ~/.codex/auth.json.
|
||||
* OpenAI rotates refresh tokens, so two consumers sharing one OAuth session
|
||||
* strand each other on refresh, and replaying the stale token trips reuse
|
||||
* detection — which invalidates the whole session family server-side
|
||||
* (`token_invalidated`) for the gateway AND the user's personal Codex CLI.
|
||||
*/
|
||||
import { execFileSync, spawn, spawnSync } from 'child_process';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import * as p from '@clack/prompts';
|
||||
import k from 'kleur';
|
||||
|
||||
import { brightSelect } from '../lib/bright-select.js';
|
||||
import { type AssistContext, BIG_PICTURE_FILES, STEP_FILES } from '../lib/claude-assist.js';
|
||||
import { brandBody, note } from '../lib/theme.js';
|
||||
import * as setupLog from '../logs.js';
|
||||
import { type FailureAssistResult, registerSetupProvider } from './registry.js';
|
||||
|
||||
// ─── OneCLI vault helpers ────────────────────────────────────────────────
|
||||
|
||||
interface OnecliSecret {
|
||||
id: string;
|
||||
name: string;
|
||||
type: string;
|
||||
hostPattern: string | null;
|
||||
}
|
||||
|
||||
function listSecrets(): OnecliSecret[] {
|
||||
const out = execFileSync('onecli', ['secrets', 'list'], { encoding: 'utf-8' });
|
||||
const parsed = JSON.parse(out) as { data?: unknown };
|
||||
return Array.isArray(parsed.data) ? (parsed.data as OnecliSecret[]) : [];
|
||||
}
|
||||
|
||||
function findOpenAISecret(secrets: OnecliSecret[]): OnecliSecret | undefined {
|
||||
return secrets.find((s) => {
|
||||
const name = s.name.toLowerCase();
|
||||
const type = s.type.toLowerCase();
|
||||
const hostPattern = (s.hostPattern ?? '').toLowerCase();
|
||||
return (
|
||||
name === 'codex' ||
|
||||
name === 'openai' ||
|
||||
type === 'openai' ||
|
||||
hostPattern.includes('api.openai.com') ||
|
||||
hostPattern.includes('chatgpt.com')
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
function openAISecretExists(): boolean {
|
||||
try {
|
||||
return findOpenAISecret(listSecrets()) !== undefined;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── auth step ───────────────────────────────────────────────────────────
|
||||
|
||||
function ensureAnswer<T>(value: T | symbol): T {
|
||||
if (p.isCancel(value)) {
|
||||
p.cancel('Setup cancelled.');
|
||||
process.exit(1);
|
||||
}
|
||||
return value as T;
|
||||
}
|
||||
|
||||
export async function runCodexAuthStep(): Promise<void> {
|
||||
if (openAISecretExists()) {
|
||||
p.log.success(brandBody('Your OpenAI account is already connected.'));
|
||||
setupLog.step('auth', 'skipped', 0, { REASON: 'openai-secret-already-present', PROVIDER: 'codex' });
|
||||
return;
|
||||
}
|
||||
|
||||
const method = ensureAnswer(
|
||||
await brightSelect<'browser' | 'device' | 'api' | 'skip'>({
|
||||
message: 'How would you like to connect Codex?',
|
||||
options: [
|
||||
{
|
||||
value: 'browser',
|
||||
label: 'Sign in with my ChatGPT subscription',
|
||||
hint: 'recommended if you have Plus or Pro — opens a browser',
|
||||
},
|
||||
{
|
||||
value: 'device',
|
||||
label: 'ChatGPT device pairing',
|
||||
hint: 'no browser handoff — shows a URL and a code',
|
||||
},
|
||||
{
|
||||
value: 'api',
|
||||
label: 'Paste an OpenAI API key',
|
||||
hint: 'pay-per-use; stored in OneCLI, never copied into the container',
|
||||
},
|
||||
{
|
||||
value: 'skip',
|
||||
label: "Skip — I'll connect later",
|
||||
hint: 'Codex groups will start, but model calls will fail auth',
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
setupLog.userInput('codex_auth_method', method);
|
||||
|
||||
if (method === 'skip') {
|
||||
const confirmed = ensureAnswer(
|
||||
await p.confirm({
|
||||
message: "Skip Codex sign-in? Codex won't be able to answer until you connect an OpenAI account.",
|
||||
initialValue: false,
|
||||
}),
|
||||
);
|
||||
if (!confirmed) return runCodexAuthStep();
|
||||
setupLog.step('auth', 'skipped', 0, { REASON: 'user-skipped', PROVIDER: 'codex' });
|
||||
p.log.warn(brandBody('Codex sign-in skipped. Add an OpenAI account to OneCLI before using Codex groups.'));
|
||||
return;
|
||||
}
|
||||
|
||||
if (method === 'api') {
|
||||
await runCodexApiKeyAuth();
|
||||
return;
|
||||
}
|
||||
|
||||
await runCodexLoginAuth(method);
|
||||
}
|
||||
|
||||
async function runCodexApiKeyAuth(): Promise<void> {
|
||||
const key = ensureAnswer(
|
||||
await p.password({
|
||||
message: 'Paste your OpenAI API key (sk-…)',
|
||||
validate: (v) => (v && v.trim().startsWith('sk-') ? undefined : 'That does not look like an OpenAI API key.'),
|
||||
}),
|
||||
) as string;
|
||||
|
||||
try {
|
||||
execFileSync(
|
||||
'onecli',
|
||||
[
|
||||
'secrets',
|
||||
'create',
|
||||
'--name',
|
||||
'Codex',
|
||||
'--type',
|
||||
'openai',
|
||||
'--value',
|
||||
key.trim(),
|
||||
'--host-pattern',
|
||||
'api.openai.com',
|
||||
],
|
||||
{ stdio: ['ignore', 'pipe', 'pipe'] },
|
||||
);
|
||||
} catch (err) {
|
||||
setupLog.step('auth', 'failed', 0, { PROVIDER: 'codex', METHOD: 'api', ERROR: String(err) });
|
||||
p.log.error(
|
||||
brandBody(
|
||||
"Couldn't save your OpenAI key to the vault. Make sure OneCLI is running (`onecli version`), then retry.",
|
||||
),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
setupLog.step('auth', 'success', 0, { PROVIDER: 'codex', METHOD: 'api' });
|
||||
p.log.success(brandBody('OpenAI account connected.'));
|
||||
}
|
||||
|
||||
export async function runCodexLoginAuth(method: 'browser' | 'device'): Promise<void> {
|
||||
const codexCheck = spawnSync('codex', ['--version'], { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
if (codexCheck.status !== 0) {
|
||||
p.log.error(
|
||||
brandBody(
|
||||
'The Codex CLI is not installed on this machine. Install it with `npm install -g @openai/codex`, then re-run setup — or choose the API key option instead.',
|
||||
),
|
||||
);
|
||||
setupLog.step('auth', 'failed', 0, { PROVIDER: 'codex', METHOD: method, ERROR: 'codex_cli_missing' });
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if (method === 'browser') {
|
||||
p.log.step(brandBody('Opening the Codex sign-in flow…'));
|
||||
console.log(k.dim(' (a browser will open for sign-in; this part is interactive)'));
|
||||
} else {
|
||||
p.log.step(brandBody('Starting Codex device-code pairing…'));
|
||||
console.log(k.dim(' (a URL and code will appear below — open the URL and enter the code)'));
|
||||
}
|
||||
console.log();
|
||||
|
||||
// Session-isolation invariant (see file header): the login runs under a
|
||||
// throwaway CODEX_HOME so the vaulted session is dedicated to the gateway
|
||||
// and never shared with the user's personal ~/.codex.
|
||||
const loginHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-vault-login-'));
|
||||
// Holds a live credential after login — must go on every exit path. The
|
||||
// failure branches call process.exit, which skips finally blocks, so each
|
||||
// removes it explicitly.
|
||||
const removeLoginHome = (): void => fs.rmSync(loginHome, { recursive: true, force: true });
|
||||
|
||||
const args = method === 'device' ? ['login', '--device-auth'] : ['login'];
|
||||
const start = Date.now();
|
||||
const code = await runInherit('codex', args, { CODEX_HOME: loginHome });
|
||||
const durationMs = Date.now() - start;
|
||||
console.log();
|
||||
|
||||
if (code !== 0) {
|
||||
removeLoginHome();
|
||||
setupLog.step('auth', 'failed', durationMs, { PROVIDER: 'codex', METHOD: method, EXIT_CODE: String(code) });
|
||||
p.log.error(
|
||||
brandBody(
|
||||
"Couldn't complete the Codex sign-in. Re-run setup and try again, or choose the API key option instead.",
|
||||
),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const authJsonPath = path.join(loginHome, 'auth.json');
|
||||
if (!fs.existsSync(authJsonPath)) {
|
||||
removeLoginHome();
|
||||
setupLog.step('auth', 'failed', durationMs, { PROVIDER: 'codex', METHOD: method, ERROR: 'auth_json_not_found' });
|
||||
p.log.error(
|
||||
brandBody('Codex login succeeded but no auth.json was written. Try again, or paste an API key instead.'),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
try {
|
||||
execFileSync(
|
||||
'onecli',
|
||||
[
|
||||
'secrets',
|
||||
'create',
|
||||
'--name',
|
||||
'Codex',
|
||||
'--type',
|
||||
'openai',
|
||||
'--file',
|
||||
authJsonPath,
|
||||
'--host-pattern',
|
||||
'chatgpt.com',
|
||||
],
|
||||
{ stdio: ['ignore', 'pipe', 'pipe'] },
|
||||
);
|
||||
} catch (err) {
|
||||
removeLoginHome();
|
||||
setupLog.step('auth', 'failed', durationMs, { PROVIDER: 'codex', METHOD: method, ERROR: String(err) });
|
||||
p.log.error(
|
||||
brandBody(
|
||||
"Couldn't save your Codex credentials to the vault. Make sure OneCLI is running (`onecli version`), then retry.",
|
||||
),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
removeLoginHome();
|
||||
setupLog.step('auth', 'success', durationMs, { PROVIDER: 'codex', METHOD: method });
|
||||
p.log.success(brandBody('OpenAI account connected — credentials live in your OneCLI vault, never in the container.'));
|
||||
}
|
||||
|
||||
function runInherit(cmd: string, args: string[], extraEnv?: Record<string, string>): Promise<number> {
|
||||
return new Promise((resolve) => {
|
||||
const child = spawn(cmd, args, {
|
||||
stdio: 'inherit',
|
||||
env: extraEnv ? { ...process.env, ...extraEnv } : process.env,
|
||||
});
|
||||
child.on('close', (code) => resolve(code ?? 1));
|
||||
child.on('error', () => resolve(1));
|
||||
});
|
||||
}
|
||||
|
||||
// ─── failure assist ──────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* The Codex CLI can debug a setup failure only if the binary runs AND
|
||||
* ~/.codex/auth.json exists (API-key-only installs keep the key in the
|
||||
* OneCLI vault, so the host-side CLI has nothing to authenticate with).
|
||||
*/
|
||||
export function isCodexCliUsable(): boolean {
|
||||
const codexCheck = spawnSync('codex', ['--version'], { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
if (codexCheck.status !== 0) return false;
|
||||
return fs.existsSync(path.join(os.homedir(), '.codex', 'auth.json'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Failure prompt handed to the interactive Codex session — same content as
|
||||
* the dispatcher's Claude system prompt: what failed, the job ("diagnose and
|
||||
* fix, be concise, exit when done"), and a de-duped file reference list.
|
||||
*/
|
||||
export function buildCodexFailurePrompt(ctx: AssistContext, projectRoot: string): string {
|
||||
const stepRefs = STEP_FILES[ctx.stepName] ?? [];
|
||||
const references = [
|
||||
...BIG_PICTURE_FILES,
|
||||
...stepRefs,
|
||||
'logs/setup.log',
|
||||
ctx.rawLogPath ? path.relative(projectRoot, ctx.rawLogPath) : 'logs/setup-steps/',
|
||||
].filter((v, i, a) => a.indexOf(v) === i);
|
||||
|
||||
const lines: string[] = [
|
||||
"The user is running NanoClaw's interactive setup flow and hit a failure.",
|
||||
'',
|
||||
`Failed step: ${ctx.stepName}`,
|
||||
`Error: ${ctx.msg}`,
|
||||
];
|
||||
|
||||
if (ctx.hint) lines.push(`Hint: ${ctx.hint}`);
|
||||
|
||||
lines.push(
|
||||
'',
|
||||
'Your job: help them diagnose and fix this issue. Read the referenced files',
|
||||
'and logs to understand what went wrong, then help them fix it. You can read',
|
||||
'files, run commands, check logs, and explain what happened. Be concise.',
|
||||
"When they're ready to resume setup, tell them to exit Codex.",
|
||||
'',
|
||||
'Relevant files (read as needed):',
|
||||
);
|
||||
for (const f of references) lines.push(` - ${f}`);
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* Registry hook: offer to debug a setup failure with the Codex CLI. Returns
|
||||
* 'unavailable' when the CLI can't run here so the dispatcher can fall back
|
||||
* to its guarded Claude offer.
|
||||
*/
|
||||
export async function offerCodexFailureAssist(ctx: AssistContext, projectRoot: string): Promise<FailureAssistResult> {
|
||||
if (!isCodexCliUsable()) return 'unavailable';
|
||||
|
||||
const want = ensureAnswer(
|
||||
await p.confirm({
|
||||
message: 'Want to debug this with Codex?',
|
||||
initialValue: true,
|
||||
}),
|
||||
);
|
||||
if (!want) return 'declined';
|
||||
|
||||
const prompt = buildCodexFailurePrompt(ctx, projectRoot);
|
||||
|
||||
note(
|
||||
[
|
||||
'Launching Codex to help debug this failure.',
|
||||
'It has the context of what went wrong.',
|
||||
'',
|
||||
k.dim("Exit Codex (Ctrl-C or /quit) when you're ready to come back to setup."),
|
||||
].join('\n'),
|
||||
'Handing off to Codex',
|
||||
);
|
||||
|
||||
return new Promise<FailureAssistResult>((resolve) => {
|
||||
// codex accepts a positional initial prompt for the interactive TUI.
|
||||
const child = spawn('codex', [prompt], { cwd: projectRoot, stdio: 'inherit' });
|
||||
child.on('close', () => {
|
||||
p.log.success(brandBody("Back from Codex. Let's continue."));
|
||||
resolve('launched');
|
||||
});
|
||||
child.on('error', () => {
|
||||
p.log.error("Couldn't launch Codex.");
|
||||
resolve('unavailable');
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// ─── install verification ────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Verify the codex provider payload is fully wired — the same pre-flight the
|
||||
* /add-codex skill checks. While codex ships in trunk these always pass; once
|
||||
* the payload moves to the providers branch, a failed check means the install
|
||||
* step should run (or the user finishes via /add-codex).
|
||||
*/
|
||||
export function verifyCodexInstall(): { ok: boolean; problems: string[] } {
|
||||
const problems: string[] = [];
|
||||
const root = process.cwd();
|
||||
|
||||
const requiredFiles = [
|
||||
'src/providers/codex.ts',
|
||||
'src/providers/codex-agents-md.ts',
|
||||
'container/agent-runner/src/providers/codex.ts',
|
||||
'container/agent-runner/src/providers/codex-app-server.ts',
|
||||
];
|
||||
for (const file of requiredFiles) {
|
||||
if (!fs.existsSync(path.join(root, file))) problems.push(`missing file: ${file}`);
|
||||
}
|
||||
|
||||
for (const barrel of ['src/providers/index.ts', 'container/agent-runner/src/providers/index.ts']) {
|
||||
const barrelPath = path.join(root, barrel);
|
||||
if (!fs.existsSync(barrelPath) || !fs.readFileSync(barrelPath, 'utf-8').includes("import './codex.js';")) {
|
||||
problems.push(`missing barrel import in ${barrel}`);
|
||||
}
|
||||
}
|
||||
|
||||
const dockerfilePath = path.join(root, 'container', 'Dockerfile');
|
||||
const dockerfile = fs.existsSync(dockerfilePath) ? fs.readFileSync(dockerfilePath, 'utf-8') : '';
|
||||
if (!/ARG CODEX_VERSION=/.test(dockerfile) || !dockerfile.includes('@openai/codex@${CODEX_VERSION}')) {
|
||||
problems.push('container/Dockerfile missing the pinned @openai/codex install');
|
||||
}
|
||||
|
||||
return { ok: problems.length === 0, problems };
|
||||
}
|
||||
|
||||
export async function runCodexInstallCheck(): Promise<void> {
|
||||
p.log.step(brandBody('Checking the Codex provider install…'));
|
||||
const { ok, problems } = verifyCodexInstall();
|
||||
if (ok) {
|
||||
setupLog.step('codex-install', 'success', 0, {});
|
||||
p.log.success(brandBody('Codex installed properly.'));
|
||||
return;
|
||||
}
|
||||
|
||||
setupLog.step('codex-install', 'failed', 0, { PROBLEMS: problems.join('; ') });
|
||||
p.log.warn(brandBody('The Codex provider is not fully installed:'));
|
||||
for (const problem of problems) console.log(k.dim(` • ${problem}`));
|
||||
p.log.warn(
|
||||
brandBody(
|
||||
'Finish it with your coding agent of choice: open Codex CLI or Claude Code in this repo and run the /add-codex skill. Setup will continue — Codex groups will work once the install completes.',
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// Self-registration: the setup picker and the standalone `provider-auth` step
|
||||
// render from the registry — this call is codex's only reach-in to the setup
|
||||
// flow (guarded by the barrel-driven registration test).
|
||||
registerSetupProvider({
|
||||
value: 'codex',
|
||||
label: 'Codex',
|
||||
hint: 'OpenAI — ChatGPT subscription or API key',
|
||||
runAuth: runCodexAuthStep,
|
||||
runInstallCheck: runCodexInstallCheck,
|
||||
offerFailureAssist: offerCodexFailureAssist,
|
||||
});
|
||||
@@ -0,0 +1,112 @@
|
||||
/**
|
||||
* The 32KB Codex project-doc cap must DEGRADE, never throw: composeGroupAgentsMd
|
||||
* runs inside the provider contribution at every spawn, and a throw there rides
|
||||
* wakeContainer's transient-retry contract — host-sweep respawns every 60s
|
||||
* forever and the group goes silently dark (a permanent condition disguised as
|
||||
* a transient one). Oversized docs drop their largest optional instruction
|
||||
* sections, keep the core contract, and say so in the doc.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
vi.mock('../config.js', async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import('../config.js')>()),
|
||||
DATA_DIR: '/tmp/nanoclaw-agents-md-test/data',
|
||||
}));
|
||||
|
||||
import { composeGroupAgentsMd, CODEX_PROJECT_DOC_MAX_BYTES } from './codex-agents-md.js';
|
||||
import { closeDb, createAgentGroup, initTestDb, runMigrations } from '../db/index.js';
|
||||
import { ensureContainerConfig, updateContainerConfigJson } from '../db/container-configs.js';
|
||||
import type { AgentGroup } from '../types.js';
|
||||
|
||||
const TEST_ROOT = '/tmp/nanoclaw-agents-md-test';
|
||||
|
||||
function group(folder: string): AgentGroup {
|
||||
return {
|
||||
id: `ag-${folder}`,
|
||||
name: folder,
|
||||
folder,
|
||||
agent_provider: null,
|
||||
created_at: new Date().toISOString(),
|
||||
} as AgentGroup;
|
||||
}
|
||||
|
||||
describe('composeGroupAgentsMd cap handling', () => {
|
||||
beforeEach(() => {
|
||||
if (fs.existsSync(TEST_ROOT)) fs.rmSync(TEST_ROOT, { recursive: true });
|
||||
fs.mkdirSync(path.join(TEST_ROOT, 'data'), { recursive: true });
|
||||
const db = initTestDb();
|
||||
runMigrations(db);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
closeDb();
|
||||
if (fs.existsSync(TEST_ROOT)) fs.rmSync(TEST_ROOT, { recursive: true });
|
||||
});
|
||||
|
||||
it('writes the doc untouched when under the cap', () => {
|
||||
const g = group('small');
|
||||
createAgentGroup(g);
|
||||
ensureContainerConfig(g.id);
|
||||
const groupDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agents-md-'));
|
||||
try {
|
||||
composeGroupAgentsMd(g, groupDir);
|
||||
const doc = fs.readFileSync(path.join(groupDir, 'AGENTS.md'), 'utf-8');
|
||||
expect(doc).not.toContain('Omitted for size');
|
||||
// Agent-authored skills must be told their persistent home — without
|
||||
// this, authored skills land on ephemeral container paths and vanish.
|
||||
expect(doc).toContain('/workspace/agent/skills');
|
||||
expect(Buffer.byteLength(doc, 'utf-8')).toBeLessThanOrEqual(CODEX_PROJECT_DOC_MAX_BYTES);
|
||||
} finally {
|
||||
fs.rmSync(groupDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('inlines the memory index so recall does not depend on a file read', () => {
|
||||
const g = group('with-memory');
|
||||
createAgentGroup(g);
|
||||
ensureContainerConfig(g.id);
|
||||
const groupDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agents-md-'));
|
||||
try {
|
||||
fs.mkdirSync(path.join(groupDir, 'memory'), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(groupDir, 'memory', 'index.md'),
|
||||
'# Memory Index\n- [People](memories/people/) - notes about people and their preferences\n',
|
||||
);
|
||||
|
||||
composeGroupAgentsMd(g, groupDir);
|
||||
|
||||
const doc = fs.readFileSync(path.join(groupDir, 'AGENTS.md'), 'utf-8');
|
||||
expect(doc).toContain('Current memory index');
|
||||
expect(doc).toContain('notes about people and their preferences');
|
||||
} finally {
|
||||
fs.rmSync(groupDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('degrades instead of throwing when MCP instructions push the doc over the cap', () => {
|
||||
const g = group('oversized');
|
||||
createAgentGroup(g);
|
||||
ensureContainerConfig(g.id);
|
||||
updateContainerConfigJson(g.id, 'mcp_servers', {
|
||||
bloated: { command: 'x', instructions: 'y'.repeat(CODEX_PROJECT_DOC_MAX_BYTES + 1024) },
|
||||
lean: { command: 'x', instructions: 'short and useful' },
|
||||
});
|
||||
const groupDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agents-md-'));
|
||||
try {
|
||||
composeGroupAgentsMd(g, groupDir); // must not throw
|
||||
|
||||
const doc = fs.readFileSync(path.join(groupDir, 'AGENTS.md'), 'utf-8');
|
||||
expect(Buffer.byteLength(doc, 'utf-8')).toBeLessThanOrEqual(CODEX_PROJECT_DOC_MAX_BYTES);
|
||||
// Largest optional section dropped, named in the doc; the rest survive.
|
||||
expect(doc).toContain('Omitted for size');
|
||||
expect(doc).toContain('MCP Server: bloated');
|
||||
expect(doc).toContain('short and useful');
|
||||
expect(doc).toContain('Memory System');
|
||||
} finally {
|
||||
fs.rmSync(groupDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,188 @@
|
||||
/**
|
||||
* AGENTS.md composition for codex agent groups — codex-owned payload code.
|
||||
*
|
||||
* AGENTS.md is Codex's project doc (its CLAUDE.md equivalent). Composed fresh
|
||||
* on every spawn by the codex provider contribution (see ./codex.ts) from:
|
||||
* - the shared base (`container/AGENTS.md`)
|
||||
* - a pointer to the runner-scaffolded memory system (created container-side
|
||||
* at boot via the `usesMemoryScaffold` capability — nothing is written here)
|
||||
* - a pointer to codex-native skills under `.agents/skills`
|
||||
* - each enabled NanoClaw module's `*.instructions.md` fragment
|
||||
* - MCP server `instructions` from container.json
|
||||
*
|
||||
* Codex hard-caps project-doc loading (`project_doc_max_bytes`, mirrored in
|
||||
* the container provider's config.toml writer) — compose fails loudly rather
|
||||
* than letting Codex truncate silently.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import type { McpServerConfig } from '../container-config.js';
|
||||
import { getContainerConfig } from '../db/container-configs.js';
|
||||
import { log } from '../log.js';
|
||||
import type { AgentGroup } from '../types.js';
|
||||
|
||||
export const CODEX_PROJECT_DOC_MAX_BYTES = 32 * 1024;
|
||||
export const CODEX_PROJECT_DOC_WARN_BYTES = 28 * 1024;
|
||||
|
||||
const HEADER = '<!-- Composed at spawn. Do not edit. Edit memory/system/definition.md for memory behavior. -->';
|
||||
const MCP_TOOLS_HOST_SUBPATH = path.join('container', 'agent-runner', 'src', 'mcp-tools');
|
||||
|
||||
const MEMORY_POINTER = [
|
||||
'Editable memory-system definition: `/workspace/agent/memory/system/definition.md`.',
|
||||
'Top memory index: `/workspace/agent/memory/index.md`.',
|
||||
'Read the definition and index, then use memories, data, and conversation archives when relevant.',
|
||||
'Stored user preferences are binding: before your first reply in a session, check the index below and read any memory file relevant to the user or the request, and apply it without being asked.',
|
||||
'Do not use `AGENTS.local.md` or `AGENTS.override.md` for memory.',
|
||||
].join('\n\n');
|
||||
|
||||
/**
|
||||
* Inline the group's current memory index into the composed doc. Recall must
|
||||
* not depend on the model choosing to read a file before its first reply —
|
||||
* with the map already in the system prompt, applying a stored preference is
|
||||
* one hop (read the relevant memory file), not three. The index is small
|
||||
* (hundreds of bytes); the 32KB fit logic above bounds the worst case.
|
||||
*/
|
||||
function memoryIndexInline(groupDir: string): string {
|
||||
const indexPath = path.join(groupDir, 'memory', 'index.md');
|
||||
if (!fs.existsSync(indexPath)) return '';
|
||||
const content = fs.readFileSync(indexPath, 'utf-8').trim();
|
||||
if (!content) return '';
|
||||
return ['Current memory index (paths relative to `/workspace/agent/memory/`):', content].join('\n\n');
|
||||
}
|
||||
|
||||
const NATIVE_RUNTIME_SKILLS_POINTER = [
|
||||
'Selected NanoClaw runtime skills are available as Codex-native skills at `/workspace/agent/.agents/skills`.',
|
||||
'Each skill directory contains a `SKILL.md` with its trigger description plus any supporting files, and points to the read-only shared skill source under `/app/skills`.',
|
||||
'Use skill discovery to load these skills only when their descriptions match the task. Full skill instructions live in the skill directories, not in `AGENTS.md`.',
|
||||
'Skills YOU author or install yourself go in `/workspace/agent/skills/<name>/SKILL.md` — persistent, provider-neutral (they load under any agent provider this group runs on), and yours to write and update over time. They are linked into `$CODEX_HOME/skills` automatically at boot. Never write skills anywhere else: paths outside your workspace and `$CODEX_HOME` are ephemeral.',
|
||||
].join('\n\n');
|
||||
|
||||
interface AgentsMdSection {
|
||||
name: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
export function composeGroupAgentsMd(group: AgentGroup, groupDir: string): void {
|
||||
if (!fs.existsSync(groupDir)) fs.mkdirSync(groupDir, { recursive: true });
|
||||
|
||||
const configRow = getContainerConfig(group.id);
|
||||
const mcpServers: Record<string, McpServerConfig> = configRow
|
||||
? (JSON.parse(configRow.mcp_servers) as Record<string, McpServerConfig>)
|
||||
: {};
|
||||
|
||||
const sections: AgentsMdSection[] = [{ name: 'header', content: HEADER }];
|
||||
const pushSection = (name: string, ...content: string[]): void => {
|
||||
const body = content
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean)
|
||||
.join('\n\n');
|
||||
if (body) sections.push({ name, content: `# ${name}\n\n${body}` });
|
||||
};
|
||||
|
||||
const sharedBase = path.join(process.cwd(), 'container', 'AGENTS.md');
|
||||
if (fs.existsSync(sharedBase)) {
|
||||
pushSection('NanoClaw Runtime Contract', fs.readFileSync(sharedBase, 'utf-8'));
|
||||
}
|
||||
|
||||
pushSection('Memory System', MEMORY_POINTER, memoryIndexInline(groupDir));
|
||||
pushSection('Native Runtime Skills', NATIVE_RUNTIME_SKILLS_POINTER);
|
||||
|
||||
const cliDisabled = configRow?.cli_scope === 'disabled';
|
||||
const mcpToolsHostDir = path.join(process.cwd(), MCP_TOOLS_HOST_SUBPATH);
|
||||
if (fs.existsSync(mcpToolsHostDir)) {
|
||||
for (const entry of fs.readdirSync(mcpToolsHostDir).sort()) {
|
||||
const match = entry.match(/^(.+)\.instructions\.md$/);
|
||||
if (!match) continue;
|
||||
const moduleName = match[1];
|
||||
if (moduleName === 'cli' && cliDisabled) continue;
|
||||
pushSection(`NanoClaw Module: ${moduleName}`, fs.readFileSync(path.join(mcpToolsHostDir, entry), 'utf-8'));
|
||||
}
|
||||
}
|
||||
|
||||
for (const [name, mcp] of Object.entries(mcpServers)) {
|
||||
if (mcp.instructions) {
|
||||
pushSection(`MCP Server: ${name}`, mcp.instructions);
|
||||
}
|
||||
}
|
||||
|
||||
const content = fitAgentsMdToCap(group, sections);
|
||||
writeAtomic(path.join(groupDir, 'AGENTS.md'), content);
|
||||
}
|
||||
|
||||
function renderAgentsMd(sections: AgentsMdSection[]): string {
|
||||
return (
|
||||
sections
|
||||
.map((section) => section.content.trim())
|
||||
.filter(Boolean)
|
||||
.join('\n\n') + '\n'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fit the doc under Codex's 32KB project-doc cap by DEGRADING, never
|
||||
* throwing: a per-spawn throw rides wakeContainer's transient-retry contract
|
||||
* — host-sweep respawns every 60s forever and the group goes silently dark.
|
||||
* Instead, drop the largest optional instruction sections (per-module and
|
||||
* per-MCP-server) until the doc fits, log what was dropped at error level,
|
||||
* and tell the agent in the doc itself. The core contract (header, runtime
|
||||
* contract, memory, skills pointer) is never dropped.
|
||||
*/
|
||||
function fitAgentsMdToCap(group: AgentGroup, sections: AgentsMdSection[]): string {
|
||||
const sectionBytes = (): { section: string; bytes: number }[] =>
|
||||
sections.map((section) => ({ section: section.name, bytes: Buffer.byteLength(section.content, 'utf-8') }));
|
||||
|
||||
const isDroppable = (s: AgentsMdSection): boolean =>
|
||||
s.name.startsWith('MCP Server: ') || s.name.startsWith('NanoClaw Module: ');
|
||||
|
||||
const dropped: string[] = [];
|
||||
const render = (): string => {
|
||||
const parts = [...sections];
|
||||
if (dropped.length > 0) {
|
||||
parts.push({
|
||||
name: 'omitted',
|
||||
content:
|
||||
`# Omitted for size\n\nThese instruction sections were omitted to fit Codex's project-doc cap: ` +
|
||||
`${dropped.join(', ')}. Their tools still work; consult each tool's own description.`,
|
||||
});
|
||||
}
|
||||
return renderAgentsMd(parts);
|
||||
};
|
||||
|
||||
let content = render();
|
||||
while (Buffer.byteLength(content, 'utf-8') > CODEX_PROJECT_DOC_MAX_BYTES) {
|
||||
const candidates = sections
|
||||
.filter(isDroppable)
|
||||
.sort((a, b) => Buffer.byteLength(b.content, 'utf-8') - Buffer.byteLength(a.content, 'utf-8'));
|
||||
if (candidates.length === 0) break; // only core left — write oversized rather than brick the group
|
||||
sections.splice(sections.indexOf(candidates[0]), 1);
|
||||
dropped.push(candidates[0].name);
|
||||
content = render();
|
||||
}
|
||||
|
||||
const bytes = Buffer.byteLength(content, 'utf-8');
|
||||
if (dropped.length > 0) {
|
||||
log.error('AGENTS.md exceeded Codex project-doc cap — dropped largest instruction sections', {
|
||||
group: group.name,
|
||||
bytes,
|
||||
maxBytes: CODEX_PROJECT_DOC_MAX_BYTES,
|
||||
dropped,
|
||||
sections: sectionBytes(),
|
||||
});
|
||||
} else if (bytes >= CODEX_PROJECT_DOC_WARN_BYTES) {
|
||||
log.warn('AGENTS.md is near Codex project-doc cap', {
|
||||
group: group.name,
|
||||
bytes,
|
||||
warnBytes: CODEX_PROJECT_DOC_WARN_BYTES,
|
||||
maxBytes: CODEX_PROJECT_DOC_MAX_BYTES,
|
||||
sections: sectionBytes(),
|
||||
});
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
function writeAtomic(filePath: string, content: string): void {
|
||||
const tmp = `${filePath}.tmp-${process.pid}`;
|
||||
fs.writeFileSync(tmp, content);
|
||||
fs.renameSync(tmp, filePath);
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* In-process seam test for the codex HOST contribution's runtime consumption
|
||||
* of core (the "consumes core" leg the skill guidelines require): drive the
|
||||
* REAL registered contribution — via the real barrel and registry, never by
|
||||
* importing codex.ts's internals — against a real test DB and a temp
|
||||
* GROUPS_DIR/DATA_DIR, then hand its result to the real buildMounts.
|
||||
*
|
||||
* This is what catches core drift that typecheck can't: the
|
||||
* DATA_DIR/v2-sessions/<id>/.codex-shared session layout, the
|
||||
* getAgentGroup/getContainerConfig reads, the mcp_servers JSON shape consumed
|
||||
* by composeGroupAgentsMd, and the mount set buildMounts assembles for a
|
||||
* surfaces-providing provider. (codex-registration.test.ts only guards that
|
||||
* the name is registered; provider-surfaces.test.ts drives a FAKE provider to
|
||||
* test the seam itself.)
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
const TEST_ROOT = '/tmp/nanoclaw-codex-host-contribution-test';
|
||||
const DATA_DIR = path.join(TEST_ROOT, 'data');
|
||||
const GROUPS_DIR = path.join(TEST_ROOT, 'groups');
|
||||
|
||||
vi.mock('../config.js', async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import('../config.js')>()),
|
||||
DATA_DIR: '/tmp/nanoclaw-codex-host-contribution-test/data',
|
||||
GROUPS_DIR: '/tmp/nanoclaw-codex-host-contribution-test/groups',
|
||||
}));
|
||||
|
||||
import { buildMounts } from '../container-runner.js';
|
||||
import { closeDb, createAgentGroup, initTestDb, runMigrations } from '../db/index.js';
|
||||
import { ensureContainerConfig, updateContainerConfigJson } from '../db/container-configs.js';
|
||||
import { getProviderContainerConfig } from './provider-container-registry.js';
|
||||
import './index.js'; // the real host provider barrel
|
||||
import type { ContainerConfig } from '../container-config.js';
|
||||
import type { AgentGroup, Session } from '../types.js';
|
||||
|
||||
function group(id: string, folder: string): AgentGroup {
|
||||
return { id, name: folder, folder, agent_provider: null, created_at: new Date().toISOString() } as AgentGroup;
|
||||
}
|
||||
|
||||
describe('codex host contribution against real core', () => {
|
||||
beforeEach(() => {
|
||||
fs.rmSync(TEST_ROOT, { recursive: true, force: true });
|
||||
fs.mkdirSync(DATA_DIR, { recursive: true });
|
||||
fs.mkdirSync(GROUPS_DIR, { recursive: true });
|
||||
runMigrations(initTestDb());
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
closeDb();
|
||||
fs.rmSync(TEST_ROOT, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('creates the per-group state dir, composes AGENTS.md from the real config row, and mounts both', () => {
|
||||
const ag = group('ag-codex', 'codex-group');
|
||||
createAgentGroup(ag);
|
||||
ensureContainerConfig(ag.id);
|
||||
updateContainerConfigJson(ag.id, 'mcp_servers', {
|
||||
tooling: { command: 'x', instructions: 'use the tooling server for builds' },
|
||||
});
|
||||
const groupDir = path.join(GROUPS_DIR, ag.folder);
|
||||
|
||||
const contributionFn = getProviderContainerConfig('codex');
|
||||
expect(contributionFn).toBeDefined();
|
||||
const contribution = contributionFn!({
|
||||
sessionDir: path.join(DATA_DIR, 'v2-sessions', ag.id, 'session-1'),
|
||||
agentGroupId: ag.id,
|
||||
groupDir,
|
||||
selectedSkills: [],
|
||||
hostEnv: process.env,
|
||||
});
|
||||
|
||||
// Per-group codex state dir exists and is mounted RW at ~/.codex.
|
||||
const codexShared = path.join(DATA_DIR, 'v2-sessions', ag.id, '.codex-shared');
|
||||
expect(fs.existsSync(codexShared)).toBe(true);
|
||||
// OneCLI's auth-stub mountpoint is pre-created — on macOS Docker can't
|
||||
// create a missing file mountpoint inside a virtiofs dir mount (exit 125
|
||||
// on first spawn). Red here = the pre-create line was dropped.
|
||||
expect(fs.existsSync(path.join(codexShared, 'auth.json'))).toBe(true);
|
||||
const codexMount = contribution.mounts?.find((m) => m.containerPath === '/home/node/.codex');
|
||||
expect(codexMount).toMatchObject({ hostPath: codexShared, readonly: false });
|
||||
|
||||
// AGENTS.md composed from the real DB row — MCP instructions included.
|
||||
const agentsMd = fs.readFileSync(path.join(groupDir, 'AGENTS.md'), 'utf-8');
|
||||
expect(agentsMd).toContain('MCP Server: tooling');
|
||||
expect(agentsMd).toContain('use the tooling server for builds');
|
||||
|
||||
// The full mount set: codex surfaces in, default claude surfaces out.
|
||||
const session = { id: 'session-1', agent_group_id: ag.id } as Session;
|
||||
const config: ContainerConfig = { mcpServers: {}, packages: { apt: [], npm: [] }, additionalMounts: [], skills: [] };
|
||||
const mounts = buildMounts(ag, session, config, 'codex', contribution);
|
||||
const containerPaths = mounts.map((m) => m.containerPath);
|
||||
expect(containerPaths).toContain('/home/node/.codex');
|
||||
expect(containerPaths.some((p) => p.endsWith('AGENTS.md'))).toBe(true);
|
||||
expect(containerPaths).not.toContain('/home/node/.claude');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Integration test for the codex provider's HOST-side reach-in: the self-registration
|
||||
* import in the src/providers/index.ts barrel. Importing the barrel runs codex.ts's
|
||||
* top-level registerProviderContainerConfig('codex', …); without that import line the
|
||||
* host never wires the provider's per-session mounts / env passthrough.
|
||||
*
|
||||
* Behavior, not structural, and BARREL-ONLY: it imports the real barrel (./index.js),
|
||||
* never ./codex.js directly, then asserts the registry actually contains the provider.
|
||||
* Importing the provider module directly (as codex.factory.test.ts does) self-registers
|
||||
* it and would stay GREEN even if the barrel line were deleted — that is a unit test,
|
||||
* not a registration guard. This test goes red if the barrel import is deleted/drifts,
|
||||
* or the barrel fails to evaluate.
|
||||
*
|
||||
* A provider is a MULTI-POINT integration: this guards the HOST barrel; the CONTAINER
|
||||
* barrel is guarded by the sibling bun test; the SDK/CLI dependency + Dockerfile install
|
||||
* are guarded by the build/container legs (see the skill's validate step).
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
import { listProviderContainerConfigNames } from './provider-container-registry.js';
|
||||
import './index.js'; // the real host provider barrel — triggers each provider's self-registration
|
||||
|
||||
describe('codex provider host registration', () => {
|
||||
it('registers codex host container-config via the barrel', () => {
|
||||
expect(listProviderContainerConfigNames()).toContain('codex');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Host-side container config for the `codex` provider.
|
||||
*
|
||||
* Registers with `providesAgentSurfaces` — codex owns its agent-facing
|
||||
* surfaces, so core skips the default (Claude) compose/mounts and this
|
||||
* contribution supplies them instead:
|
||||
*
|
||||
* - AGENTS.md — codex's project doc, composed fresh every spawn
|
||||
* (see ./codex-agents-md.ts), mounted RO over the RW group dir.
|
||||
* - .agents/skills — codex-native skill links synced to the group's
|
||||
* container.json selection, mounted RO.
|
||||
* - ~/.codex — a per-GROUP private state dir (`.codex-shared`), persistent
|
||||
* across sessions so thread metadata and config.toml survive respawns.
|
||||
*
|
||||
* Credentials: NONE here — v2's invariant is that containers never receive
|
||||
* raw API keys; OneCLI is the sole credential path. The OpenAI key (or
|
||||
* ChatGPT token) lives in the OneCLI vault with an api.openai.com /
|
||||
* chatgpt.com host pattern; codex's traffic already rides the gateway proxy
|
||||
* (every spawn applies it — see container-runner.ts), which injects the real
|
||||
* credential in flight. The container only ever sees the `onecli-managed`
|
||||
* placeholder. Model/effort come from container_config (`ncl groups config
|
||||
* update --model/--effort`), not env.
|
||||
*
|
||||
* Memory and exchange archiving are NOT handled here either — the
|
||||
* container-side provider declares `usesMemoryScaffold` (the runner
|
||||
* scaffolds the memory tree) and implements `onExchangeComplete` (the
|
||||
* provider's own exchange-archive.ts persists each exchange).
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { DATA_DIR } from '../config.js';
|
||||
import { getAgentGroup } from '../db/agent-groups.js';
|
||||
import { composeGroupAgentsMd } from './codex-agents-md.js';
|
||||
import { registerProviderContainerConfig } from './provider-container-registry.js';
|
||||
|
||||
registerProviderContainerConfig(
|
||||
'codex',
|
||||
(ctx) => {
|
||||
// Per-group codex state (config.toml, thread metadata).
|
||||
const codexDir = path.join(DATA_DIR, 'v2-sessions', ctx.agentGroupId, '.codex-shared');
|
||||
fs.mkdirSync(codexDir, { recursive: true });
|
||||
// OneCLI bind-mounts its auth stub at ~/.codex/auth.json, nested inside
|
||||
// this dir mount — Docker on macOS can't create a missing mountpoint file
|
||||
// inside a virtiofs bind mount (runc: "mountpoint is outside of rootfs",
|
||||
// exit 125), so it must exist before first spawn. Re-created here per
|
||||
// spawn because a group reset that wipes .codex-shared re-triggers it.
|
||||
// The 'a' flag creates the file if missing, never truncates an existing one.
|
||||
fs.closeSync(fs.openSync(path.join(codexDir, 'auth.json'), 'a'));
|
||||
|
||||
// Compose this group's AGENTS.md and sync codex-native skill links.
|
||||
const group = getAgentGroup(ctx.agentGroupId);
|
||||
if (group) composeGroupAgentsMd(group, ctx.groupDir);
|
||||
syncCodexSkillLinks(ctx.groupDir, ctx.selectedSkills);
|
||||
|
||||
// No credential env here — OneCLI's container-config drives auth end to
|
||||
// end: the gateway serves a sentinel auth.json stub into ~/.codex for
|
||||
// BOTH auth modes (ChatGPT subscription and API key) and swaps the real
|
||||
// credential on the wire. Note the runner's CODEX_ENV_ALLOWLIST
|
||||
// deliberately strips OPENAI_API_KEY from the codex process env — auth
|
||||
// never rides env vars, only the stub. Duplicating any of it here would
|
||||
// be a second source of truth.
|
||||
const mounts = [{ hostPath: codexDir, containerPath: '/home/node/.codex', readonly: false }];
|
||||
const composedAgentsMd = path.join(ctx.groupDir, 'AGENTS.md');
|
||||
if (fs.existsSync(composedAgentsMd)) {
|
||||
// RO over the RW group dir — regenerated every spawn, agent edits would
|
||||
// be clobbered anyway. Memory behavior is edited via memory/system/.
|
||||
mounts.push({ hostPath: composedAgentsMd, containerPath: '/workspace/agent/AGENTS.md', readonly: true });
|
||||
}
|
||||
const agentsDir = path.join(ctx.groupDir, '.agents');
|
||||
if (fs.existsSync(agentsDir)) {
|
||||
mounts.push({ hostPath: agentsDir, containerPath: '/workspace/agent/.agents', readonly: true });
|
||||
}
|
||||
|
||||
return { mounts };
|
||||
},
|
||||
{ providesAgentSurfaces: true },
|
||||
);
|
||||
|
||||
/**
|
||||
* Sync `.agents/skills/<name>` symlinks to the selected skill set. Targets are
|
||||
* container paths (`/app/skills/<name>`) — dangling on the host, valid inside.
|
||||
*/
|
||||
function syncCodexSkillLinks(groupDir: string, selectedSkills: string[]): void {
|
||||
const skillsDir = path.join(groupDir, '.agents', 'skills');
|
||||
fs.mkdirSync(skillsDir, { recursive: true });
|
||||
|
||||
const desired = new Set(selectedSkills);
|
||||
for (const entry of fs.readdirSync(skillsDir)) {
|
||||
const entryPath = path.join(skillsDir, entry);
|
||||
let isSymlink = false;
|
||||
try {
|
||||
isSymlink = fs.lstatSync(entryPath).isSymbolicLink();
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
if (isSymlink && !desired.has(entry)) fs.unlinkSync(entryPath);
|
||||
}
|
||||
|
||||
for (const skill of selectedSkills) {
|
||||
const linkPath = path.join(skillsDir, skill);
|
||||
try {
|
||||
fs.lstatSync(linkPath);
|
||||
} catch {
|
||||
fs.symlinkSync(`/app/skills/${skill}`, linkPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,3 +4,6 @@
|
||||
// needs (claude, mock) don't appear here.
|
||||
//
|
||||
// Skills add a new provider by appending one import line below.
|
||||
|
||||
import './codex.js';
|
||||
import './opencode.js';
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Integration test for the opencode provider's HOST-side reach-in: the self-registration
|
||||
* import in the src/providers/index.ts barrel. Importing the barrel runs opencode.ts's
|
||||
* top-level registerProviderContainerConfig('opencode', …); without that import line the
|
||||
* host never wires the provider's per-session mounts / env passthrough.
|
||||
*
|
||||
* Behavior, not structural, and BARREL-ONLY: it imports the real barrel (./index.js),
|
||||
* never ./opencode.js directly, then asserts the registry actually contains the provider.
|
||||
* Importing the provider module directly (as opencode.factory.test.ts does) self-registers
|
||||
* it and would stay GREEN even if the barrel line were deleted — that is a unit test,
|
||||
* not a registration guard. This test goes red if the barrel import is deleted/drifts,
|
||||
* or the barrel fails to evaluate.
|
||||
*
|
||||
* A provider is a MULTI-POINT integration: this guards the HOST barrel; the CONTAINER
|
||||
* barrel is guarded by the sibling bun test; the SDK/CLI dependency + Dockerfile install
|
||||
* are guarded by the build/container legs (see the skill's validate step).
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
import { listProviderContainerConfigNames } from './provider-container-registry.js';
|
||||
import './index.js'; // the real host provider barrel — triggers each provider's self-registration
|
||||
|
||||
describe('opencode provider host registration', () => {
|
||||
it('registers opencode host container-config via the barrel', () => {
|
||||
expect(listProviderContainerConfigNames()).toContain('opencode');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Host-side container config for the `opencode` provider.
|
||||
*
|
||||
* OpenCode's `opencode serve` process stores state under XDG_DATA_HOME, which
|
||||
* we pin to a per-session host directory mounted at /opencode-xdg. The
|
||||
* OPENCODE_* env vars tell the CLI which provider/model to use at runtime
|
||||
* (read on the host, injected into the container). NO_PROXY / no_proxy are
|
||||
* merged with host values so the in-container OpenCode client can talk to
|
||||
* 127.0.0.1 even when HTTPS_PROXY is set by OneCLI.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { registerProviderContainerConfig } from './provider-container-registry.js';
|
||||
|
||||
function mergeNoProxy(current: string | undefined, additions: string): string {
|
||||
if (!current?.trim()) return additions;
|
||||
const parts = new Set(
|
||||
current
|
||||
.split(/[\s,]+/)
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean),
|
||||
);
|
||||
for (const addition of additions.split(',')) {
|
||||
const trimmed = addition.trim();
|
||||
if (trimmed) parts.add(trimmed);
|
||||
}
|
||||
return [...parts].join(',');
|
||||
}
|
||||
|
||||
registerProviderContainerConfig('opencode', (ctx) => {
|
||||
const opencodeDir = path.join(ctx.sessionDir, 'opencode-xdg');
|
||||
fs.mkdirSync(opencodeDir, { recursive: true });
|
||||
|
||||
const env: Record<string, string> = {
|
||||
XDG_DATA_HOME: '/opencode-xdg',
|
||||
NO_PROXY: mergeNoProxy(ctx.hostEnv.NO_PROXY, '127.0.0.1,localhost'),
|
||||
no_proxy: mergeNoProxy(ctx.hostEnv.no_proxy, '127.0.0.1,localhost'),
|
||||
};
|
||||
for (const key of ['OPENCODE_PROVIDER', 'OPENCODE_MODEL', 'OPENCODE_SMALL_MODEL'] as const) {
|
||||
const value = ctx.hostEnv[key];
|
||||
if (value) env[key] = value;
|
||||
}
|
||||
|
||||
return {
|
||||
mounts: [{ hostPath: opencodeDir, containerPath: '/opencode-xdg', readonly: false }],
|
||||
env,
|
||||
};
|
||||
});
|
||||
Reference in New Issue
Block a user