From 4cb13b2b6015ba4c800acc446a1e6f6863919790 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Sun, 8 Mar 2026 23:15:05 +0200 Subject: [PATCH 1/5] skill/ollama-tool: local Ollama model inference via MCP Co-Authored-By: Claude Opus 4.6 --- .env.example | 2 +- container/agent-runner/src/index.ts | 7 +- .../agent-runner/src/ollama-mcp-stdio.ts | 147 ++++++++++++++++++ scripts/ollama-watch.sh | 41 +++++ src/container-runner.ts | 7 +- 5 files changed, 201 insertions(+), 3 deletions(-) create mode 100644 container/agent-runner/src/ollama-mcp-stdio.ts create mode 100755 scripts/ollama-watch.sh diff --git a/.env.example b/.env.example index 8b1378917..bf3bd02d0 100644 --- a/.env.example +++ b/.env.example @@ -1 +1 @@ - +OLLAMA_HOST= diff --git a/container/agent-runner/src/index.ts b/container/agent-runner/src/index.ts index 543c5f55b..74323931d 100644 --- a/container/agent-runner/src/index.ts +++ b/container/agent-runner/src/index.ts @@ -432,7 +432,8 @@ async function runQuery( 'TeamCreate', 'TeamDelete', 'SendMessage', 'TodoWrite', 'ToolSearch', 'Skill', 'NotebookEdit', - 'mcp__nanoclaw__*' + 'mcp__nanoclaw__*', + 'mcp__ollama__*' ], env: sdkEnv, permissionMode: 'bypassPermissions', @@ -448,6 +449,10 @@ async function runQuery( NANOCLAW_IS_MAIN: containerInput.isMain ? '1' : '0', }, }, + ollama: { + command: 'node', + args: [path.join(path.dirname(mcpServerPath), 'ollama-mcp-stdio.js')], + }, }, hooks: { PreCompact: [{ hooks: [createPreCompactHook(containerInput.assistantName)] }], diff --git a/container/agent-runner/src/ollama-mcp-stdio.ts b/container/agent-runner/src/ollama-mcp-stdio.ts new file mode 100644 index 000000000..7d29bb2fc --- /dev/null +++ b/container/agent-runner/src/ollama-mcp-stdio.ts @@ -0,0 +1,147 @@ +/** + * Ollama MCP Server for NanoClaw + * Exposes local Ollama models as tools for the container agent. + * Uses host.docker.internal to reach the host's Ollama instance from Docker. 
+ */ + +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; +import { z } from 'zod'; + +import fs from 'fs'; +import path from 'path'; + +const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://host.docker.internal:11434'; +const OLLAMA_STATUS_FILE = '/workspace/ipc/ollama_status.json'; + +function log(msg: string): void { + console.error(`[OLLAMA] ${msg}`); +} + +function writeStatus(status: string, detail?: string): void { + try { + const data = { status, detail, timestamp: new Date().toISOString() }; + const tmpPath = `${OLLAMA_STATUS_FILE}.tmp`; + fs.mkdirSync(path.dirname(OLLAMA_STATUS_FILE), { recursive: true }); + fs.writeFileSync(tmpPath, JSON.stringify(data)); + fs.renameSync(tmpPath, OLLAMA_STATUS_FILE); + } catch { /* best-effort */ } +} + +async function ollamaFetch(path: string, options?: RequestInit): Promise { + const url = `${OLLAMA_HOST}${path}`; + try { + return await fetch(url, options); + } catch (err) { + // Fallback to localhost if host.docker.internal fails + if (OLLAMA_HOST.includes('host.docker.internal')) { + const fallbackUrl = url.replace('host.docker.internal', 'localhost'); + return await fetch(fallbackUrl, options); + } + throw err; + } +} + +const server = new McpServer({ + name: 'ollama', + version: '1.0.0', +}); + +server.tool( + 'ollama_list_models', + 'List all locally installed Ollama models. 
Use this to see which models are available before calling ollama_generate.', + {}, + async () => { + log('Listing models...'); + writeStatus('listing', 'Listing available models'); + try { + const res = await ollamaFetch('/api/tags'); + if (!res.ok) { + return { + content: [{ type: 'text' as const, text: `Ollama API error: ${res.status} ${res.statusText}` }], + isError: true, + }; + } + + const data = await res.json() as { models?: Array<{ name: string; size: number; modified_at: string }> }; + const models = data.models || []; + + if (models.length === 0) { + return { content: [{ type: 'text' as const, text: 'No models installed. Run `ollama pull ` on the host to install one.' }] }; + } + + const list = models + .map(m => `- ${m.name} (${(m.size / 1e9).toFixed(1)}GB)`) + .join('\n'); + + log(`Found ${models.length} models`); + return { content: [{ type: 'text' as const, text: `Installed models:\n${list}` }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to connect to Ollama at ${OLLAMA_HOST}: ${err instanceof Error ? err.message : String(err)}` }], + isError: true, + }; + } + }, +); + +server.tool( + 'ollama_generate', + 'Send a prompt to a local Ollama model and get a response. Good for cheaper/faster tasks like summarization, translation, or general queries. 
Use ollama_list_models first to see available models.', + { + model: z.string().describe('The model name (e.g., "llama3.2", "mistral", "gemma2")'), + prompt: z.string().describe('The prompt to send to the model'), + system: z.string().optional().describe('Optional system prompt to set model behavior'), + }, + async (args) => { + log(`>>> Generating with ${args.model} (${args.prompt.length} chars)...`); + writeStatus('generating', `Generating with ${args.model}`); + try { + const body: Record = { + model: args.model, + prompt: args.prompt, + stream: false, + }; + if (args.system) { + body.system = args.system; + } + + const res = await ollamaFetch('/api/generate', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + + if (!res.ok) { + const errorText = await res.text(); + return { + content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], + isError: true, + }; + } + + const data = await res.json() as { response: string; total_duration?: number; eval_count?: number }; + + let meta = ''; + if (data.total_duration) { + const secs = (data.total_duration / 1e9).toFixed(1); + meta = `\n\n[${args.model} | ${secs}s${data.eval_count ? ` | ${data.eval_count} tokens` : ''}]`; + log(`<<< Done: ${args.model} | ${secs}s | ${data.eval_count || '?'} tokens | ${data.response.length} chars`); + writeStatus('done', `${args.model} | ${secs}s | ${data.eval_count || '?'} tokens`); + } else { + log(`<<< Done: ${args.model} | ${data.response.length} chars`); + writeStatus('done', `${args.model} | ${data.response.length} chars`); + } + + return { content: [{ type: 'text' as const, text: data.response + meta }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to call Ollama: ${err instanceof Error ? 
err.message : String(err)}` }], + isError: true, + }; + } + }, +); + +const transport = new StdioServerTransport(); +await server.connect(transport); diff --git a/scripts/ollama-watch.sh b/scripts/ollama-watch.sh new file mode 100755 index 000000000..1aa4a93db --- /dev/null +++ b/scripts/ollama-watch.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Watch NanoClaw IPC for Ollama activity and show macOS notifications +# Usage: ./scripts/ollama-watch.sh + +cd "$(dirname "$0")/.." || exit 1 + +echo "Watching for Ollama activity..." +echo "Press Ctrl+C to stop" +echo "" + +LAST_TIMESTAMP="" + +while true; do + # Check all group IPC dirs for ollama_status.json + for status_file in data/ipc/*/ollama_status.json; do + [ -f "$status_file" ] || continue + + TIMESTAMP=$(python3 -c "import json; print(json.load(open('$status_file'))['timestamp'])" 2>/dev/null) + [ -z "$TIMESTAMP" ] && continue + [ "$TIMESTAMP" = "$LAST_TIMESTAMP" ] && continue + + LAST_TIMESTAMP="$TIMESTAMP" + STATUS=$(python3 -c "import json; d=json.load(open('$status_file')); print(d['status'])" 2>/dev/null) + DETAIL=$(python3 -c "import json; d=json.load(open('$status_file')); print(d.get('detail',''))" 2>/dev/null) + + case "$STATUS" in + generating) + osascript -e "display notification \"$DETAIL\" with title \"NanoClaw → Ollama\" sound name \"Submarine\"" 2>/dev/null + echo "$(date +%H:%M:%S) 🔄 $DETAIL" + ;; + done) + osascript -e "display notification \"$DETAIL\" with title \"NanoClaw ← Ollama ✓\" sound name \"Glass\"" 2>/dev/null + echo "$(date +%H:%M:%S) ✅ $DETAIL" + ;; + listing) + echo "$(date +%H:%M:%S) 📋 Listing models..." 
+ ;; + esac + done + sleep 0.5 +done diff --git a/src/container-runner.ts b/src/container-runner.ts index 368394085..b19226158 100644 --- a/src/container-runner.ts +++ b/src/container-runner.ts @@ -377,7 +377,12 @@ export async function runContainerAgent( const chunk = data.toString(); const lines = chunk.trim().split('\n'); for (const line of lines) { - if (line) logger.debug({ container: group.folder }, line); + if (!line) continue; + if (line.includes('[OLLAMA]')) { + logger.info({ container: group.folder }, line); + } else { + logger.debug({ container: group.folder }, line); + } } // Don't reset timeout on stderr — SDK writes debug logs continuously. // Timeout only resets on actual output (OUTPUT_MARKER in stdout). From 54a8648c9573e643cf748e6962d2cefbcc082a4c Mon Sep 17 00:00:00 2001 From: Gary Walker Date: Thu, 26 Mar 2026 12:08:54 +1100 Subject: [PATCH 2/5] feat: add model management tools to add-ollama-tool skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four new MCP tools to the existing ollama integration, consolidating model management (from #1331) into the single add-ollama-tool skill as requested by @gavrielc: - ollama_pull_model — pull a model from the Ollama registry - ollama_delete_model — delete a local model to free disk space - ollama_show_model — inspect modelfile, parameters, and architecture - ollama_list_running — list models loaded in memory with VRAM/processor info All four tools follow the existing patterns in this file: OLLAMA_HOST env var, ollamaFetch() with host.docker.internal fallback, log() and writeStatus() helpers. No changes to index.ts or container-runner.ts needed — OLLAMA_HOST is already forwarded via sdkEnv. Also updates SKILL.md description, tool list, verify steps, and adds a troubleshooting entry for large-model pull timeouts. Closes #1331. 
Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/add-ollama-tool/SKILL.md | 29 +++- .../agent-runner/src/ollama-mcp-stdio.ts | 134 ++++++++++++++++++ 2 files changed, 156 insertions(+), 7 deletions(-) diff --git a/.claude/skills/add-ollama-tool/SKILL.md b/.claude/skills/add-ollama-tool/SKILL.md index a347b4993..d9b63a5f1 100644 --- a/.claude/skills/add-ollama-tool/SKILL.md +++ b/.claude/skills/add-ollama-tool/SKILL.md @@ -1,15 +1,19 @@ --- name: add-ollama-tool -description: Add Ollama MCP server so the container agent can call local models for cheaper/faster tasks like summarization, translation, or general queries. +description: Add Ollama MCP server so the container agent can call local models and manage the Ollama model library. --- # Add Ollama Integration -This skill adds a stdio-based MCP server that exposes local Ollama models as tools for the container agent. Claude remains the orchestrator but can offload work to local models. +This skill adds a stdio-based MCP server that exposes local Ollama models as tools for the container agent. Claude remains the orchestrator but can offload work to local models, and can also manage the model library directly. 
Tools added: -- `ollama_list_models` — lists installed Ollama models -- `ollama_generate` — sends a prompt to a specified model and returns the response +- `ollama_list_models` — list installed models with name, size, family, and last modified date +- `ollama_generate` — send a prompt to a specified model and return the response +- `ollama_pull_model` — pull (download) a model from the Ollama registry by name +- `ollama_delete_model` — delete a locally installed model to free disk space +- `ollama_show_model` — show model details: modelfile, parameters, template, and architecture info +- `ollama_list_running` — list models currently loaded in memory with memory usage and processor type ## Phase 1: Pre-flight @@ -106,7 +110,7 @@ launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS ## Phase 4: Verify -### Test via WhatsApp +### Test inference Tell the user: @@ -114,6 +118,12 @@ Tell the user: > > The agent should use `ollama_list_models` to find available models, then `ollama_generate` to get a response. +### Test model management + +> Send a message like: "pull the gemma3:1b model" or "which ollama models are currently loaded in memory?" +> +> The agent should call `ollama_pull_model` or `ollama_list_running` respectively. + ### Monitor activity (optional) Run the watcher script for macOS notifications when Ollama is used: @@ -129,9 +139,10 @@ tail -f logs/nanoclaw.log | grep -i ollama ``` Look for: -- `Agent output: ... Ollama ...` — agent used Ollama successfully -- `[OLLAMA] >>> Generating` — generation started (if log surfacing works) +- `[OLLAMA] >>> Generating` — generation started - `[OLLAMA] <<< Done` — generation completed +- `[OLLAMA] Pulling model:` — pull in progress +- `[OLLAMA] Deleted:` — model removed ## Troubleshooting @@ -151,3 +162,7 @@ The agent is trying to run `ollama` CLI inside the container instead of using th ### Agent doesn't use Ollama tools The agent may not know about the tools. 
Try being explicit: "use the ollama_generate tool with gemma3:1b to answer: ..." + +### `ollama_pull_model` times out on large models + +Large models (7B+) can take several minutes. The tool uses `stream: false` so it blocks until complete — this is intentional. For very large pulls, use the host CLI directly: `ollama pull <model>` diff --git a/container/agent-runner/src/ollama-mcp-stdio.ts b/container/agent-runner/src/ollama-mcp-stdio.ts index 7d29bb2fc..379398a4f 100644 --- a/container/agent-runner/src/ollama-mcp-stdio.ts +++ b/container/agent-runner/src/ollama-mcp-stdio.ts @@ -143,5 +143,139 @@ server.tool( }, ); +server.tool( + 'ollama_pull_model', + 'Pull (download) a model from the Ollama registry by name. Returns the final status once the pull is complete. Use model names like "llama3.2", "mistral", "gemma2:9b".', + { + model: z.string().describe('Model name to pull, e.g. "llama3.2", "mistral", "gemma2:9b"'), + }, + async (args) => { + log(`Pulling model: ${args.model}...`); + writeStatus('pulling', `Pulling ${args.model}`); + try { + const res = await ollamaFetch('/api/pull', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: args.model, stream: false }), + }); + if (!res.ok) { + const errorText = await res.text(); + return { + content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], + isError: true, + }; + } + const data = await res.json() as { status: string }; + log(`Pull complete: ${args.model} — ${data.status}`); + writeStatus('done', `Pulled ${args.model}`); + return { content: [{ type: 'text' as const, text: `Pull complete: ${args.model} — ${data.status}` }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to pull model: ${err instanceof Error ? 
err.message : String(err)}` }], + isError: true, + }; + } + }, +); + +server.tool( + 'ollama_delete_model', + 'Delete a locally installed Ollama model to free up disk space.', + { + model: z.string().describe('Model name to delete, e.g. "llama3.2", "mistral:latest"'), + }, + async (args) => { + log(`Deleting model: ${args.model}...`); + writeStatus('deleting', `Deleting ${args.model}`); + try { + const res = await ollamaFetch('/api/delete', { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: args.model }), + }); + if (!res.ok) { + const errorText = await res.text(); + return { + content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], + isError: true, + }; + } + log(`Deleted: ${args.model}`); + writeStatus('done', `Deleted ${args.model}`); + return { content: [{ type: 'text' as const, text: `Deleted model: ${args.model}` }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to delete model: ${err instanceof Error ? err.message : String(err)}` }], + isError: true, + }; + } + }, +); + +server.tool( + 'ollama_show_model', + 'Show details for a locally installed Ollama model: modelfile, parameters, template, system prompt, and architecture info (context length, parameter count, etc.).', + { + model: z.string().describe('Model name to inspect, e.g. 
"llama3.2", "mistral:latest"'), + }, + async (args) => { + log(`Showing model info: ${args.model}...`); + try { + const res = await ollamaFetch('/api/show', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: args.model }), + }); + if (!res.ok) { + const errorText = await res.text(); + return { + content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], + isError: true, + }; + } + const data = await res.json(); + return { content: [{ type: 'text' as const, text: JSON.stringify(data, null, 2) }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to show model info: ${err instanceof Error ? err.message : String(err)}` }], + isError: true, + }; + } + }, +); + +server.tool( + 'ollama_list_running', + 'List Ollama models currently loaded in memory with their memory usage, processor type (CPU/GPU), and time until they are unloaded.', + {}, + async () => { + log('Listing running models...'); + try { + const res = await ollamaFetch('/api/ps'); + if (!res.ok) { + return { + content: [{ type: 'text' as const, text: `Ollama API error: ${res.status} ${res.statusText}` }], + isError: true, + }; + } + const data = await res.json() as { models?: Array<{ name: string; size_vram: number; processor: string; expires_at: string }> }; + const models = data.models || []; + if (models.length === 0) { + return { content: [{ type: 'text' as const, text: 'No models currently loaded in memory.' }] }; + } + const list = models + .map(m => `- ${m.name} (${(m.size_vram / 1e9).toFixed(1)}GB ${m.processor}, unloads at ${m.expires_at})`) + .join('\n'); + log(`${models.length} model(s) running`); + return { content: [{ type: 'text' as const, text: `Models loaded in memory:\n${list}` }] }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Failed to list running models: ${err instanceof Error ? 
err.message : String(err)}` }], + isError: true, + }; + } + }, +); + const transport = new StdioServerTransport(); await server.connect(transport); From 474346e21470a55c9c8b4b24b5bf6fc819a297ff Mon Sep 17 00:00:00 2001 From: Gary Walker Date: Mon, 30 Mar 2026 16:09:56 +1100 Subject: [PATCH 3/5] fix: recover from stale Claude Code session IDs instead of retrying infinitely When Claude Code exits with code 1 during a session resume, the group's session ID is now cleared from the database and the query is retried with a fresh session. This prevents the infinite retry loop that occurred when a stale/corrupt session ID was stored in SQLite. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/db.ts | 4 ++++ src/index.ts | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/db.ts b/src/db.ts index 0896f4185..04da21ba5 100644 --- a/src/db.ts +++ b/src/db.ts @@ -526,6 +526,10 @@ export function setSession(groupFolder: string, sessionId: string): void { ).run(groupFolder, sessionId); } +export function deleteSession(groupFolder: string): void { + db.prepare('DELETE FROM sessions WHERE group_folder = ?').run(groupFolder); +} + export function getAllSessions(): Record { const rows = db .prepare('SELECT group_folder, session_id FROM sessions') diff --git a/src/index.ts b/src/index.ts index 3f5e71086..e65c92151 100644 --- a/src/index.ts +++ b/src/index.ts @@ -30,6 +30,7 @@ import { getAllChats, getAllRegisteredGroups, getAllSessions, + deleteSession, getAllTasks, getMessagesSince, getNewMessages, @@ -355,6 +356,51 @@ async function runAgent( } if (output.status === 'error') { + // Detect stale/corrupt session: container failed while resuming an existing session. + // Clear the session and retry once with a fresh session to avoid infinite retry loops. 
+ if (sessionId) { + logger.warn( + { group: group.name, staleSessionId: sessionId, error: output.error }, + 'Container failed with existing session — clearing stale session and retrying with fresh session', + ); + delete sessions[group.folder]; + deleteSession(group.folder); + + const freshOutput = await runContainerAgent( + group, + { + prompt, + sessionId: undefined, + groupFolder: group.folder, + chatJid, + isMain, + assistantName: ASSISTANT_NAME, + }, + (proc, containerName) => + queue.registerProcess(chatJid, proc, containerName, group.folder), + wrappedOnOutput, + ); + + if (freshOutput.newSessionId) { + sessions[group.folder] = freshOutput.newSessionId; + setSession(group.folder, freshOutput.newSessionId); + } + + if (freshOutput.status === 'error') { + logger.error( + { group: group.name, error: freshOutput.error }, + 'Container agent error on fresh session retry', + ); + return 'error'; + } + + logger.info( + { group: group.name, newSessionId: freshOutput.newSessionId }, + 'Fresh session retry succeeded', + ); + return 'success'; + } + logger.error( { group: group.name, error: output.error }, 'Container agent error', From 38009be2632fb7f11d7927016caebb250ac762db Mon Sep 17 00:00:00 2001 From: Gary Walker Date: Mon, 30 Mar 2026 23:03:44 +1100 Subject: [PATCH 4/5] fix: auto-recover from stale Claude Code session on exit code 1 When Claude Code exits with code 1 during a session resume because the session transcript file no longer exists (ENOENT on .jsonl), clear the stale session from SQLite and retry once with a fresh session. Detection is targeted: only triggers on ENOENT referencing a .jsonl file or explicit "session not found" errors. Transient failures (network, API) fall through to the normal backoff retry path. Also removes unrelated ollama files that were mixed in during rebase. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- container/agent-runner/src/index.ts | 7 +- .../agent-runner/src/ollama-mcp-stdio.ts | 281 ------------------ scripts/ollama-watch.sh | 41 --- src/container-runner.ts | 7 +- src/index.ts | 16 +- 5 files changed, 14 insertions(+), 338 deletions(-) delete mode 100644 container/agent-runner/src/ollama-mcp-stdio.ts delete mode 100755 scripts/ollama-watch.sh diff --git a/container/agent-runner/src/index.ts b/container/agent-runner/src/index.ts index ec181ed23..25554f989 100644 --- a/container/agent-runner/src/index.ts +++ b/container/agent-runner/src/index.ts @@ -409,8 +409,7 @@ async function runQuery( 'TeamCreate', 'TeamDelete', 'SendMessage', 'TodoWrite', 'ToolSearch', 'Skill', 'NotebookEdit', - 'mcp__nanoclaw__*', - 'mcp__ollama__*' + 'mcp__nanoclaw__*' ], env: sdkEnv, permissionMode: 'bypassPermissions', @@ -426,10 +425,6 @@ async function runQuery( NANOCLAW_IS_MAIN: containerInput.isMain ? '1' : '0', }, }, - ollama: { - command: 'node', - args: [path.join(path.dirname(mcpServerPath), 'ollama-mcp-stdio.js')], - }, }, hooks: { PreCompact: [{ hooks: [createPreCompactHook(containerInput.assistantName)] }], diff --git a/container/agent-runner/src/ollama-mcp-stdio.ts b/container/agent-runner/src/ollama-mcp-stdio.ts deleted file mode 100644 index 379398a4f..000000000 --- a/container/agent-runner/src/ollama-mcp-stdio.ts +++ /dev/null @@ -1,281 +0,0 @@ -/** - * Ollama MCP Server for NanoClaw - * Exposes local Ollama models as tools for the container agent. - * Uses host.docker.internal to reach the host's Ollama instance from Docker. 
- */ - -import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; -import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { z } from 'zod'; - -import fs from 'fs'; -import path from 'path'; - -const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://host.docker.internal:11434'; -const OLLAMA_STATUS_FILE = '/workspace/ipc/ollama_status.json'; - -function log(msg: string): void { - console.error(`[OLLAMA] ${msg}`); -} - -function writeStatus(status: string, detail?: string): void { - try { - const data = { status, detail, timestamp: new Date().toISOString() }; - const tmpPath = `${OLLAMA_STATUS_FILE}.tmp`; - fs.mkdirSync(path.dirname(OLLAMA_STATUS_FILE), { recursive: true }); - fs.writeFileSync(tmpPath, JSON.stringify(data)); - fs.renameSync(tmpPath, OLLAMA_STATUS_FILE); - } catch { /* best-effort */ } -} - -async function ollamaFetch(path: string, options?: RequestInit): Promise { - const url = `${OLLAMA_HOST}${path}`; - try { - return await fetch(url, options); - } catch (err) { - // Fallback to localhost if host.docker.internal fails - if (OLLAMA_HOST.includes('host.docker.internal')) { - const fallbackUrl = url.replace('host.docker.internal', 'localhost'); - return await fetch(fallbackUrl, options); - } - throw err; - } -} - -const server = new McpServer({ - name: 'ollama', - version: '1.0.0', -}); - -server.tool( - 'ollama_list_models', - 'List all locally installed Ollama models. 
Use this to see which models are available before calling ollama_generate.', - {}, - async () => { - log('Listing models...'); - writeStatus('listing', 'Listing available models'); - try { - const res = await ollamaFetch('/api/tags'); - if (!res.ok) { - return { - content: [{ type: 'text' as const, text: `Ollama API error: ${res.status} ${res.statusText}` }], - isError: true, - }; - } - - const data = await res.json() as { models?: Array<{ name: string; size: number; modified_at: string }> }; - const models = data.models || []; - - if (models.length === 0) { - return { content: [{ type: 'text' as const, text: 'No models installed. Run `ollama pull ` on the host to install one.' }] }; - } - - const list = models - .map(m => `- ${m.name} (${(m.size / 1e9).toFixed(1)}GB)`) - .join('\n'); - - log(`Found ${models.length} models`); - return { content: [{ type: 'text' as const, text: `Installed models:\n${list}` }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to connect to Ollama at ${OLLAMA_HOST}: ${err instanceof Error ? err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -server.tool( - 'ollama_generate', - 'Send a prompt to a local Ollama model and get a response. Good for cheaper/faster tasks like summarization, translation, or general queries. 
Use ollama_list_models first to see available models.', - { - model: z.string().describe('The model name (e.g., "llama3.2", "mistral", "gemma2")'), - prompt: z.string().describe('The prompt to send to the model'), - system: z.string().optional().describe('Optional system prompt to set model behavior'), - }, - async (args) => { - log(`>>> Generating with ${args.model} (${args.prompt.length} chars)...`); - writeStatus('generating', `Generating with ${args.model}`); - try { - const body: Record = { - model: args.model, - prompt: args.prompt, - stream: false, - }; - if (args.system) { - body.system = args.system; - } - - const res = await ollamaFetch('/api/generate', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), - }); - - if (!res.ok) { - const errorText = await res.text(); - return { - content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], - isError: true, - }; - } - - const data = await res.json() as { response: string; total_duration?: number; eval_count?: number }; - - let meta = ''; - if (data.total_duration) { - const secs = (data.total_duration / 1e9).toFixed(1); - meta = `\n\n[${args.model} | ${secs}s${data.eval_count ? ` | ${data.eval_count} tokens` : ''}]`; - log(`<<< Done: ${args.model} | ${secs}s | ${data.eval_count || '?'} tokens | ${data.response.length} chars`); - writeStatus('done', `${args.model} | ${secs}s | ${data.eval_count || '?'} tokens`); - } else { - log(`<<< Done: ${args.model} | ${data.response.length} chars`); - writeStatus('done', `${args.model} | ${data.response.length} chars`); - } - - return { content: [{ type: 'text' as const, text: data.response + meta }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to call Ollama: ${err instanceof Error ? err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -server.tool( - 'ollama_pull_model', - 'Pull (download) a model from the Ollama registry by name. 
Returns the final status once the pull is complete. Use model names like "llama3.2", "mistral", "gemma2:9b".', - { - model: z.string().describe('Model name to pull, e.g. "llama3.2", "mistral", "gemma2:9b"'), - }, - async (args) => { - log(`Pulling model: ${args.model}...`); - writeStatus('pulling', `Pulling ${args.model}`); - try { - const res = await ollamaFetch('/api/pull', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model: args.model, stream: false }), - }); - if (!res.ok) { - const errorText = await res.text(); - return { - content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], - isError: true, - }; - } - const data = await res.json() as { status: string }; - log(`Pull complete: ${args.model} — ${data.status}`); - writeStatus('done', `Pulled ${args.model}`); - return { content: [{ type: 'text' as const, text: `Pull complete: ${args.model} — ${data.status}` }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to pull model: ${err instanceof Error ? err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -server.tool( - 'ollama_delete_model', - 'Delete a locally installed Ollama model to free up disk space.', - { - model: z.string().describe('Model name to delete, e.g. 
"llama3.2", "mistral:latest"'), - }, - async (args) => { - log(`Deleting model: ${args.model}...`); - writeStatus('deleting', `Deleting ${args.model}`); - try { - const res = await ollamaFetch('/api/delete', { - method: 'DELETE', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model: args.model }), - }); - if (!res.ok) { - const errorText = await res.text(); - return { - content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], - isError: true, - }; - } - log(`Deleted: ${args.model}`); - writeStatus('done', `Deleted ${args.model}`); - return { content: [{ type: 'text' as const, text: `Deleted model: ${args.model}` }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to delete model: ${err instanceof Error ? err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -server.tool( - 'ollama_show_model', - 'Show details for a locally installed Ollama model: modelfile, parameters, template, system prompt, and architecture info (context length, parameter count, etc.).', - { - model: z.string().describe('Model name to inspect, e.g. "llama3.2", "mistral:latest"'), - }, - async (args) => { - log(`Showing model info: ${args.model}...`); - try { - const res = await ollamaFetch('/api/show', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model: args.model }), - }); - if (!res.ok) { - const errorText = await res.text(); - return { - content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }], - isError: true, - }; - } - const data = await res.json(); - return { content: [{ type: 'text' as const, text: JSON.stringify(data, null, 2) }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to show model info: ${err instanceof Error ? 
err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -server.tool( - 'ollama_list_running', - 'List Ollama models currently loaded in memory with their memory usage, processor type (CPU/GPU), and time until they are unloaded.', - {}, - async () => { - log('Listing running models...'); - try { - const res = await ollamaFetch('/api/ps'); - if (!res.ok) { - return { - content: [{ type: 'text' as const, text: `Ollama API error: ${res.status} ${res.statusText}` }], - isError: true, - }; - } - const data = await res.json() as { models?: Array<{ name: string; size_vram: number; processor: string; expires_at: string }> }; - const models = data.models || []; - if (models.length === 0) { - return { content: [{ type: 'text' as const, text: 'No models currently loaded in memory.' }] }; - } - const list = models - .map(m => `- ${m.name} (${(m.size_vram / 1e9).toFixed(1)}GB ${m.processor}, unloads at ${m.expires_at})`) - .join('\n'); - log(`${models.length} model(s) running`); - return { content: [{ type: 'text' as const, text: `Models loaded in memory:\n${list}` }] }; - } catch (err) { - return { - content: [{ type: 'text' as const, text: `Failed to list running models: ${err instanceof Error ? err.message : String(err)}` }], - isError: true, - }; - } - }, -); - -const transport = new StdioServerTransport(); -await server.connect(transport); diff --git a/scripts/ollama-watch.sh b/scripts/ollama-watch.sh deleted file mode 100755 index 1aa4a93db..000000000 --- a/scripts/ollama-watch.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# Watch NanoClaw IPC for Ollama activity and show macOS notifications -# Usage: ./scripts/ollama-watch.sh - -cd "$(dirname "$0")/.." || exit 1 - -echo "Watching for Ollama activity..." 
-echo "Press Ctrl+C to stop" -echo "" - -LAST_TIMESTAMP="" - -while true; do - # Check all group IPC dirs for ollama_status.json - for status_file in data/ipc/*/ollama_status.json; do - [ -f "$status_file" ] || continue - - TIMESTAMP=$(python3 -c "import json; print(json.load(open('$status_file'))['timestamp'])" 2>/dev/null) - [ -z "$TIMESTAMP" ] && continue - [ "$TIMESTAMP" = "$LAST_TIMESTAMP" ] && continue - - LAST_TIMESTAMP="$TIMESTAMP" - STATUS=$(python3 -c "import json; d=json.load(open('$status_file')); print(d['status'])" 2>/dev/null) - DETAIL=$(python3 -c "import json; d=json.load(open('$status_file')); print(d.get('detail',''))" 2>/dev/null) - - case "$STATUS" in - generating) - osascript -e "display notification \"$DETAIL\" with title \"NanoClaw → Ollama\" sound name \"Submarine\"" 2>/dev/null - echo "$(date +%H:%M:%S) 🔄 $DETAIL" - ;; - done) - osascript -e "display notification \"$DETAIL\" with title \"NanoClaw ← Ollama ✓\" sound name \"Glass\"" 2>/dev/null - echo "$(date +%H:%M:%S) ✅ $DETAIL" - ;; - listing) - echo "$(date +%H:%M:%S) 📋 Listing models..." - ;; - esac - done - sleep 0.5 -done diff --git a/src/container-runner.ts b/src/container-runner.ts index 5f2218032..f6f86b12f 100644 --- a/src/container-runner.ts +++ b/src/container-runner.ts @@ -400,12 +400,7 @@ export async function runContainerAgent( const chunk = data.toString(); const lines = chunk.trim().split('\n'); for (const line of lines) { - if (!line) continue; - if (line.includes('[OLLAMA]')) { - logger.info({ container: group.folder }, line); - } else { - logger.debug({ container: group.folder }, line); - } + if (line) logger.debug({ container: group.folder }, line); } // Don't reset timeout on stderr — SDK writes debug logs continuously. // Timeout only resets on actual output (OUTPUT_MARKER in stdout). 
diff --git a/src/index.ts b/src/index.ts index 897d0ebf0..f6a662add 100644 --- a/src/index.ts +++ b/src/index.ts @@ -403,12 +403,20 @@ async function runAgent( } if (output.status === 'error') { - // Detect stale/corrupt session: container failed while resuming an existing session. - // Clear the session and retry once with a fresh session to avoid infinite retry loops. - if (sessionId) { + // Detect stale/corrupt session: the SDK throws ENOENT when the session + // transcript file (.jsonl) doesn't exist inside the container. This + // happens after container restarts since the filesystem is ephemeral. + // Only clear + retry for this specific signal — transient errors + // (network, API) should fall through to the normal backoff path. + const isStaleSession = + sessionId && + output.error && + /ENOENT.*\.jsonl|session.*not found/i.test(output.error); + + if (isStaleSession) { logger.warn( { group: group.name, staleSessionId: sessionId, error: output.error }, - 'Container failed with existing session — clearing stale session and retrying with fresh session', + 'Stale session detected (ENOENT on session transcript) — clearing and retrying with fresh session', ); delete sessions[group.folder]; deleteSession(group.folder); From 001ee6ec4876a89e8e57c3446a46b9e8dae8b587 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Tue, 31 Mar 2026 01:17:27 +0300 Subject: [PATCH 5/5] fix: correct stale session regex and remove duplicate retry logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original regex didn't match the actual error ("No conversation found with session ID: ..."). Added `no conversation found` pattern. Removed the inline retry — clearing the session and returning 'error' lets the existing group-queue.ts backoff loop retry with a fresh session naturally. Simpler, no duplicate error paths. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/index.ts | 47 ++++++----------------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/src/index.ts b/src/index.ts index f6a662add..e186c403a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -403,57 +403,22 @@ async function runAgent( } if (output.status === 'error') { - // Detect stale/corrupt session: the SDK throws ENOENT when the session - // transcript file (.jsonl) doesn't exist inside the container. This - // happens after container restarts since the filesystem is ephemeral. - // Only clear + retry for this specific signal — transient errors - // (network, API) should fall through to the normal backoff path. + // Detect stale/corrupt session — clear it so the next retry starts fresh. + // The session .jsonl can go missing after a crash mid-write, manual + // deletion, or disk-full. The existing backoff in group-queue.ts + // handles the retry; we just need to remove the broken session ID. const isStaleSession = sessionId && output.error && - /ENOENT.*\.jsonl|session.*not found/i.test(output.error); + /no conversation found|ENOENT.*\.jsonl|session.*not found/i.test(output.error); if (isStaleSession) { logger.warn( { group: group.name, staleSessionId: sessionId, error: output.error }, - 'Stale session detected (ENOENT on session transcript) — clearing and retrying with fresh session', + 'Stale session detected — clearing for next retry', ); delete sessions[group.folder]; deleteSession(group.folder); - - const freshOutput = await runContainerAgent( - group, - { - prompt, - sessionId: undefined, - groupFolder: group.folder, - chatJid, - isMain, - assistantName: ASSISTANT_NAME, - }, - (proc, containerName) => - queue.registerProcess(chatJid, proc, containerName, group.folder), - wrappedOnOutput, - ); - - if (freshOutput.newSessionId) { - sessions[group.folder] = freshOutput.newSessionId; - setSession(group.folder, freshOutput.newSessionId); - } - - if (freshOutput.status === 
'error') { - logger.error( - { group: group.name, error: freshOutput.error }, - 'Container agent error on fresh session retry', - ); - return 'error'; - } - - logger.info( - { group: group.name, newSessionId: freshOutput.newSessionId }, - 'Fresh session retry succeeded', - ); - return 'success'; } logger.error(