docs: update token count to 196k tokens · 98% of context window

chore: bump version to 2.1.17
Merge pull request #2759 from assapin/fix/budget-error-surfaced-to-user
2026-06-18 18:29:35 +08:00 · 2026-06-16 11:15:10 +00:00 · 2026-06-16 11:15:04 +00:00 · 2026-06-16 14:14:48 +03:00 · 2026-06-16 11:35:45 +03:00 · 2026-06-16 09:55:25 +03:00
7 changed files with 137 additions and 31 deletions
@@ -4,6 +4,7 @@ All notable changes to NanoClaw will be documented in this file.

 ## [Unreleased]

+- **Budget/billing-exhausted LLM turns now reach the user instead of being silently dropped.** When a turn ends in a non-retryable provider error (e.g. an Anthropic `403 billing_error`) with no `<message>` wrapping, the agent-runner delivers the provider's notice to the originating channel and stops re-nudging the failing gateway. `providers/claude.ts` now surfaces the SDK's `is_error` flag (and the error subtype's `errors[]` text); `poll-loop.ts` delivers that text and skips the re-wrap retry. Fixes the case where a spend-limit notice produced silence plus a turn-after-turn retry loop.
 - [BREAKING] **`@onecli-sh/sdk` 0.5.0 -> 2.2.1 — requires a OneCLI server with the `/v1` API** (older servers 404 every SDK call). The sanctioned gateway and CLI versions are pinned in `versions.json`. **The gateway is a separate component — updating NanoClaw does not upgrade it for you:** `/update-nanoclaw` upgrades it when the pin moves, otherwise upgrade manually. **Migration:** [docs/onecli-upgrades.md](docs/onecli-upgrades.md).
 - **New agent provider: Codex (OpenAI) — run `/add-codex`.** Full runtime via `codex app-server` (planning, MCP tools, server-side history, resume). Trunk ships the seams and the skill; the payload installs from the `providers` branch (the skill, the setup picker, or `--step provider-auth codex`). Auth is vault-only — no credential ever enters a container.
 - **Setup can now select, install, and authenticate a non-default agent provider.** A provider registry feeds the setup picker, an installer pulls the provider's payload from its branch, a vault auth walkthrough runs (`--step provider-auth`), and the picked provider is set on the first agent (a DB property) before its first spawn. Default (Claude) installs are unaffected — picking Claude changes nothing.
@@ -4,8 +4,9 @@ import { initTestSessionDb, closeSessionDb, getInboundDb, getOutboundDb } from '
 import { getPendingMessages, markCompleted } from './db/messages-in.js';
 import { getUndeliveredMessages } from './db/messages-out.js';
 import { formatMessages, extractRouting } from './formatter.js';
-import { isCorruptionError } from './poll-loop.js';
+import { isCorruptionError, processQuery } from './poll-loop.js';
 import { MockProvider } from './providers/mock.js';
+import type { AgentQuery, ProviderEvent } from './providers/types.js';

 beforeEach(() => {
  initTestSessionDb();
@@ -379,6 +380,64 @@ describe('end-to-end with mock provider', () => {
  });
 });

+/**
+ * Build a one-shot stub query whose events yield `init`, then `result`, then
+ * end. `pushes` records every message handed to `query.push` (e.g. the
+ * re-wrap nudge), so a test can assert the loop did NOT re-hammer.
+ */
+function makeResultQuery(result: ProviderEvent): { query: AgentQuery; pushes: string[] } {
+  const pushes: string[] = []; // same array is returned below, so later push() calls are visible to the test
+  async function* events(): AsyncGenerator<ProviderEvent> {
+    yield { type: 'init', continuation: 'sess-1' }; // fixed stub continuation value
+    yield result; // the single event under test; the generator completes right after
+  }
+  return {
+    pushes,
+    query: {
+      push: (m: string) => {
+        pushes.push(m); // record the follow-up instead of sending it anywhere
+      },
+      end: () => {}, // no-op in the stub
+      events: events(), // generator is created once here, so the stream is single-use
+      abort: () => {}, // no-op in the stub
+    },
+  };
+}
+
+const ERR_ROUTING = { // routing argument shared by both processQuery calls in the tests below
+  platformId: 'chan-1', // asserted against out[0].platform_id
+  channelType: 'discord', // asserted against out[0].channel_type
+  threadId: null,
+  inReplyTo: 'm1', // same literal as the batch id ['m1'] passed to processQuery
+};
+
+describe('error result with no <message> envelope', () => {
+  it('delivers a budget/billing error to the triggering channel and does not nudge', async () => {
+    const budgetText = 'Spending limit reached. Add your own key at https://example.com/keys';
+    const { query, pushes } = makeResultQuery({ type: 'result', text: budgetText, isError: true }); // error-flagged, unwrapped text
+
+    await processQuery(query, ERR_ROUTING, ['m1'], 'claude', undefined, 'prompt', undefined);
+
+    const out = getUndeliveredMessages();
+    expect(out).toHaveLength(1); // the notice itself is the only outbound row expected
+    expect(JSON.parse(out[0].content).text).toBe(budgetText); // delivered verbatim, no added prefix
+    expect(out[0].platform_id).toBe('chan-1'); // matches ERR_ROUTING.platformId
+    expect(out[0].channel_type).toBe('discord'); // matches ERR_ROUTING.channelType
+    // No re-wrap nudge — an error result must not re-hammer the gateway.
+    expect(pushes).toHaveLength(0);
+  });
+
+  it('still nudges (and does not deliver) a normal unwrapped result', async () => {
+    const { query, pushes } = makeResultQuery({ type: 'result', text: 'bare text, no envelope' }); // isError omitted
+
+    await processQuery(query, ERR_ROUTING, ['m1'], 'claude', undefined, 'prompt', undefined);
+
+    expect(getUndeliveredMessages()).toHaveLength(0); // unwrapped non-error text is not written to the channel
+    expect(pushes).toHaveLength(1); // exactly one re-wrap nudge was pushed
+    expect(pushes[0]).toContain('was not delivered'); // substring of the nudge's <system> message
+  });
+});
+
 describe('isCorruptionError', () => {
  it('matches the Docker Desktop macOS torn-read symptom', () => {
    expect(isCorruptionError('database disk image is malformed')).toBe(true);
@@ -323,7 +323,7 @@ interface QueryResult {
  continuation?: string;
 }

-async function processQuery(
+export async function processQuery(
  query: AgentQuery,
  routing: RoutingContext,
  initialBatchIds: string[],
@@ -482,28 +482,43 @@ async function processQuery(
        // at all — either way the turn is finished.
        markCompleted(initialBatchIds);
        if (event.text) {
-          const { hasUnwrapped } = dispatchResultText(event.text, routing);
-          const willRetryWrapping = hasUnwrapped && !unwrappedNudged;
-          notifyExchangeComplete(onExchangeComplete, {
-            prompt: archivePrompts[0] ?? initialPrompt,
-            result: event.text,
-            continuation: queryContinuation ?? initialContinuation,
-            status: hasUnwrapped ? 'undelivered' : 'completed',
-          });
-          if (willRetryWrapping) {
-            unwrappedNudged = true;
-            const destinations = getAllDestinations();
-            const names = destinations.map((d) => d.name).join(', ');
-            query.push(
-              `<system>Your response was not delivered — it was not wrapped in <message to="name">...</message> blocks. ` +
-                `All output must be wrapped: use <message to="name"> for content to send, or <internal> for scratchpad. ` +
-                `Your destinations: ${names}. ` +
-                `Please re-send your response with the correct wrapping.</system>`,
-            );
+          const { sent, hasUnwrapped } = dispatchResultText(event.text, routing);
+          if (sent === 0 && event.isError === true) {
+            // Non-retryable error turn (e.g. a 403 billing_error) with no
+            // <message> envelope: deliver the notice instead of dropping it as
+            // scratchpad, and skip the re-wrap nudge — it would just re-hammer
+            // the failing gateway turn after turn.
+            deliverErrorResult(event.text, routing);
+            notifyExchangeComplete(onExchangeComplete, {
+              prompt: archivePrompts[0] ?? initialPrompt,
+              result: event.text,
+              continuation: queryContinuation ?? initialContinuation,
+              status: 'error',
+            });
+            archivePrompts.shift();
+          } else {
+            const willRetryWrapping = hasUnwrapped && !unwrappedNudged;
+            notifyExchangeComplete(onExchangeComplete, {
+              prompt: archivePrompts[0] ?? initialPrompt,
+              result: event.text,
+              continuation: queryContinuation ?? initialContinuation,
+              status: hasUnwrapped ? 'undelivered' : 'completed',
+            });
+            if (willRetryWrapping) {
+              unwrappedNudged = true;
+              const destinations = getAllDestinations();
+              const names = destinations.map((d) => d.name).join(', ');
+              query.push(
+                `<system>Your response was not delivered — it was not wrapped in <message to="name">...</message> blocks. ` +
+                  `All output must be wrapped: use <message to="name"> for content to send, or <internal> for scratchpad. ` +
+                  `Your destinations: ${names}. ` +
+                  `Please re-send your response with the correct wrapping.</system>`,
+              );
+            }
+            // The wrapping-retry result answers the SAME user prompt — keep it
+            // queued so the retry archives against it, not the nudge text.
+            if (!willRetryWrapping) archivePrompts.shift();
          }
-          // The wrapping-retry result answers the SAME user prompt — keep it
-          // queued so the retry archives against it, not the nudge text.
-          if (!willRetryWrapping) archivePrompts.shift();
        } else {
          archivePrompts.shift();
        }
@@ -557,6 +572,26 @@ function handleEvent(event: ProviderEvent, _routing: RoutingContext): void {
  }
 }

+/**
+ * Deliver a turn's text straight to the channel the batch arrived on. Used when
+ * a turn ends in a provider error (e.g. a non-retryable 403 billing_error) with
+ * no <message> envelope: the notice would otherwise be dropped as scratchpad.
+ * This is the same user-facing write the outer catch block does, minus the
+ * `Error:` prefix — the provider's text is already a user-facing message.
+ */
+function deliverErrorResult(text: string, routing: RoutingContext): void {
+  log('Error result with no <message> envelope — delivering to channel'); // fixed log line; the notice text itself is not logged
+  writeMessageOut({
+    id: generateId(),
+    in_reply_to: routing.inReplyTo, // ties the outbound row to the routing context's inbound reference
+    kind: 'chat',
+    platform_id: routing.platformId, // destination fields are copied from the routing context unchanged
+    channel_type: routing.channelType,
+    thread_id: routing.threadId,
+    content: JSON.stringify({ text }), // JSON-encoded as { text }; the text is passed through verbatim
+  });
+}
+
 /**
 * Parse the agent's final text for <message to="name">...</message> blocks
 * and dispatch each one to its resolved destination. Text outside of blocks
@@ -440,8 +440,13 @@ export class ClaudeProvider implements AgentProvider {
        if (message.type === 'system' && message.subtype === 'init') {
          yield { type: 'init', continuation: message.session_id };
        } else if (message.type === 'result') {
-          const text = 'result' in message ? (message as { result?: string }).result ?? null : null;
-          yield { type: 'result', text };
+          // `result` text exists only on subtype:"success"; error subtypes
+          // (e.g. a non-retryable 403 billing_error) carry their message in
+          // `errors[]` instead. Surface either so the poll-loop can deliver a
+          // billing/quota notice to the user rather than dropping the turn.
+          const m = message as { result?: string; is_error?: boolean; errors?: string[] };
+          const text = m.result ?? (m.errors && m.errors.length > 0 ? m.errors.join('\n') : null);
+          yield { type: 'result', text, isError: m.is_error === true };
        } else if (message.type === 'system' && (message as { subtype?: string }).subtype === 'api_retry') {
          yield { type: 'error', message: 'API retry', retryable: true };
        } else if (message.type === 'system' && (message as { subtype?: string }).subtype === 'rate_limit_event') {
@@ -125,7 +125,13 @@ export interface AgentQuery {

 export type ProviderEvent =
  | { type: 'init'; continuation: string }
-  | { type: 'result'; text: string | null }
+  /**
+   * A completed turn. `isError` is set when the underlying SDK flagged the
+   * turn as an error (e.g. a non-retryable Anthropic 403 billing_error). The
+   * poll-loop uses it to surface the result text to the user instead of
+   * dropping it as un-wrapped scratchpad, and to skip the re-wrap nudge.
+   */
+  | { type: 'result'; text: string | null; isError?: boolean }
  | { type: 'error'; message: string; retryable: boolean; classification?: string }
  | { type: 'progress'; message: string }
  /**
@@ -1,6 +1,6 @@
 {
  "name": "nanoclaw",
-  "version": "2.1.16",
+  "version": "2.1.17",
  "description": "Personal Claude assistant. Lightweight, secure, customizable.",
  "type": "module",
  "packageManager": "pnpm@10.33.0",
@@ -1,5 +1,5 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="90" height="20" role="img" aria-label="195k tokens, 98% of context window">
-  <title>195k tokens, 98% of context window</title>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="90" height="20" role="img" aria-label="196k tokens, 98% of context window">
+  <title>196k tokens, 98% of context window</title>
  <linearGradient id="s" x2="0" y2="100%">
    <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
    <stop offset="1" stop-opacity=".1"/>
@@ -15,8 +15,8 @@
      <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
        <text aria-hidden="true" x="26" y="15" fill="#010101" fill-opacity=".3">tokens</text>
        <text x="26" y="14">tokens</text>
-        <text aria-hidden="true" x="71" y="15" fill="#010101" fill-opacity=".3">195k</text>
-        <text x="71" y="14">195k</text>
+        <text aria-hidden="true" x="71" y="15" fill="#010101" fill-opacity=".3">196k</text>
+        <text x="71" y="14">196k</text>
      </g>
    </g>
  </a>
Author	SHA1	Message	Date
github-actions[bot]	ee7f891698	docs: update token count to 196k tokens · 98% of context window	2026-06-16 11:15:10 +00:00
github-actions[bot]	7fde348e2b	chore: bump version to 2.1.17	2026-06-16 11:15:04 +00:00
Gabi Simons	122135e6dc	Merge pull request #2759 from assapin/fix/budget-error-surfaced-to-user fix(agent-runner): deliver budget/billing error turns instead of dropping them	2026-06-16 14:14:48 +03:00
Gabi Simons	8563fb0681	Merge remote-tracking branch 'origin/main' into fix/budget-error-surfaced-to-user # Conflicts: # CHANGELOG.md	2026-06-16 11:35:45 +03:00
omri-maya	0155ab1943	Merge pull request #2775 from nanocoai/docs/onecli-gateway-upgrade-notice docs(changelog): clarify the OneCLI gateway is a separate, operator-driven upgrade	2026-06-16 09:55:25 +03:00
Gabi Simons	59c4d33adc	Merge branch 'main' into fix/budget-error-surfaced-to-user	2026-06-15 17:42:01 +03:00
Gabi Simons	e03c5c194a	Merge branch 'main' into fix/budget-error-surfaced-to-user	2026-06-15 12:17:20 +03:00
assafpin	01433bae32	fix(agent-runner): deliver budget/billing error turns instead of dropping them A turn that ends in a non-retryable provider error (e.g. an Anthropic 403 billing_error) comes back from the streaming SDK as a result with is_error=true and no <message> envelope. dispatchResultText treated it as scratchpad and dropped it, then the poll-loop pushed a re-wrap nudge -> new turn -> same error, re-hammering the gateway until idle-kill. The user saw silence. - providers/claude.ts: surface is_error on the result event, and fall back to errors[] for the message text (error subtypes carry no result). - poll-loop.ts: when a result has no <message> blocks and is_error, deliver the notice verbatim to the originating channel and skip the nudge. Verified live (real agent image + SDK, 403 mock): the notice is delivered to the channel and the retry loop is gone. Refs #2751	2026-06-14 12:56:02 +03:00