fix(credentials): translate auth errors and require OneCLI for spawn

Two related fixes for the case where credentials aren't usable:

1. Replace Claude Code's "Not logged in / Invalid API key · Please run
   /login" output with a host-aware message. The user can't run /login
   from chat, so the raw text is unhelpful. Provider gains an optional
   isAuthRequired() classifier; the poll-loop substitutes the message
   on both result-text and error paths.

2. Treat OneCLI gateway failure as a transient hard error instead of
   spawning a credential-less container. container-runner no longer
   catches the gateway error (and throws if the gateway is not applied),
   so the failure propagates; router and host-sweep wrap wakeContainer to
   log and leave the inbound row pending so the next 60s sweep tick retries.
   Router also stops the typing indicator on failure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-04-29 17:02:15 +03:00
parent f8c3d02348
commit 5f34e26240
6 changed files with 82 additions and 27 deletions
+33 -11
View File
@@ -21,6 +21,20 @@ function generateId(): string {
return `msg-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}
/**
 * User-facing chat text sent instead of the provider's raw auth-failure
 * output. It addresses the operator of the host rather than the chat user;
 * writeAuthRequiredMessage wraps it as `{ text }` JSON before sending.
 */
const AUTH_REQUIRED_USER_TEXT =
"I can't reach my Anthropic credentials right now. The operator running NanoClaw needs to re-run setup, or run `claude` in the project directory on the machine I'm running on.";
/**
 * Emit the canned "credentials unavailable" chat message to the
 * conversation identified by `routing`.
 *
 * @param routing - Supplies the platform id, channel type and thread id
 *   that are copied onto the outbound message.
 */
function writeAuthRequiredMessage(routing: RoutingContext): void {
  const { platformId, channelType, threadId } = routing;
  // The outbound content is a JSON-encoded `{ text }` object.
  const payload = JSON.stringify({ text: AUTH_REQUIRED_USER_TEXT });

  writeMessageOut({
    id: generateId(),
    kind: 'chat',
    platform_id: platformId,
    channel_type: channelType,
    thread_id: threadId,
    content: payload,
  });
}
export interface PollLoopConfig {
provider: AgentProvider;
/**
@@ -171,7 +185,7 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
const skippedSet = new Set(skipped);
const processingIds = ids.filter((id) => !commandIds.includes(id) && !skippedSet.has(id));
try {
const result = await processQuery(query, routing, processingIds, config.providerName);
const result = await processQuery(query, routing, processingIds, config.provider, config.providerName);
if (result.continuation && result.continuation !== continuation) {
continuation = result.continuation;
setContinuation(config.providerName, continuation);
@@ -189,15 +203,18 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
clearContinuation(config.providerName);
}
// Write error response so the user knows something went wrong
writeMessageOut({
id: generateId(),
kind: 'chat',
platform_id: routing.platformId,
channel_type: routing.channelType,
thread_id: routing.threadId,
content: JSON.stringify({ text: `Error: ${errMsg}` }),
});
if (config.provider.isAuthRequired?.(errMsg)) {
writeAuthRequiredMessage(routing);
} else {
writeMessageOut({
id: generateId(),
kind: 'chat',
platform_id: routing.platformId,
channel_type: routing.channelType,
thread_id: routing.threadId,
content: JSON.stringify({ text: `Error: ${errMsg}` }),
});
}
}
// Ensure completed even if processQuery ended without a result event
@@ -249,6 +266,7 @@ async function processQuery(
query: AgentQuery,
routing: RoutingContext,
initialBatchIds: string[],
provider: AgentProvider,
providerName: string,
): Promise<QueryResult> {
let queryContinuation: string | undefined;
@@ -310,7 +328,11 @@ async function processQuery(
// at all — either way the turn is finished.
markCompleted(initialBatchIds);
if (event.text) {
dispatchResultText(event.text, routing);
if (provider.isAuthRequired?.(event.text)) {
writeAuthRequiredMessage(routing);
} else {
dispatchResultText(event.text, routing);
}
}
}
}
@@ -236,6 +236,14 @@ const CLAUDE_CODE_AUTO_COMPACT_WINDOW = '165000';
*/
const STALE_SESSION_RE = /no conversation found|ENOENT.*\.jsonl|session.*not found/i;
/**
 * Auth-required detection. Matches Claude Code's output when no usable
 * credential is available — "Not logged in · Please run /login" or
 * "Invalid API key · Please run /login". The user can't run /login from
 * chat, so the poll-loop substitutes a host-aware message.
 *
 * Matching is case-insensitive (`i` flag). The lazy `[\s\S]*?` gap lets
 * any characters, including newlines, sit between the leading phrase and
 * "Please run /login". There is no `g` flag, so the regex carries no
 * `lastIndex` state between calls.
 *
 * NOTE(review): the gap is unbounded, so any text that contains one of the
 * leading phrases followed anywhere later by "Please run /login" matches —
 * confirm that is acceptable for long result texts.
 */
const AUTH_REQUIRED_RE = /(Not logged in|Invalid API key)[\s\S]*?Please run \/login/i;
export class ClaudeProvider implements AgentProvider {
readonly supportsNativeSlashCommands = true;
@@ -259,6 +267,10 @@ export class ClaudeProvider implements AgentProvider {
return STALE_SESSION_RE.test(msg);
}
/**
 * Report whether `text` matches the auth-required pattern
 * (AUTH_REQUIRED_RE).
 *
 * @param text - Result text or error message to classify.
 * @returns `true` when the pattern matches anywhere in `text`.
 */
isAuthRequired(text: string): boolean {
  const match = AUTH_REQUIRED_RE.exec(text);
  return match !== null;
}
query(input: QueryInput): AgentQuery {
const stream = new MessageStream();
stream.push(input.prompt);
@@ -14,6 +14,14 @@ export interface AgentProvider {
* (missing transcript, unknown session, etc.) and should be cleared.
*/
isSessionInvalid(err: unknown): boolean;
/**
* True if the given text/error indicates the underlying SDK or CLI has no
* usable Anthropic auth (e.g. Claude Code's "Not logged in · Please run
* /login"). The poll-loop swaps the raw output for a host-aware message
* since the user can't run /login from chat.
*/
isAuthRequired?(text: string): boolean;
}
/**
+11 -13
View File
@@ -435,20 +435,18 @@ async function buildContainerArgs(
}
// OneCLI gateway — injects HTTPS_PROXY + certs so container API calls
// are routed through the agent vault for credential injection.
try {
if (agentIdentifier) {
await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier });
}
const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier });
if (onecliApplied) {
log.info('OneCLI gateway applied', { containerName });
} else {
log.warn('OneCLI gateway not applied — container will have no credentials', { containerName });
}
} catch (err) {
log.warn('OneCLI gateway error — container will have no credentials', { containerName, err });
// are routed through the agent vault for credential injection. Treated as
// a transient hard failure: if we can't wire the gateway, we don't spawn.
// The caller (router or host-sweep) catches the throw, leaves the inbound
// message pending, and the next sweep tick retries.
if (agentIdentifier) {
await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier });
}
const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier });
if (!onecliApplied) {
throw new Error('OneCLI gateway not applied — refusing to spawn container without credentials');
}
log.info('OneCLI gateway applied', { containerName });
// Host gateway
args.push(...hostGatewayArgs());
+8 -1
View File
@@ -168,7 +168,14 @@ async function sweepSession(session: Session): Promise<void> {
const dueCount = countDueMessages(inDb);
if (dueCount > 0 && !isContainerRunning(session.id)) {
log.info('Waking container for due messages', { sessionId: session.id, count: dueCount });
await wakeContainer(session);
try {
await wakeContainer(session);
} catch (err) {
// Transient spawn failure (e.g. OneCLI gateway down). Leave messages
// pending so the next sweep tick retries; don't abort the rest of
// the sweep cycle for other sessions.
log.warn('wakeContainer failed — will retry on next sweep', { sessionId: session.id, err });
}
}
const alive = isContainerRunning(session.id);
+10 -2
View File
@@ -27,7 +27,7 @@ import {
getMessagingGroupWithAgentCount,
} from './db/messaging-groups.js';
import { findSessionForAgent } from './db/sessions.js';
import { startTypingRefresh } from './modules/typing/index.js';
import { startTypingRefresh, stopTypingRefresh } from './modules/typing/index.js';
import { log } from './log.js';
import { resolveSession, writeSessionMessage, writeOutboundDirect } from './session-manager.js';
import { wakeContainer } from './container-runner.js';
@@ -450,7 +450,15 @@ async function deliverToAgent(
startTypingRefresh(session.id, session.agent_group_id, event.channelType, event.platformId, event.threadId);
const freshSession = getSession(session.id);
if (freshSession) {
await wakeContainer(freshSession);
try {
await wakeContainer(freshSession);
} catch (err) {
// Transient spawn failure (e.g. OneCLI gateway down). The inbound
// row is already persisted — host-sweep will retry the wake on its
// next tick. Don't bubble out of the channel adapter.
log.warn('wakeContainer failed — host-sweep will retry', { sessionId: freshSession.id, err });
stopTypingRefresh(freshSession.id);
}
}
}
}