mirror of
https://github.com/qwibitai/nanoclaw.git
synced 2026-06-12 18:11:51 +08:00
Compare commits
74 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2be6dbcd34 | |||
| 3650e6a1be | |||
| 540f06677e | |||
| 255360693c | |||
| 4c2418369e | |||
| 523ff2e7d5 | |||
| 046f754c7b | |||
| ae74cd8869 | |||
| 36cbf17e10 | |||
| 4459ab2e54 | |||
| 9e6238d28f | |||
| d1bda5d15b | |||
| 7eddc7d8c9 | |||
| 991ef986f8 | |||
| 0f2557e2bc | |||
| 4e6552ed55 | |||
| 978b998ee6 | |||
| 83951d7c01 | |||
| 76ef097521 | |||
| 1c85fd6e50 | |||
| 42275ede1f | |||
| 53e1989529 | |||
| 6f2142d7c7 | |||
| 79a0226962 | |||
| 0b31695e92 | |||
| 421f8707d2 | |||
| 67ccd9e74c | |||
| f69af07c57 | |||
| 93a302b5db | |||
| eef285ba3b | |||
| a806534199 | |||
| 0ac8073e34 | |||
| 539a2b3c63 | |||
| fccaadf24c | |||
| 3329270c67 | |||
| f16ea0c783 | |||
| 1c024bc976 | |||
| 6c26f3ef08 | |||
| ab6ab6936c | |||
| 501afb4beb | |||
| 9040dbb86e | |||
| d8748e3a45 | |||
| 41a720dd59 | |||
| 6ae83f48ac | |||
| dc34ceb83d | |||
| ad3dfad3f5 | |||
| 0bdc6d2bb2 | |||
| 820cd8ece6 | |||
| e44d497cdf | |||
| ac37ecbfd6 | |||
| c6627d32e2 | |||
| 51bf403b22 | |||
| 265953ffec | |||
| 6227bd1a5b | |||
| 28032bc0ec | |||
| 3e3a2945a5 | |||
| f3fc18e56e | |||
| d85efea229 | |||
| c5b22cb308 | |||
| 1592369201 | |||
| 6420c0e254 | |||
| aef8d38b36 | |||
| 6d6f813deb | |||
| f9c86d0af2 | |||
| 9edb33dd3a | |||
| 8ba5261ae8 | |||
| 8c84dec8e9 | |||
| 092487d7ad | |||
| 87850aa7f8 | |||
| 526170fd47 | |||
| 2d9375531b | |||
| e734e5cddd | |||
| 728c6a641b | |||
| 8385236c30 |
@@ -111,8 +111,8 @@ Run `/manage-channels` to wire the GitHub channel to an agent group, or insert m
|
||||
|
||||
```sql
|
||||
-- Create messaging group (one per repo)
|
||||
INSERT INTO messaging_groups (id, channel_type, platform_id, name, is_group, unknown_sender_policy, created_at)
|
||||
VALUES ('mg-github-myrepo', 'github', 'github:owner/repo', 'owner/repo', 1, '<policy>', datetime('now'));
|
||||
INSERT INTO messaging_groups (id, channel_type, platform_id, instance, name, is_group, unknown_sender_policy, created_at)
|
||||
VALUES ('mg-github-myrepo', 'github', 'github:owner/repo', 'github', 'owner/repo', 1, '<policy>', datetime('now'));
|
||||
|
||||
-- Wire to agent group
|
||||
INSERT INTO messaging_group_agents (id, messaging_group_id, agent_group_id, trigger_rules, response_scope, session_mode, priority, created_at)
|
||||
|
||||
@@ -119,8 +119,8 @@ Run `/manage-channels` to wire the Linear channel to an agent group, or insert m
|
||||
|
||||
```sql
|
||||
-- Create messaging group (one per team)
|
||||
INSERT INTO messaging_groups (id, channel_type, platform_id, name, is_group, unknown_sender_policy, created_at)
|
||||
VALUES ('mg-linear-eng', 'linear', 'linear:ENG', 'Engineering', 1, 'public', datetime('now'));
|
||||
INSERT INTO messaging_groups (id, channel_type, platform_id, instance, name, is_group, unknown_sender_policy, created_at)
|
||||
VALUES ('mg-linear-eng', 'linear', 'linear:ENG', 'linear', 'Engineering', 1, 'public', datetime('now'));
|
||||
|
||||
-- Wire to agent group
|
||||
INSERT INTO messaging_group_agents (id, messaging_group_id, agent_group_id, trigger_rules, response_scope, session_mode, priority, created_at)
|
||||
|
||||
@@ -28,6 +28,15 @@ Two phases: **Extract** (build the migration guide) and **Upgrade** (use it). If
|
||||
|
||||
---
|
||||
|
||||
# Phase 0: Refresh this skill first
|
||||
|
||||
The migration process itself evolves, so run its newest version before doing anything else:
|
||||
- Ensure the `upstream` remote exists (default `https://github.com/nanocoai/nanoclaw.git`) and fetch: `git fetch upstream --prune`. Detect the upstream branch (`main` or `master`).
|
||||
- Refresh this skill from upstream: `git checkout upstream/<branch> -- .claude/skills/migrate-nanoclaw/`
|
||||
- Re-read `.claude/skills/migrate-nanoclaw/SKILL.md`. If it changed, **follow the updated version from the top** instead of this one.
|
||||
|
||||
This is the only working-tree change expected before the preflight check below; changes limited to `.claude/skills/migrate-nanoclaw/` are this self-refresh — ignore them in the 1.0 clean-tree check and proceed.
|
||||
|
||||
# Phase 1: Extract
|
||||
|
||||
## 1.0 Preflight
|
||||
@@ -464,6 +473,11 @@ Point the branch at the upgraded state with `git reset --hard <upgrade-commit>`
|
||||
|
||||
Run `pnpm install && pnpm run build` in the main tree to confirm.
|
||||
|
||||
Stamp the upgrade marker (required — without it the startup tripwire stops the host on next start). Only do this after the build above succeeds:
|
||||
```bash
|
||||
pnpm exec tsx scripts/upgrade-state.ts set "" migrate-nanoclaw
|
||||
```
|
||||
|
||||
Restart the service. Service labels are per-install — derive them from `setup/lib/install-slug.sh`:
|
||||
```bash
|
||||
source setup/lib/install-slug.sh
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
# Remove the PR Factory (recipe)
|
||||
|
||||
## 0. Delete the recipe-owned guard tests FIRST
|
||||
|
||||
Before touching any component, delete the two composed-stack guard tests. They assert the *whole* stack is present and in sync — so as soon as the first component starts coming out, they go red and every later component's removal runs against a failing test tree:
|
||||
|
||||
```bash
|
||||
rm -f src/recipe-pr-factory-stack.test.ts src/skill-sync.test.ts
|
||||
```
|
||||
|
||||
(`docs/pr-factory.md` is deleted in the final cleanup step below, and `sync-skill-files.sh` is removed there too if no other skill still uses it; only the two `.test.ts` guards must go up front.)
|
||||
|
||||
**During full-recipe removal, ignore the `## Validate` step at the end of each component's REMOVE.md** — those per-component builds run mid-teardown, while sibling components still reference seams the current component is removing, so they will be red and that is expected. Only the recipe-level validation at the very end of this file is binding.
|
||||
|
||||
## 1. Run the component REMOVE.mds in reverse apply order
|
||||
|
||||
Each component's REMOVE.md reverses everything that component installed (files, barrel lines, dependencies, env keys); follow them in this order, skipping components that were never applied — and skipping each one's trailing `## Validate` block per the note above:
|
||||
|
||||
1. `skills/slack-canvas/REMOVE.md`
|
||||
2. `skills/vm-test-orchestrator/REMOVE.md`
|
||||
3. `skills/gh-action-approval/REMOVE.md`
|
||||
4. `skills/pr-factory-core/REMOVE.md`
|
||||
5. `skills/slack-bots/REMOVE.md`
|
||||
6. `/add-slack`'s removal steps, only if the worker Slack channel itself is being removed.
|
||||
|
||||
## 2. Delete the remaining recipe-owned files
|
||||
|
||||
```bash
|
||||
rm -f docs/pr-factory.md
|
||||
```
|
||||
|
||||
`src/skill-sync.test.ts` was already deleted in step 0. The sync script behind it, `scripts/sync-skill-files.sh`, is shared manifest infrastructure: leave it in place if any other skill in the install uses a `files.txt` manifest; if this recipe was the only consumer, remove it too:
|
||||
|
||||
```bash
|
||||
rm -f scripts/sync-skill-files.sh
|
||||
```
|
||||
|
||||
Operator data is not deleted by any of the above — `data/gh-users.json`, `data/pr-activity/`, the repo mirror dir, `groups/pr-factory-worker/`, `groups/pr-factory-supervisor/`, and `groups/pr-tester/` are yours to keep or remove. The `pr_threads` table and the recorded component migrations stay in the central DB (migrations are forward-only); they are inert without the module.
|
||||
|
||||
Validate: `pnpm run build && pnpm test` — both green.
|
||||
@@ -0,0 +1,169 @@
|
||||
---
|
||||
name: pr-factory
|
||||
description: Recipe — compose the PR Factory (GitHub PR triage, review, and testing with human approval gates in Slack) from the component skills shipped inside this folder. Apply order, core-version probes, operator setup, and the composed-stack validation.
|
||||
---
|
||||
|
||||
# PR Factory (recipe)
|
||||
|
||||
The PR Factory turns incoming GitHub pull requests into Slack threads, each driven by a per-PR worker agent session that triages, reviews, and test-plans the change — with a human approving every consequential action (merges, test runs, skill edits) from an approval card in the thread. An optional supervisor bot takes feedback and improves the worker, and an optional tester bot executes approved test plans on ephemeral VMs. Everything runs inside one NanoClaw host: the webhook receiver, the thread lifecycle, the approval gates, and the VM control plane.
|
||||
|
||||
This is a **recipe**: a thin composition layer over the component skills shipped inside this folder. Each component is independently appliable and removable and carries its own SKILL.md, REMOVE.md, `files.txt` manifest, and generated `files/` mirror; the details (apply steps, credentials, guard tests, known smells) live there. Architecture: [docs/pr-factory.md](../../../../docs/pr-factory.md).
|
||||
|
||||
**Discovery note:** `recipes/` is not slash-discoverable — there is no `/pr-factory` command. A recipe applies by reading this file: point Claude at `.claude/skills/recipes/pr-factory/SKILL.md` (or run it from a `/setup`-style flow) and follow it top to bottom.
|
||||
|
||||
```
|
||||
.claude/skills/recipes/pr-factory/
|
||||
SKILL.md # this recipe
|
||||
REMOVE.md # recipe-level reversal (delegates to the components)
|
||||
files.txt + files/ # recipe-owned files: docs + composed-stack tests + sync infra
|
||||
skills/
|
||||
slack-bots/ # supervisor + tester Slack adapters (named channel instances)
|
||||
pr-factory-core/ # the engine: webhook, sessions, approvals, MCP tools, seams
|
||||
gh-action-approval/ # approval-gated gh execution (seam component)
|
||||
vm-test-orchestrator/ # ephemeral test-VM control plane (seam component)
|
||||
slack-canvas/ # markdown → Slack Canvas rendering (seam component)
|
||||
```
|
||||
|
||||
## Prerequisites — core version
|
||||
|
||||
Requires **nanoclaw ≥ 2.1.11**. The components make near-zero core edits because core already ships the hooks they register against. The probes are the real check — run each; on a failed probe, **stop** and update core first.
|
||||
|
||||
| Probe | Core capability |
|
||||
|---|---|
|
||||
| `test -f src/db/migrations/016-messaging-group-instance.ts && grep -q 'instance?: string' src/channels/adapter.ts && echo OK` | native channel-instance substrate |
|
||||
| `grep -q 'export function getDeliveryAction' src/delivery.ts && echo OK` | delivery-action read-side getter |
|
||||
| `grep -q 'byLine' src/channels/chat-sdk-bridge.ts && echo OK` | approval-card actor byline |
|
||||
| `grep -q 'justWoke' src/host-sweep.ts && echo OK` | host-sweep wake grace period |
|
||||
| `grep -q 'export function registerApprovalResolvedHandler' src/modules/approvals/primitive.ts && echo OK` | approval-resolved hook |
|
||||
| `awk '/export function writeOutboundDirect/{f=1} f&&/openOutboundDbRw/{print "OK"; exit} /^}/{if(f)f=0}' src/session-manager.ts` | writeOutboundDirect opens read-write |
|
||||
| `grep -q 'export function registerWebhookHandler' src/webhook-server.ts && echo OK` | raw webhook-route registry |
|
||||
|
||||
The component SKILL.mds re-probe the subset each one depends on; this table is the full set.
|
||||
|
||||
## Apply order
|
||||
|
||||
**Apply the recipe as a unit — all components, in this order.** Each component degrades gracefully on its own (a missing canvas provider falls back to `.md` uploads, a missing test orchestrator answers "not installed"), so they read as "optional" individually. But the recipe ships composed-stack guard tests (`recipe-pr-factory-stack.test.ts`, `skill-sync.test.ts`) that assume the whole stack is present — a partial apply leaves those red. So for a recipe install, apply every component below; treat "what you lose without component X" as a description of graceful degradation, not an invitation to skip it.
|
||||
|
||||
Order is load-bearing: `slack-bots` patches the adapter `/add-slack` installs, `pr-factory-core` imports `slack-bots`' instance constants, and the three seam components register on seams owned by `pr-factory-core`. Apply each component by following its own SKILL.md.
|
||||
|
||||
1. **`/add-slack`** (stock channel skill) — the worker bot. When `/add-slack` reaches its dependency-install step, **install `@chat-adapter/slack` exactly pinned at 4.26.0** — `pnpm install @chat-adapter/slack@4.26.0 --save-exact`. The exact pin is load-bearing: 4.27.0 pulls `chat@4.27.0` types that fail the build against core's `chat@^4.24.0` resolution, and a caret range re-resolves forward and breaks the build later. Verify the resolved version before continuing:
|
||||
|
||||
```bash
|
||||
node -p "require('@chat-adapter/slack/package.json').version" # must print 4.26.0
|
||||
grep '"@chat-adapter/slack"' package.json # must read "4.26.0" (no ^ or ~)
|
||||
```
|
||||
|
||||
If it shows anything other than `4.26.0`, re-run the pinned install above before moving on.
|
||||
2. **`skills/slack-bots`** — supervisor + tester Slack apps as named channel instances, sibling-echo suppression, the bot_id→instance legacy-upgrade migration.
|
||||
3. **`skills/pr-factory-core`** — the engine. Inert until `GITHUB_WEBHOOK_SECRET` is set.
|
||||
4. **`skills/gh-action-approval`** — credentialed `gh` execution. (Absent, `credentialed_gh` calls answer "component not installed".)
|
||||
5. **`skills/vm-test-orchestrator`** — the test-VM control plane. (Absent, approved test plans answer "no test orchestrator installed".)
|
||||
6. **`skills/slack-canvas`** — Canvas rendering. (Absent, plans and reviews post as plain text + `.md` uploads.)
|
||||
|
||||
Finally copy in the recipe-owned files (idempotent, like every apply step; run from the repo root, like every command in this bundle):
|
||||
|
||||
```bash
|
||||
RECIPE=.claude/skills/recipes/pr-factory
|
||||
cp $RECIPE/files/docs/pr-factory.md docs/pr-factory.md
|
||||
cp $RECIPE/files/src/recipe-pr-factory-stack.test.ts src/recipe-pr-factory-stack.test.ts
|
||||
cp $RECIPE/files/scripts/sync-skill-files.sh scripts/sync-skill-files.sh && chmod +x scripts/sync-skill-files.sh
|
||||
cp $RECIPE/files/src/skill-sync.test.ts src/skill-sync.test.ts
|
||||
```
|
||||
|
||||
`sync-skill-files.sh` + `skill-sync.test.ts` are the manifest/mirror infrastructure by which every component's `files/` folder is generated; the stack test is described under Validate.
|
||||
|
||||
## Operator setup
|
||||
|
||||
Summary only — each item is detailed in the named component's SKILL.md:
|
||||
|
||||
- **Three Slack apps** in one workspace: worker (`/add-slack`), supervisor + tester (`skills/slack-bots` → Credentials). Webhook URLs `/webhook/slack`, `/webhook/slack-supervisor`, `/webhook/slack-tester`.
|
||||
- **GitHub webhook**: set `GITHUB_WEBHOOK_SECRET` in `.env` and add a Pull requests webhook on the repo pointing at `/webhook/github` (`skills/pr-factory-core` → Configuration).
|
||||
- **Channels + repo env**: `PR_FACTORY_SLACK_CHANNEL_ID`, optional `PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID`, `PR_FACTORY_DEFAULT_REPO` (`skills/pr-factory-core`).
|
||||
- **Approver roles (required)**: core silently ignores approval-card clicks from users without a `user_roles` row. `pnpm run ncl roles grant --user 'slack:U0XXXXXXX' --role admin` for every human who will click cards (`skills/pr-factory-core` → "Grant approver roles").
|
||||
- **gh auth + approver mapping**: install `gh`, log in each approver's account, create `data/gh-users.json` from the shipped sample — keys are **namespaced** (`"slack:U0XXX": "gh-login"`) with no bare-id fallback (`skills/gh-action-approval`).
|
||||
- **VM pool knobs**: `PR_FACTORY_TEST_VM_TEMPLATE` (required for test runs), `PR_FACTORY_TEST_SSH_HOST`, `TEST_VM_SSH_USER`, `TEST_VM_NAME_PREFIX`, `TEST_VM_HOST_TEMPLATE` — defaults are exe.dev's conventions; any SSH-driven provider works (`skills/vm-test-orchestrator`). Tester needs the operator-created `pr-tester` agent group.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit
|
||||
(cd container/agent-runner && bun test)
|
||||
```
|
||||
|
||||
All suites green. `src/recipe-pr-factory-stack.test.ts` is the composed-stack leg: it runs the full migration chain (instance substrate + the two component migrations) on a fresh DB, bootstraps the PR Factory entities on that composed schema, asserts the delivery file-transform slot has exactly one registrant across all modules, and runs `sync-skill-files.sh --all --check` so a drifted component mirror fails CI. Each component's own guard tests cover its integration points.
|
||||
|
||||
## What you get
|
||||
|
||||
- **Per-PR worker flow** — webhook → Slack thread (status reactions 🟢⚪🔴🟣) → per-thread agent session seeded with the diff → triage report → review → test plan. The default triage/review/test-plan workflow is seeded into `groups/pr-factory-worker/CLAUDE.local.md` (edit it to tune trusted contributors, merge policy, review depth) or replaced wholesale via `PR_FACTORY_REVIEW_SKILL` — see "Tailoring the bots" below.
|
||||
- **Approval cards for every consequential action** — send-to-testing, retry, skill edits, and every `gh` write (merge, close, comment), executed with the approving human's gh identity.
|
||||
- **Supervisor bot** — its own Slack identity; takes feedback in an admin channel or @-mentioned in PR threads, proposes worker skill/instruction edits behind a diff + approval card. Approved edits apply to the next PR the worker triages.
|
||||
- **VM test runs** — approved plans clone an ephemeral VM from a template, check out the PR, build, boot, and hand the VM to the tester agent; PASS wakes the worker to propose a merge, FAIL to analyze.
|
||||
- **Canvases** — test plans, results, and review writeups render as Slack Canvases instead of file uploads (paid Slack plan; falls back to `.md` uploads otherwise).
|
||||
|
||||
## Tailoring the bots — your own container skills
|
||||
|
||||
The shipped triage/review/test-plan workflow is deliberately generic. The PR Factory gets sharply better when the operator replaces it with skills written for **their** repo — its review dimensions, its triage rules, its test environments. The mechanism is provided entirely by core:
|
||||
|
||||
- **Container skills** live at `container/skills/<name>/SKILL.md` (read-only at `/app/skills` in every agent container). Each group's container config has a `skills` selection (default `'all'`) that controls which ones are symlinked into that group's `~/.claude/skills` at spawn — so a new skill folder reaches the worker on its next container start, no config change needed.
|
||||
- **Group-private skills**: a directory at `groups/<folder>/.claude/skills/<name>/` is discovered as a project-level skill (the agent's cwd is `/workspace/agent`, the group folder). Use this for a skill only one group should ever see — but note it sits outside the supervisor's edit loop below.
|
||||
- **Precedence**: with `PR_FACTORY_REVIEW_SKILL=<name>` set, every PR trigger opens with `Use the /<name> skill to triage this pull request.` and the generic defaults seeded into `groups/pr-factory-worker/CLAUDE.local.md` are ignored. Without it, the seeded instructions run — editing them in place is the lighter path when the defaults are close.
|
||||
- **Iteration loop**: `container/skills/` is what the supervisor bot edits — `propose_skill_edit` writes `container/skills/<skill>/<file>` behind a diff + approval card; the edit applies to the next PR the worker triages (running sessions keep their old read-only skill view until they next spawn). Routing repo-specific workflow into a container skill (rather than CLAUDE.local.md) is what makes the feedback loop reviewable.
|
||||
|
||||
### Interview the operator, then generate
|
||||
|
||||
Run this as a conversation — one cluster of questions per skill, then write the files:
|
||||
|
||||
1. **Review standards** — what does a good review catch in this repo? Which dimensions matter (correctness, security, migration safety, API stability, performance, docs)? Any house rules — error-handling patterns, layering, naming? What severity scale gates a merge? → the **review skill**: the entry point named in `PR_FACTORY_REVIEW_SKILL`, owning the full triage → review → test-plan pipeline (keep the shipped hard constraints: GitHub writes only via `credentialed_gh`, output to the PR thread, the `[PR_CONTEXT: …]` tag is authoritative).
|
||||
2. **Triage categories and routing** — what kinds of PRs arrive (features, fixes, dep bumps, docs, vendor syncs)? Which classes auto-merge, which auto-close, who are the trusted authors? What's the merge strategy? → the triage stage of that skill, or a separate **triage skill** it invokes.
|
||||
3. **Test environments and depth** — what exists (unit suites, integration rigs, a staging VM, devices)? What depth is conventional per change type, and what can't be tested automatically? → a **test-planning skill** that fixes plan depth and the plan-file format.
|
||||
|
||||
Then: write each as `container/skills/<name>/SKILL.md`, set `PR_FACTORY_REVIEW_SKILL=<review-skill-name>` in `.env`, restart the host, and point future tuning at the supervisor bot ("the worker keeps missing X — fix the skill") so every refinement lands as a diff behind an approval card.
|
||||
|
||||
### Worked example — review skill skeleton
|
||||
|
||||
`container/skills/acme-review/SKILL.md`:
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: acme-review
|
||||
description: Triage and review a pull request against acme/widgets' standards. Used by the PR Factory worker for every incoming PR.
|
||||
---
|
||||
|
||||
# acme/widgets PR review
|
||||
|
||||
Triage first (per the categories below), then review the diff dimension by
|
||||
dimension. Verdict first, then findings — most severe first, each with file:line.
|
||||
|
||||
## Dimensions
|
||||
|
||||
1. **Migration safety** — anything under `migrations/` must be backward-compatible
|
||||
one release back; destructive ops (DROP/ALTER) without a two-step plan are Must-fix.
|
||||
2. **API stability** — exported types and HTTP routes are frozen; a breaking change
|
||||
needs a v2 route, not an edit.
|
||||
3. **Error handling** — no swallowed errors; failures propagate to the route-level
|
||||
handler. A bare `catch {}` is Must-fix.
|
||||
|
||||
## Severity
|
||||
|
||||
Must-fix (blocks merge) · Should-fix (request changes) · Nit (comment only).
|
||||
```
|
||||
|
||||
Then `PR_FACTORY_REVIEW_SKILL=acme-review` in `.env` and restart. Triage and test-planning skills follow the same shape.
|
||||
|
||||
## Upgrading a legacy bot_id install
|
||||
|
||||
This section applies to installs that ran an earlier PR Factory build on the old `bot_id` multi-bot substrate. Boot order matters — **never boot bare core on such a DB** (migration 016 crash-loops on the supervisor/tester rows):
|
||||
|
||||
1. Stop the host.
|
||||
2. Check out a tree with this recipe **fully applied** (all components).
|
||||
3. Boot. The two component migrations run first: `module-slack-bots-bot-id-to-instance` maps `bot_id` rows to instances and rewrites the Chat SDK state namespaces; `module-pr-factory-pr-threads-v2` drops the dead `bot_id` column from `pr_threads`.
|
||||
4. Verify the Slack webhook URLs — they are byte-identical to the ones the legacy install used (`/webhook/slack-supervisor`, `/webhook/slack-tester`), so the Slack app consoles need zero changes.
|
||||
5. Expect at most one re-@mention per subscribed thread (`chat_sdk_locks` is cleared; it is TTL-bound state).
|
||||
6. **Re-key `data/gh-users.json` to namespaced ids** (`"U0XXX"` → `"slack:U0XXX"`). An un-migrated mapping silently degrades every approver to the default gh credentials — there is no bare-id fallback.
|
||||
7. Carry operator data over by hand: `data/gh-users.json`, the repo mirror dir, `groups/pr-tester/`, the OneCLI vault.
|
||||
|
||||
## Remove
|
||||
|
||||
[REMOVE.md](REMOVE.md) — deletes the recipe-owned guard tests first, runs the component REMOVE.mds in reverse apply order, then deletes the remaining recipe-owned files.
|
||||
@@ -0,0 +1,8 @@
|
||||
# pr-factory recipe — files the recipe owns itself (components own theirs in
|
||||
# skills/<component>/files.txt): the architecture doc, the composed-stack
|
||||
# tests, and the manifest/mirror sync infrastructure every files/ folder in
|
||||
# this bundle is generated by.
|
||||
docs/pr-factory.md
|
||||
src/recipe-pr-factory-stack.test.ts
|
||||
scripts/sync-skill-files.sh
|
||||
src/skill-sync.test.ts
|
||||
@@ -0,0 +1,539 @@
|
||||
# PR Factory
|
||||
|
||||
The PR Factory automatically triages, reviews, and tests incoming GitHub pull requests, with humans approving every consequential action from Slack. It is composed by the recipe at [`.claude/skills/recipes/pr-factory/`](../.claude/skills/recipes/pr-factory/SKILL.md) from five component skills on top of the stock `/add-slack` channel: `slack-bots`, `pr-factory-core`, and the optional `gh-action-approval`, `vm-test-orchestrator`, and `slack-canvas`.
|
||||
|
||||
Everything runs inside this NanoClaw host: the GitHub webhook receiver, the Slack thread lifecycle, the approval gates, and the test orchestration. Test runs execute on ephemeral VMs cloned per PR over SSH; results come back through the tester agent's `submit_test_results` MCP tool — there is no secondary "orchestrator VM" and no SCP inbox/outbox.
|
||||
|
||||
```
|
||||
GitHub webhook ──▶ NanoClaw host ──▶ Slack thread per PR (worker bot)
                        │                  │  humans approve cards
                        │                  ▼
                        ├─▶ worker agent container (triage → review → test plan)
                        ├─▶ supervisor agent container (feedback loop, own bot)
                        └─▶ test orchestrator ──ssh──▶ ephemeral VM per PR
                                  ▲                         │
                                  └── tester agent container (runs plan, submits results)
|
||||
```
|
||||
|
||||
**One instance, one repository.** A factory instance serves a single repository — the one in `PR_FACTORY_DEFAULT_REPO`, the one its GitHub webhook is attached to. All run state (per-PR sessions, test VMs, the 30-minute timeouts) is keyed per-PR *within that repo*; PR numbers collide across repos, so one instance cannot safely fan out to several. Cover more repositories by running more instances, each with its own channels, bots, and `PR_FACTORY_DEFAULT_REPO`.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Components and Seams](#components-and-seams)
|
||||
2. [Environment Variables](#environment-variables)
|
||||
3. [Module Initialization](#module-initialization)
|
||||
4. [Bootstrap: Agent Groups, Messaging Groups, and Wiring](#bootstrap)
|
||||
5. [Database: pr_threads Table](#database-pr_threads-table)
|
||||
6. [Webhook Receiver](#webhook-receiver)
|
||||
7. [PR Handler: From Webhook to Agent Session](#pr-handler)
|
||||
8. [The Worker's Review Workflow](#the-workers-review-workflow)
|
||||
9. [The Supervisor Agent](#the-supervisor-agent)
|
||||
10. [The Tester Agent and Test Orchestration](#test-orchestration)
|
||||
11. [MCP Tools (Container-Side)](#mcp-tools-container-side)
|
||||
12. [Delivery Actions and Approval Handlers (Host-Side)](#delivery-actions-and-approval-handlers-host-side)
|
||||
13. [Testing Approval Gate](#testing-approval-gate)
|
||||
14. [Skill Edit Approval Gate](#skill-edit-approval-gate)
|
||||
15. [GitHub CLI Approval Gate](#github-cli-approval-gate)
|
||||
16. [Slack: Three Bots, One Channel Type](#slack-three-bots-one-channel-type)
|
||||
17. [Slack Canvases](#slack-canvases)
|
||||
18. [Activity Log](#activity-log)
|
||||
19. [File Map](#file-map)
|
||||
20. [Manual Operations](#manual-operations)
|
||||
|
||||
---
|
||||
|
||||
## Components and Seams
|
||||
|
||||
`pr-factory-core` is the engine; the three seam components register against seams core owns, at import time, and core degrades gracefully when any is absent (so a partial install still runs — but the recipe is meant to be applied whole, since its composed-stack tests assume every component is present):
|
||||
|
||||
| Seam (core file) | Registered by | Without the component |
|
||||
|---|---|---|
|
||||
| `src/modules/pr-factory/gh-action.ts` (`setGhActionHandler`) | `gh-action-approval` | `credentialed_gh` calls notify the agent that the component is missing |
|
||||
| `src/modules/pr-factory/test-orchestration.ts` (`registerTestOrchestrator`) | `vm-test-orchestrator` | approved test plans answer "no test orchestrator installed" |
|
||||
| `src/modules/pr-factory/canvas.ts` (`registerCanvasProvider`) | `slack-canvas` | test plans/results post as plain text + `.md` file upload |
|
||||
|
||||
The `slack-bots` component owns the `SUPERVISOR_INSTANCE` / `TESTER_INSTANCE` constants (`'slack-supervisor'` / `'slack-tester'`); core imports both. Each seam holds a single provider — a declared smell in core's SKILL.md, acceptable while exactly one component implements each.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
All are read from `.env` (via `readEnvFile`) or `process.env`. The module is **inert** if `GITHUB_WEBHOOK_SECRET` is unset.
|
||||
|
||||
| Variable | Required | Purpose |
|
||||
|----------|----------|---------|
|
||||
| `GITHUB_WEBHOOK_SECRET` | Yes (to enable) | HMAC-SHA256 secret for GitHub webhook signature verification |
|
||||
| `PR_FACTORY_SLACK_CHANNEL_ID` | Yes | Bare Slack channel ID (e.g. `C0XXXXXXX`) where PR threads are created |
|
||||
| `SLACK_BOT_TOKEN` | Yes | Worker Slack app's bot token (the one installed by `/add-slack`) |
|
||||
| `PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID` | No | Bare Slack channel ID for the supervisor's admin channel. Enables the supervisor agent group |
|
||||
| `SLACK_SUPERVISOR_BOT_TOKEN` / `SLACK_SUPERVISOR_SIGNING_SECRET` | No* | Supervisor Slack app credentials (`slack-bots` component) |
|
||||
| `SLACK_TESTER_BOT_TOKEN` / `SLACK_TESTER_SIGNING_SECRET` | No** | Tester Slack app credentials (`slack-bots` component) |
|
||||
| `PR_FACTORY_DEFAULT_REPO` | No | Repo assumed when an MCP action omits `repo`. No built-in default — set it (e.g. `acme/widgets`) or always pass `repo` explicitly |
|
||||
| `PR_FACTORY_REPO_MIRROR_DIR` | No | Local clone refreshed before each triage (default: `data/repo-mirror`; no-op when absent) |
|
||||
| `PR_FACTORY_REVIEW_SKILL` | No | Operator-supplied container skill that owns the review workflow (see [The Worker's Review Workflow](#the-workers-review-workflow)) |
|
||||
| `PR_FACTORY_GH_REPO_ALLOWLIST` | No | The repos the approved `gh` actions may touch — a comma-separated `owner/name` list; an approved `gh` command referencing a repo outside it is refused before execution (`gh-action-approval`). This is a write-target guard, **not** a multi-repo switch: a factory instance still serves the one repo in `PR_FACTORY_DEFAULT_REPO` |
|
||||
| `PR_FACTORY_TEST_VM_TEMPLATE` | For testing | Template VM cloned per test run. Test runs fail gracefully without it |
|
||||
| `PR_FACTORY_TEST_SSH_HOST` | No | VM control-plane SSH host (default: `exe.dev`) |
|
||||
| `PR_FACTORY_TEST_SSH_KEY` | No | SSH identity file for the control plane (default: ssh's own identities) |
|
||||
| `TEST_VM_SSH_USER` | No | Login user on cloned VMs (default: `exedev`) |
|
||||
| `TEST_VM_NAME_PREFIX` | No | VM name = `<prefix><pr-number>` (default: `nctest-`) |
|
||||
| `TEST_VM_HOST_TEMPLATE` | No | Per-VM hostname; `{name}` expands to the VM name (default: `{name}.exe.xyz`) |
|
||||
| `WEBHOOK_PORT` | No | Port for the shared webhook server (default: `3000`) |
|
||||
|
||||
\* Required when `PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID` is set.
|
||||
\*\* Required when the tester agent group (`pr-tester`) is in use.
|
||||
|
||||
The `TEST_VM_*` / `PR_FACTORY_TEST_*` defaults are exe.dev's conventions; any provider whose control plane speaks `cp <template> <name>` / `tag <name> ephemeral` / `rm <name>` over SSH and gives each VM a DNS-resolvable hostname works by overriding the knobs.
|
||||
|
||||
If `NANOCLAW_EGRESS_LOCKDOWN` is enabled (default off), worker containers cannot reach GitHub and tester containers cannot SSH to test VMs — leave it off for PR Factory groups or allowlist those hosts.
|
||||
|
||||
---
|
||||
|
||||
## Module Initialization
|
||||
|
||||
**File:** `src/modules/pr-factory/index.ts`
|
||||
|
||||
The module self-registers when imported by `src/modules/index.ts`. Even in inert mode (no `GITHUB_WEBHOOK_SECRET`), two things still bind at import time because they live at module top level:
|
||||
|
||||
- the **approval handlers** (`pr_send_to_testing`, `pr_retry_test`, `pr_propose_skill_edit` in core; `pr_gh` in `gh-action-approval`) — registered by their own files on import;
|
||||
- the **approval-resolved hook** — `registerApprovalResolvedHandler` clears the 👀 awaiting-approval reaction when an admin *rejects* an approval card on a PR-thread session (approve paths clear it inside each handler).
|
||||
|
||||
With the env trio present (`GITHUB_WEBHOOK_SECRET`, `PR_FACTORY_SLACK_CHANNEL_ID`, `SLACK_BOT_TOKEN`), the gated block runs:
|
||||
|
||||
### Phase 1: registration (immediate)
|
||||
|
||||
Six `registerDeliveryAction` calls (see [the table](#delivery-actions-and-approval-handlers-host-side)). The container tools omit `repo` when the agent doesn't pass one; the default is applied **here, host-side**, from `PR_FACTORY_DEFAULT_REPO` — the container never sees that env var.
|
||||
|
||||
### Phase 2: adapter-ready callback (deferred)
|
||||
|
||||
Once the Slack delivery adapter is connected (`onDeliveryAdapterReady`), the module:
|
||||
|
||||
1. **Bootstraps** agent groups, messaging groups, and wirings (`bootstrapPrFactory`).
|
||||
2. **Registers** the GitHub webhook handler at `/webhook/github`.
|
||||
3. **Initializes the orchestrator pair** (`initOrchestrator`) — only when the `vm-test-orchestrator` component is installed *and* the operator-created `pr-tester` agent group and its messaging group exist; otherwise logs "Test orchestrator disabled" with which pieces are missing.
|
||||
4. **Registers** a shutdown handler (`shutdownOrchestrator` + the orchestrator module's `shutdown`, which destroys all live test VMs).
|
||||
|
||||
### Guard chain
|
||||
|
||||
```
|
||||
GITHUB_WEBHOOK_SECRET missing? → debug log, module disabled
|
||||
PR_FACTORY_SLACK_CHANNEL_ID missing? → warn, disabled
|
||||
SLACK_BOT_TOKEN missing? → warn, disabled
|
||||
All present? → enabled
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Bootstrap
|
||||
|
||||
**File:** `src/modules/pr-factory/bootstrap.ts`
|
||||
|
||||
Runs on every boot (from the adapter-ready callback). All operations are idempotent and self-correcting: existing wirings with drifted `engage_mode` / `session_mode` values are updated, not skipped. Messaging groups are keyed by `(channel_type, platform_id, instance)` on the channel-instance substrate (core migration 016) and resolved with exact-instance lookups — the worker, supervisor, and tester rows share one Slack channel without shadowing each other.
|
||||
|
||||
### Worker
|
||||
|
||||
1. Agent group `pr-factory-worker`, created via `initGroupFilesystem(ag, { instructions })` — the default triage/review/test-plan workflow is seeded into `groups/pr-factory-worker/CLAUDE.local.md` once, never overwritten.
|
||||
2. Messaging group `slack:<WORKER_CHANNEL_ID>` on the **default instance** (`'slack'`), `unknown_sender_policy: 'public'`.
|
||||
3. Wiring: `engage_mode: 'mention-sticky'`, `session_mode: 'per-thread'`, `sender_scope: 'all'`, `ignored_message_policy: 'drop'`. The PR handler pre-subscribes each new PR thread, so in-thread replies engage the worker without an @-mention.
|
||||
4. **Foreign-wiring cleanup** — any other agent group wired to the worker's row is dropped (prevents legacy groups double-engaging).
|
||||
|
||||
### Supervisor (when `PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID` is set)
|
||||
|
||||
1. Agent group `pr-factory-supervisor`, seeded with `SUPERVISOR_INSTRUCTIONS`.
|
||||
2. Admin-channel messaging group (instance `'slack-supervisor'`): `engage_mode: 'pattern'` (pattern `.`, i.e. every message engages), `session_mode: 'shared'`.
|
||||
3. PR-channel messaging group (instance `'slack-supervisor'`): `engage_mode: 'mention'`, `ignored_message_policy: 'accumulate'`, `session_mode: 'per-thread'`.
|
||||
|
||||
### Tester (when the operator has created the `pr-tester` agent group)
|
||||
|
||||
The tester agent group's instructions describe the operator's test environment, so the group itself is **operator-created** (folder `pr-tester`). When it exists, bootstrap ensures the tester's PR-channel messaging group (instance `'slack-tester'`) with `engage_mode: 'mention'`, `ignored_message_policy: 'accumulate'`, `session_mode: 'per-thread'`. The orchestrator resolves tester sessions against this row.
|
||||
|
||||
---
|
||||
|
||||
## Database: pr_threads Table
|
||||
|
||||
**Migration:** `src/db/migrations/module-pr-factory-pr-threads-v2.ts` (name `module-pr-factory-pr-threads-v2`)
|
||||
**CRUD:** `src/db/pr-threads.ts`
|
||||
|
||||
Index mapping PR threads to sessions, stored in the central DB (`data/v2.db`). The delivering bot is resolved per messaging group via `messaging_groups.instance`, so the table carries no bot identity column. (The `-v2` migration name is deliberate: the runner dedupes by name, and installs upgraded from the legacy `bot_id` substrate have the v1 name recorded — the new name is what makes the column-drop arm run there.)
|
||||
|
||||
```sql
|
||||
CREATE TABLE pr_threads (
|
||||
channel_id TEXT NOT NULL, -- e.g. "slack:C0XXXXXXX"
|
||||
thread_ts TEXT NOT NULL, -- bare Slack thread timestamp
|
||||
channel_type TEXT NOT NULL, -- "slack"
|
||||
repo_full_name TEXT NOT NULL, -- e.g. "acme/widgets"
|
||||
pr_number INTEGER NOT NULL,
|
||||
session_id TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL,
|
||||
PRIMARY KEY (channel_id, thread_ts)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_pr_threads_repo_pr ON pr_threads (repo_full_name, pr_number);
|
||||
CREATE INDEX idx_pr_threads_session ON pr_threads (session_id);
|
||||
```
|
||||
|
||||
| Function | Key | Used by |
|
||||
|----------|-----|---------|
|
||||
| `getPrThread(channelId, threadTs)` | PK | CRUD primitive (covered by `pr-threads.test.ts`) |
|
||||
| `getPrThreadByRepoPr(repo, prNumber)` | `idx_pr_threads_repo_pr` | handler (dedup, close, synchronize), orchestrator |
|
||||
| `getPrThreadBySession(sessionId)` | `idx_pr_threads_session` | testing-approval, gh-action-approval, reactions |
|
||||
| `updatePrThreadSession(channelId, threadTs, sessionId)` | PK | handler (synchronize repoints the row) |
|
||||
| `deletePrThread(channelId, threadTs)` | PK | CRUD primitive (covered by `pr-threads.test.ts`) |
|
||||
|
||||
---
|
||||
|
||||
## Webhook Receiver
|
||||
|
||||
**File:** `src/modules/pr-factory/webhook.ts`
|
||||
|
||||
Registers a **raw HTTP handler** at `/webhook/github` on the shared webhook server via core's `registerWebhookHandler` (`src/webhook-server.ts`) — the same server that serves the Chat SDK adapter routes.
|
||||
|
||||
1. **Method check** — only POST (405 otherwise).
|
||||
2. **HMAC-SHA256 verification** — `x-hub-signature-256` vs `sha256=HMAC(secret, body)` with `crypto.timingSafeEqual`; 401 on failure.
|
||||
3. **Immediate 200** — before processing, so GitHub doesn't time out.
|
||||
4. **Event filter** — only `x-github-event: pull_request`.
|
||||
5. **Action filter** — `opened`, `synchronize`, `closed`, `ready_for_review`, `converted_to_draft`.
|
||||
6. **Parse and dispatch** a `PREvent` to the callback.
|
||||
|
||||
---
|
||||
|
||||
## PR Handler
|
||||
|
||||
**File:** `src/modules/pr-factory/handler.ts`
|
||||
|
||||
`handlePullRequest()` dispatches per action:
|
||||
|
||||
- **`closed`** — swap the thread reaction to 🟣 (merged) or 🔴 (closed), and destroy the PR's test VM if one exists. No new session.
|
||||
- **`converted_to_draft`** — swap to ⚪ (draft).
|
||||
- **`ready_for_review`** — swap ⚪→🟢 and re-triage in the same thread (or fall through to the opened flow if no thread exists yet).
|
||||
- **`synchronize`** (new commits) — skipped for drafts; otherwise kill the old container, delete the old session, resolve a fresh session **in the same Slack thread**, repoint `pr_threads` (`updatePrThreadSession`), re-fetch the diff, write a new trigger.
|
||||
- **`opened`** (and unseen PRs arriving via other actions):
|
||||
1. Dedup: an existing `pr_threads` row for (repo, pr#) means a redelivered webhook — no-op.
|
||||
2. Fetch PR stats + touched areas (GitHub API) and post the **thread opener** to the PR channel via `chat.postMessage` with the worker bot token; the response `ts` becomes the thread.
|
||||
3. React 🟢 (or ⚪ for drafts).
|
||||
4. `resolveSession(workerAgentGroupId, workerMessagingGroupId, 'slack:<channel>:<ts>', 'per-thread')`.
|
||||
5. `createPrThread(...)` recording (channel, thread) ↔ (repo, pr#, session).
|
||||
6. **Drafts stop here** — triage is deferred to `ready_for_review`.
|
||||
7. Pre-subscribe the worker to the thread (mention-sticky wiring).
|
||||
8. Refresh the repo mirror (`PR_FACTORY_REPO_MIRROR_DIR`) — best-effort `git fetch` + `reset --hard origin/main`.
|
||||
9. Fetch the diff (truncated at 50k chars), write the trigger message (with the triage directive — see the next section) to the session's inbound DB, wake the container.
|
||||
|
||||
Per-PR worker sessions have no dedicated fast teardown: idle workers are reaped by host-sweep's 30-minute staleness ceiling.
|
||||
|
||||
### The PR_CONTEXT contract
|
||||
|
||||
Every trigger message ends with:
|
||||
|
||||
```
|
||||
[PR_CONTEXT: channel=slack:CXXXX thread=1700000000.000100 repo=org/name pr=42]
|
||||
```
|
||||
|
||||
The worker's instructions (and any operator review skill) parse this tag for repo/PR identifiers and test-plan file naming; it is a cross-process contract — either leave it untouched or change every producer and consumer of it together.
|
||||
|
||||
### GitHub API access through OneCLI
|
||||
|
||||
Diff/stats fetches route through the OneCLI gateway's HTTP forward proxy so the gateway injects a vault-stored GitHub PAT:
|
||||
|
||||
1. undici's own `fetch` + `ProxyAgent` (Node's built-in fetch rejects external dispatchers — the reason for the pinned `undici` dependency);
|
||||
2. the OneCLI agent access token embedded as proxy Basic auth;
|
||||
3. the gateway CA (`~/.onecli/gateway-ca.pem`) trusted by the ProxyAgent.
|
||||
|
||||
If any piece is unavailable the module falls back to direct unauthenticated GitHub calls (60 req/h instead of 5000).
|
||||
|
||||
### Security model: the GitHub token must be read-only
|
||||
|
||||
The PAT in the vault — the one injected into worker/tester containers and used for these host-side fetches — **must be fine-grained and read-only** (`Contents: read`, `Pull requests: read`, `Metadata: read`; no write, no merge, no admin), scoped to the single repository the factory serves. This is the boundary that makes the autonomous parts of the pipeline safe:
|
||||
|
||||
- The worker reads diffs and runs read-only `gh` lookups with this token. It cannot comment, label, approve, close, or merge with it — those calls 403 at GitHub.
|
||||
- **All writes go through `credentialed_gh` only**, behind a human approval card, executed under the *approving human's* gh credentials (the `gh-action-approval` component), not the injected token.
|
||||
|
||||
So even if a malicious PR diff or a confused worker tries to act on GitHub directly, the worst it can do is read. Provisioning a write-capable token defeats the entire approval-gate design — it would let an agent merge to `main` without a human ever clicking a card. Treat "the injected GitHub token is read-only" as a hard invariant, not a recommendation.
|
||||
|
||||
---
|
||||
|
||||
## The Worker's Review Workflow
|
||||
|
||||
The worker runs inside a container as a standard NanoClaw agent. Its triage/review/test-plan behavior comes from **group instructions, not shipped container skills**, and can be customized at two levels:
|
||||
|
||||
1. **Default (shipped):** `src/modules/pr-factory/worker-instructions.ts` is seeded into `groups/pr-factory-worker/CLAUDE.local.md` on first bootstrap and never overwritten. It carries a three-stage triage workflow (high-level read → author assessment → categorize and decide CLOSE / MERGE / REVIEW), a review stage, and a test-plan stage, plus the hard constraints (never act on GitHub directly — every write goes through `credentialed_gh`; all output to the PR thread). **Edit that file** to tune trusted contributors, merge policy, and review depth for your repo.
|
||||
2. **Operator skill:** write the skill to `container/skills/<skill-name>/` and set `PR_FACTORY_REVIEW_SKILL=<skill-name>`. Every PR trigger then opens with `Use the /<skill-name> skill …` and the seeded defaults are ignored. This is the path for operators who maintain their own tuned review pipeline as a container skill. With the worker group's default `skills: 'all'` selection the new folder reaches the worker on its next container start — no container-config change is needed unless the group uses an explicit `skills` allowlist.
|
||||
|
||||
Test plan files are written to `/workspace/agent/test-plans/` (host path: `groups/pr-factory-worker/test-plans/`) with the `.md.pending` suffix; the host's testing gate looks them up by PR number.
|
||||
|
||||
---
|
||||
|
||||
## The Supervisor Agent
|
||||
|
||||
**File:** `src/modules/pr-factory/supervisor.ts` (instructions only; wiring lives in bootstrap)
|
||||
|
||||
A separate agent group (`pr-factory-supervisor`) speaking as its own Slack bot. It improves the worker based on human feedback:
|
||||
|
||||
- **Admin channel** — shared session, engages on every message.
|
||||
- **PR threads** — engages when @-mentioned, sees accumulated thread history.
|
||||
|
||||
Its MCP tool: `propose_skill_edit` (see below). A proposed edit is posted as a diff for human approval and, on accept, written under `container/skills/`. Skill edits take effect on the **next** container spawn — in practice, the next PR a worker triages — because running containers keep their old read-only skill view until they respawn; there is no force-rerun of an in-flight session.
|
||||
|
||||
---
|
||||
|
||||
## Test Orchestration
|
||||
|
||||
Two modules split the responsibility across a seam; they communicate only via callbacks wired in `initOrchestrator`.
|
||||
|
||||
### test-orchestrator.ts — VM lifecycle (everything SSH; `vm-test-orchestrator` component)
|
||||
|
||||
- Implements the `TestOrchestratorModule` contract and registers itself on core's seam at import time.
|
||||
- Sequential queue: one test run at a time (`submitTest` enqueues).
|
||||
- Per run: `ssh <control-plane> cp <template> <prefix><pr>` → wait for SSH → check out the PR branch in `~/nanoclaw` on the VM → `pnpm run build` + restart the systemd unit → wait for stable `active` → `onVmReady`.
|
||||
- VM naming: `<TEST_VM_NAME_PREFIX><pr>` reachable at `TEST_VM_HOST_TEMPLATE` with `{name}` expanded; login as `TEST_VM_SSH_USER`. Pool capped at 20 (oldest destroyed first). VMs are tagged `ephemeral`.
|
||||
- On any setup failure: VM destroyed, `onRunFailed`.
|
||||
- `destroyVm` on PR close/merge; `shutdown` destroys everything.
|
||||
|
||||
The template VM is operator-prepared: project at `~/nanoclaw` with an `origin` that serves `pull/<n>/head` refs, buildable with `pnpm run build`, running as a systemd user service whose unit name contains `nanoclaw`, and the host's control-plane key authorized on cloned VMs.
|
||||
|
||||
### orchestrator.ts — NanoClaw coordination (never SSHes; core)
|
||||
|
||||
- `onVmReady` → resolves a **tester session** in the PR's thread (tester agent group + tester-instance messaging group), writes the plan + VM host as a trigger, wakes the tester, arms a **30-minute timeout** (timeout → `cancelRun` destroys the VM and posts to the thread).
|
||||
- `handleTestResults` (the `pr_submit_test_results` delivery action) → cancels the timeout, `completeRun` (VM stays alive for investigation), posts a results summary into the worker session's `outbound.db` via `writeOutboundDirect` (as a Slack Canvas link when canvas creation succeeds), then wakes the **worker**:
|
||||
- `PASS` → prompt to propose merge via `credentialed_gh`;
|
||||
- `FAIL` / `PARTIAL` → prompt to analyze whether failures are PR-related.
|
||||
- `onRunFailed` → posts the failure into the thread and offers a **Retry Test** approval card (`pr_retry_test`).
|
||||
|
||||
---
|
||||
|
||||
## MCP Tools (Container-Side)
|
||||
|
||||
**File:** `container/agent-runner/src/mcp-tools/pr-factory.ts`
|
||||
|
||||
Four tools, registered in every container via the mcp-tools barrel. Each writes a `kind: 'system'` row to `messages_out`; the host's delivery loop dispatches the `action` string to the matching registered handler. In a non-PR-factory install the actions are unregistered and dropped with "Unknown system action".
|
||||
|
||||
| Tool | Action emitted | Args |
|
||||
|------|----------------|------|
|
||||
| `propose_skill_edit` | `pr_propose_skill_edit` | `{ skill_name, file_name, content }` |
|
||||
| `send_to_testing` | `pr_send_to_testing` | `{}` (plan located via session → pr_threads → file naming) |
|
||||
| `credentialed_gh` | `pr_gh` | `{ command?, commands?, description }` — `command` is normalized into `commands` |
|
||||
| `submit_test_results` | `pr_submit_test_results` | `{ pr_number, repo?, verdict: PASS\|PARTIAL\|FAIL, content }` |
|
||||
|
||||
When the agent omits `repo`, the field is omitted from the payload too — the host applies `PR_FACTORY_DEFAULT_REPO`. The container never bakes in a repo default.
|
||||
|
||||
---
|
||||
|
||||
## Delivery Actions and Approval Handlers (Host-Side)
|
||||
|
||||
**Registered in:** `src/modules/pr-factory/index.ts` (delivery actions) and the individual gate files (approval handlers).
|
||||
|
||||
| Delivery action | Handler | Effect |
|
||||
|--------|-------------|--------|
|
||||
| `pr_send_to_testing` | `testing-approval.handleSendToTesting` | Post plan canvas + approval card |
|
||||
| `pr_propose_skill_edit` | `skill-edit-approval.handleProposeSkillEdit` | Post diff + approval card |
|
||||
| `pr_gh` | gh-action seam → `gh-action-approval.handleGh` | Post command preview + approval card |
|
||||
| `pr_submit_test_results` | `orchestrator.handleTestResults` | Post results, wake worker per verdict |
|
||||
|
||||
| Approval action | Fires on Accept |
|
||||
|-----------------|------------------|
|
||||
| `pr_send_to_testing` | Read plan file, `submitTest` to the queue, delete the file |
|
||||
| `pr_retry_test` | Re-submit the same plan to the queue |
|
||||
| `pr_propose_skill_edit` | Re-validate path, write the file under `container/skills/` |
|
||||
| `pr_gh` | Execute the gh command(s) sequentially |
|
||||
|
||||
Rejecting any card resolves through core's response handler; pr-factory's approval-resolved hook then clears the thread's 👀 reaction.
|
||||
|
||||
**Approver roles are required.** Core's `isAuthorizedApprovalClick` silently ignores card clicks from users without a `user_roles` row — the symptom is a card that does nothing, with only a host-log warning. Grant every approver a role: `pnpm run ncl roles grant --user 'slack:U0XXXXXXX' --role admin`.
|
||||
|
||||
---
|
||||
|
||||
## Testing Approval Gate
|
||||
|
||||
**File:** `src/modules/pr-factory/testing-approval.ts`
|
||||
|
||||
1. **Worker** writes the plan to `/workspace/agent/test-plans/pr-{N}-thread-{tsSafe}.md.pending` (`tsSafe` = thread ts with `.` → `-`) and calls `send_to_testing`.
|
||||
2. **Host** locates the file by PR number (via the session's `pr_threads` row), renders the plan as a **Slack Canvas** in the PR thread (file-upload fallback when canvas creation fails or the `slack-canvas` component is absent), dismisses any stale approval cards for the session, and posts a **Send to Testing / Reject** card. The thread gets the 👀 reaction.
|
||||
3. **Accept** → plan content is read and `submitTest({ prNumber, repo, planContent })` enqueues the run; the `.md.pending` file is deleted.
|
||||
4. **Reject** → the card resolves and the 👀 clears; the plan file stays in place until the worker produces a new one.
|
||||
|
||||
On a VM setup failure, `postRetryCard` offers **Retry Test / Dismiss** with the same plan content (`pr_retry_test`).
|
||||
|
||||
---
|
||||
|
||||
## Skill Edit Approval Gate
|
||||
|
||||
**File:** `src/modules/pr-factory/skill-edit-approval.ts`
|
||||
|
||||
1. **Supervisor** reads the current skill from `/app/skills/` (read-only mount) and calls `propose_skill_edit` with the full new content.
|
||||
2. **Host** validates the path (must resolve inside `container/skills/`), computes a unified diff (`diff -u`; full content for new files), posts it as a `.diff` file in the supervisor's thread, then posts an **Apply Edit / Reject** card.
|
||||
3. **Accept** → path re-validated, directories created as needed, file written. Running containers keep their old read-only view; the change applies on the next container spawn.
|
||||
|
||||
Path traversal is rejected at both proposal and approval time.
|
||||
|
||||
---
|
||||
|
||||
## GitHub CLI Approval Gate
|
||||
|
||||
**File:** `src/modules/pr-factory/gh-action-approval.ts` (`gh-action-approval` component)
|
||||
|
||||
1. Agent calls `credentialed_gh` with command(s) starting with `gh ` and a description.
|
||||
2. Host posts the description in the thread, then an approval card showing the exact command block.
|
||||
3. **Accept** → commands run sequentially via `execFile('gh', args)` (quote-aware tokenization, leading `gh ` stripped), stopping at the first failure. When `PR_FACTORY_GH_REPO_ALLOWLIST` is set, commands explicitly referencing a repo outside the list are refused before execution. Merge failures get guidance appended (branch protection vs command error). Results are sent back to the agent via `ctx.notify`.
|
||||
|
||||
### Approver credential mapping
|
||||
|
||||
`data/gh-users.json` (operator-created, never in the repo; sample at `src/modules/pr-factory/gh-users.sample.json`) maps **namespaced** approver ids to gh CLI account logins:
|
||||
|
||||
```json
|
||||
{ "slack:U0XXXXXXX": "their-gh-login" }
|
||||
```
|
||||
|
||||
Keys are exactly the namespaced user ids core's approval flow reports (`<channel>:<handle>`); there is no bare-id fallback. When the approver maps to an account, the host reads that account's `oauth_token` from `~/.config/gh/hosts.yml` and passes it as `GH_TOKEN` to the subprocess, so the action is attributed to the human who approved it. The read is lazy and fail-soft: with a missing or malformed mapping file, the action simply runs under the default `gh` credentials (logged as "No gh account mapping for approver").
|
||||
|
||||
> **Known smell:** threading tokens out of `gh`'s hosts.yml into a subprocess environment bypasses the OneCLI gateway (skill-guidelines anti-pattern #5). The redesign direction is to route `gh` through the gateway's forward proxy with per-approver vault credentials. Carried with explicit sign-off; declared in the `gh-action-approval` SKILL.md.
|
||||
|
||||
---
|
||||
|
||||
## Slack: Three Bots, One Channel Type
|
||||
|
||||
Three Slack apps in one workspace, on core's native channel-instance substrate (core migration 016; `slack-bots` component). One instance value per adapter drives the registry key, the webhook route, the Chat SDK state namespace, and the `messaging_groups.instance` column:
|
||||
|
||||
| Bot | Adapter | Instance | Webhook path |
|
||||
|-----|---------|----------|--------------|
|
||||
| Worker | `src/channels/slack.ts` (stock `/add-slack`) | `slack` (default) | `/webhook/slack` |
|
||||
| Supervisor | `src/channels/slack-supervisor.ts` | `slack-supervisor` (`SUPERVISOR_INSTANCE`) | `/webhook/slack-supervisor` |
|
||||
| Tester | `src/channels/slack-tester.ts` | `slack-tester` (`TESTER_INSTANCE`) | `/webhook/slack-tester` |
|
||||
|
||||
The router disambiguates inbound events by `(channel_type, instance)`. The three adapters share a sibling-bot ID set (`src/channels/slack-bot-ids.ts`): each registers its own bot user id at factory time and wraps its bridge in `withSiblingEchoGuard`, so sibling-authored messages are dropped across all four Chat SDK dispatch paths — no echo loops. A router-side helper (`src/channels/sibling-mention.ts`) additionally keeps `@pr-tester …` follow-ups in a sticky worker thread from engaging the worker. Humans @-mention each bot distinctly.
|
||||
|
||||
Thread status reactions (worker bot): 🟢 open · ⚪ draft · 🔴 closed · 🟣 merged · 👀 awaiting approval (`src/modules/pr-factory/reactions.ts`).
|
||||
|
||||
---
|
||||
|
||||
## Slack Canvases
|
||||
|
||||
**File:** `src/modules/pr-factory/slack-canvas.ts` (`slack-canvas` component)
|
||||
|
||||
Registers the Slack Canvas API client on core's canvas seam (`canvases.create` → `canvases.access.set` → `files.info` permalink), and a delivery file transform on core's `registerFileTransform` hook: `.md` outbox attachments from the worker's Slack sessions deliver as canvas links appended to the message text instead of file uploads. Non-`.md` files, non-worker sessions, and provider failures pass through / fall back to the original upload. Requires `canvases:write` + `files:read` scopes on the worker app and a paid Slack plan; on free plans everything falls back to `.md` uploads.
|
||||
|
||||
---
|
||||
|
||||
## Activity Log
|
||||
|
||||
`src/modules/pr-factory/activity-log.ts` appends NDJSON events to `data/pr-activity/<owner>/<repo>/<pr>.log` (`prLog(prNumber, repo, event, details)`). Events without a resolvable repo (no `PR_FACTORY_DEFAULT_REPO`) land under `data/pr-activity/unconfigured/`.
|
||||
|
||||
```bash
tail -f data/pr-activity/<owner>/<repo>/42.log # single PR
tail -f data/pr-activity/<owner>/<repo>/*.log # all PRs
```
|
||||
|
||||
---
|
||||
|
||||
## Operational notes for adopters
|
||||
|
||||
Practical things to know before you run this on a real repo.
|
||||
|
||||
### `gh` in the worker container
|
||||
|
||||
The default worker workflow runs read-only `gh` lookups inside the worker container. The stock agent image **does not ship `gh`** (its apt block installs `git`, `curl`, `chromium`, `tini`, `unzip`; the Node-CLI block installs `claude-code` / `agent-browser` / `vercel`). Either add `gh` to `container/Dockerfile` (pinned via a new `ARG`, then `./container/build.sh`) or supply a review skill that uses the GitHub REST API through the OneCLI proxy instead. Whichever you pick, the read-only GitHub credential must reach the container so the calls authenticate — and writes still go only through `credentialed_gh`.
|
||||
|
||||
### Tester → VM SSH access
|
||||
|
||||
The tester agent SSHes from inside its container to the cloned VM (`TEST_VM_SSH_USER@<TEST_VM_HOST_TEMPLATE>`) to run the plan. Provision a private key into the `pr-tester` group's container with the matching public key in the VM's `authorized_keys` (the host's own key handles the host→control-plane clone/reap leg separately). Add a `known_hosts` entry or `StrictHostKeyChecking accept-new` so the first connection doesn't hang on a prompt. Without this the VM reports ready but the tester can't log in, and the run hits the 30-minute timeout. Details in the `vm-test-orchestrator` SKILL.md.
|
||||
|
||||
### Host restart drops in-flight test runs
|
||||
|
||||
The test queue, the per-run timeouts, and the live-VM registry are all in-process state — a host restart loses them. An approved test run that was executing when the host went down is **not resumed**: its timeout timer is gone (so no timeout message ever posts), the queue is empty on reboot, and the VM that was cloned for it is **orphaned** — still running on the provider, no longer tracked by the pool. Recovery is manual: list ephemeral VMs on the control plane and `rm` any that no longer correspond to an open PR (they are tagged `ephemeral`; `destroyVm` runs on PR close/merge for tracked ones, but an orphan won't be reaped automatically). Re-approve the test plan from the PR thread to start a fresh run.
|
||||
|
||||
### Cost and scale
|
||||
|
||||
- **One container per PR.** Each open PR gets its own worker session/container; a burst of PRs is a burst of concurrent containers and LLM sessions.
- **Full-diff LLM sessions.** The worker is seeded with the PR diff (truncated at 50k chars) and reasons over it — token cost scales with diff size and review depth, per PR.
- **Re-triage on every push.** A `synchronize` event (new commits) kills the session and re-runs triage against the fresh diff, so an actively-pushed PR is re-reviewed repeatedly.
- **Single-repo throughput.** A factory instance serves one repo (see "One instance, one repository"); throughput is bounded by that repo's PR/push rate and your container/LLM concurrency budget. Scale out with more instances for more repos, not more load per instance.
|
||||
|
||||
### Single trust domain
|
||||
|
||||
The factory registers host-wide delivery actions (`pr_*`) and seam providers that **trust every agent group on the host** — any agent group that emits a `pr_*` system action is serviced, and the gh/skill-edit/test gates act on whatever session calls them. There is no per-group authorization on these actions beyond the human approval cards. Run the PR Factory on a host you control, with agent groups you trust; do not co-locate it with untrusted or third-party agent groups.
|
||||
|
||||
---
|
||||
|
||||
## File Map
|
||||
|
||||
### Host (src/modules/pr-factory/) — by component
|
||||
|
||||
| File | Component | Purpose |
|------|-----------|---------|
| `index.ts` | core | Module entry: env gating, six delivery actions, adapter-ready bootstrap + webhook + orchestrator init, approval-resolved hook, shutdown |
| `bootstrap.ts` | core | Idempotent entity setup (worker / supervisor / tester), instance-keyed lookups, foreign-wiring cleanup, drift correction |
| `defaults.ts` | core | `DEFAULT_REPO`, `REPO_MIRROR_DIR`, `REVIEW_SKILL` + `triageDirective()` |
| `worker-instructions.ts` | core | Default triage/review/test-plan group instructions (the operator override point) |
| `webhook.ts` | core | GitHub webhook: HMAC, event/action filter, `PREvent` parsing |
| `handler.ts` | core | Per-PR lifecycle: opener, session, pr_threads, trigger, wake; synchronize/close/draft handling; OneCLI proxy GitHub fetches |
| `supervisor.ts` | core | `SUPERVISOR_FOLDER` + `SUPERVISOR_INSTRUCTIONS` |
| `testing-approval.ts` | core | Testing gate + retry card |
| `skill-edit-approval.ts` | core | Skill-edit gate (traversal-guarded writes into `container/skills/`) |
| `orchestrator.ts` | core | Tester wake, 30-min timeout, results → worker |
| `reactions.ts` | core | Thread status reactions + 👀 helpers |
| `dismiss-approvals.ts` | core | One-active-card-per-thread dismissal |
| `activity-log.ts` | core | Per-PR NDJSON activity log |
| `gh-action.ts` | core (seam) | `setGhActionHandler` / `dispatchGhAction` |
| `test-orchestration.ts` | core (seam) | `registerTestOrchestrator` / `getTestOrchestrator` + module contract |
| `canvas.ts` | core (seam) | `registerCanvasProvider` / `createCanvas` |
| `gh-action-approval.ts` (+ `gh-users.sample.json`) | gh-action-approval | GitHub CLI gate + approver credential mapping |
| `test-orchestrator.ts` | vm-test-orchestrator | VM lifecycle, sequential queue, pool |
| `slack-canvas.ts` | slack-canvas | Canvas provider + `.md` → canvas delivery transform |
|
||||
|
||||
### Database
|
||||
|
||||
| File | Purpose |
|------|---------|
| `src/db/pr-threads.ts` | CRUD for pr_threads |
| `src/db/migrations/module-pr-factory-pr-threads-v2.ts` | Creates pr_threads (drops the legacy bot column on legacy-substrate upgrades) |
| `src/db/migrations/module-slack-bots-bot-id-to-instance.ts` | Legacy-upgrade: bot_id substrate → instance substrate |
| `src/db/sessions.ts` | (+4 appended pending_approvals helpers) |
|
||||
|
||||
### Container
|
||||
|
||||
| File | Purpose |
|------|---------|
| `container/agent-runner/src/mcp-tools/pr-factory.ts` | The six MCP tools |
|
||||
|
||||
### Channels
|
||||
|
||||
| File | Purpose |
|------|---------|
| `src/channels/slack-supervisor.ts` / `slack-tester.ts` | Supervisor/tester adapters as named instances |
| `src/channels/slack-bot-ids.ts` | Shared sibling-bot id set + echo guard |
| `src/channels/sibling-mention.ts` | Sticky-thread sibling-mention suppression |
|
||||
|
||||
---
|
||||
|
||||
## Manual Operations
|
||||
|
||||
### Re-run a PR from scratch
|
||||
|
||||
There is no in-place "clear and retrigger" — a skill edit applies to the next PR the worker triages, and a fresh push (a `synchronize` event) re-triages an open PR in the same thread against the updated diff. To force a clean re-run, close and reopen the PR on GitHub, or push a no-op commit; the webhook drives a fresh session either way.
|
||||
|
||||
### Inspect pr_threads
|
||||
|
||||
Use the sanctioned query wrapper, not the sqlite3 binary:
|
||||
|
||||
```bash
pnpm exec tsx scripts/q.ts data/v2.db "SELECT * FROM pr_threads WHERE pr_number = 42"
```
|
||||
|
||||
### Check pending test plans
|
||||
|
||||
```bash
ls -la groups/pr-factory-worker/test-plans/
```
|
||||
|
||||
### Check test-VM control-plane connectivity
|
||||
|
||||
```bash
ssh -o ConnectTimeout=5 "${PR_FACTORY_TEST_SSH_HOST:-exe.dev}" ls
```
|
||||
|
||||
### View module logs
|
||||
|
||||
```bash
grep 'PR factory\|pr_' logs/nanoclaw.log | tail -50
grep 'PR factory\|pr_' logs/nanoclaw.error.log | tail -20
```
|
||||
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# sync-skill-files.sh — generate (or check) a skill's files/ mirror from the
|
||||
# canonical in-tree files.
|
||||
#
|
||||
# Each skill that owns code lists its files in its skill folder's files.txt
|
||||
# (one repo-relative path per line; blank lines and #-comments ignored).
|
||||
# Skill folders live at any of three layers:
|
||||
#
|
||||
# .claude/skills/<name>/ (top-level skills)
|
||||
# .claude/skills/recipes/<recipe>/ (a recipe's own files)
|
||||
# .claude/skills/recipes/<recipe>/skills/<name>/ (recipe components)
|
||||
#
|
||||
# The canonical copy is the in-tree file; this script copies each listed path
|
||||
# into <skill-folder>/files/<repo-relative-path> so the skill folder carries
|
||||
# a generated mirror, never a hand-maintained duplicate.
|
||||
#
|
||||
# Usage:
|
||||
# scripts/sync-skill-files.sh <skill-path> [--check]
|
||||
# scripts/sync-skill-files.sh --all [--check]
|
||||
#
|
||||
# <skill-path> is relative to .claude/skills/ — e.g. `add-foo`,
|
||||
# `recipes/pr-factory`, or `recipes/pr-factory/skills/slack-bots`.
|
||||
#
|
||||
# --check: byte-compare instead of copy; exit 1 listing drifted or missing
|
||||
# mirror files.
|
||||
# Strict mode: abort on a failing command, an unset variable, or a failure
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# The script is invoked as scripts/sync-skill-files.sh, so the repository root
# is the parent of the directory that holds this file.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Root under which every skill folder (top-level, recipe, recipe component) lives.
SKILLS_DIR="${REPO_ROOT}/.claude/skills"
|
||||
|
||||
# Print the one-line synopsis on stderr and terminate with status 2, the
# exit code this script uses for invocation errors.
usage() {
  printf 'Usage: %s <skill-path>|--all [--check]\n' "$0" >&2
  exit 2
}
|
||||
|
||||
# At least one argument (a skill path or --all) is mandatory.
if [ $# -lt 1 ]; then
  usage
fi

# First argument selects what to process; everything after it must be a flag.
TARGET="$1"
shift

# CHECK=1 switches from "copy into the mirror" to "byte-compare and report".
CHECK=0
for arg; do
  if [ "$arg" = "--check" ]; then
    CHECK=1
  else
    # Any unrecognised trailing argument is an invocation error.
    usage
  fi
done
|
||||
|
||||
# sync_skill <skill-path>
#
# Processes one skill folder, given as a path relative to $SKILLS_DIR. Reads
# the folder's files.txt manifest and handles every listed repo-relative path:
#
#   CHECK=0  copy $REPO_ROOT/<path> to <skill>/files/<path>, creating parent
#            directories as needed, and log "synced" on stdout;
#   CHECK=1  byte-compare canon and mirror, reporting MISSING mirror / DRIFTED
#            on stderr without modifying anything.
#
# Returns 1 if the manifest does not exist or if any entry is missing in the
# tree, missing in the mirror, or drifted; returns 0 otherwise. A bad entry
# does not stop the loop, so one run reports every problem in the manifest.
sync_skill() {
  local name="$1"
  local manifest="$SKILLS_DIR/$name/files.txt"
  local mirror_root="$SKILLS_DIR/$name/files"
  local failed=0
  local line src dst

  if [ ! -f "$manifest" ]; then
    echo "error: $manifest not found" >&2
    return 1
  fi

  # `|| [ -n "$line" ]` keeps a final entry that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Drop the #-comment, then trim leading and trailing whitespace using
    # parameter expansion only. Piping the entry through `echo` would treat
    # entries such as `-n` or `-e` as echo options and silently lose them,
    # and would fork a sed process for every manifest line.
    line="${line%%#*}"
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    [ -n "$line" ] || continue

    src="$REPO_ROOT/$line"
    dst="$mirror_root/$line"

    # The in-tree file is canonical; a manifest entry without one is an error
    # in both modes.
    if [ ! -f "$src" ]; then
      echo "[$name] MISSING in tree: $line" >&2
      failed=1
      continue
    fi

    if [ "$CHECK" -eq 1 ]; then
      if [ ! -f "$dst" ]; then
        echo "[$name] MISSING mirror: $line" >&2
        failed=1
      elif ! cmp -s "$src" "$dst"; then
        echo "[$name] DRIFTED: $line" >&2
        failed=1
      fi
    else
      mkdir -p "$(dirname "$dst")"
      cp "$src" "$dst"
      echo "[$name] synced: $line"
    fi
  done < "$manifest"

  return "$failed"
}
|
||||
|
||||
# Entry point.
#
# `--all` processes every files.txt found at the three skill layers
# (top-level skills, recipes, recipe components). Any other TARGET is treated
# as a single skill path relative to $SKILLS_DIR.
if [ "$TARGET" = "--all" ]; then
  status=0
  found=0
  for manifest in \
    "$SKILLS_DIR"/*/files.txt \
    "$SKILLS_DIR"/recipes/*/files.txt \
    "$SKILLS_DIR"/recipes/*/skills/*/files.txt; do
    # A glob with no match is left as a literal pattern, so skip non-files.
    [ -f "$manifest" ] || continue
    found=1
    dir="${manifest%/files.txt}"
    name="${dir#"$SKILLS_DIR"/}"
    # Record the failure but keep going, so one run reports every skill.
    sync_skill "$name" || status=1
  done
  if [ "$found" -eq 0 ]; then
    # Informational only (stdout, exit 0): an empty repo is not an error.
    echo "No skill manifests (files.txt at any of the three skill layers) found — nothing to sync."
  fi
  exit "$status"
fi

# Single-skill mode: the script's exit status is sync_skill's return value.
sync_skill "$TARGET"
|
||||
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* pr-factory recipe — composed-stack guards.
|
||||
*
|
||||
* Each component's own tests prove it works alone; this suite proves the
|
||||
* components compose. It runs the FULL migration barrel on a fresh DB
|
||||
* (core instance substrate + both component migrations), imports the REAL
|
||||
* modules barrel with the PR Factory env primed, fires the real bootstrap
|
||||
* through the delivery-adapter-ready callback, and asserts the
|
||||
* cross-component invariants no single component test owns:
|
||||
*
|
||||
* 1. the migration chain composes in barrel order on a fresh DB and yields
|
||||
* the composed schema (instance column, v2 pr_threads);
|
||||
* 2. bootstrap on that schema lands all three bot instances on ONE Slack
|
||||
* channel row-set with exact-instance resolution (the UNIQUE triple
|
||||
* from 016 holding under the recipe's full wiring);
|
||||
* 3. core's single-slot delivery file transform has exactly one registrant
|
||||
* across every module in the tree (a second registrant would silently
|
||||
* clobber the slack-canvas conversion);
|
||||
* 4. every skill manifest's files/ mirror is in sync
|
||||
* (scripts/sync-skill-files.sh --all --check) — canon edits that skip
|
||||
* the sync script fail here, not in review.
|
||||
*/
|
||||
import { spawnSync } from 'child_process';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Container lifecycle is replaced with inert stubs: the suite exercises
// migrations and bootstrap wiring, not real containers.
vi.mock('./container-runner.js', () => {
  const resolvesToNothing = () => vi.fn().mockResolvedValue(undefined);
  return {
    wakeContainer: resolvesToNothing(),
    isContainerRunning: vi.fn().mockReturnValue(false),
    killContainer: vi.fn(),
    buildAgentGroupImage: resolvesToNothing(),
  };
});
|
||||
|
||||
// Keep every real config export, but point the data and groups roots at the
// suite's scratch directory. The two paths are written out as literals and
// must stay in step with TEST_DIR, which is declared further down the file.
vi.mock('./config.js', () =>
  vi.importActual<typeof import('./config.js')>('./config.js').then((real) => ({
    ...real,
    DATA_DIR: '/tmp/nanoclaw-test-prf-stack/data',
    GROUPS_DIR: '/tmp/nanoclaw-test-prf-stack/groups',
  })),
);
|
||||
|
||||
/** Repository root: the parent of the directory that contains this file. */
const REPO_ROOT = path.resolve(fileURLToPath(import.meta.url), '..', '..');

/** Scratch directory, wiped in beforeAll and again in afterAll. */
const TEST_DIR = '/tmp/nanoclaw-test-prf-stack';

/** Slack channel ids handed to the PR Factory through the environment. */
const WORKER_CHANNEL = 'C0STACKWORK';
const SUPERVISOR_CHANNEL = 'C0STACKADMIN';

/** platform_id under which the PR channel's messaging groups are looked up. */
const WORKER_PLATFORM_ID = 'slack:' + WORKER_CHANNEL;

/** Random port in [21000, 41000) exported as WEBHOOK_PORT for this run. */
const PORT = Math.floor(Math.random() * 20000) + 21000;

// Handles assigned in beforeAll and released in afterAll.
let db: import('better-sqlite3').Database;
let closeDb: () => void;
let stopWebhookServer: () => Promise<void>;
|
||||
|
||||
beforeAll(async () => {
  // Every run starts from an empty scratch directory.
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DIR, { recursive: true });

  // The reactions/canvas paths call Slack over fetch. No leg of this suite
  // asserts on Slack, so every call is answered with a generic ok response.
  const fetchStub = vi.fn(async () => {
    const body = JSON.stringify({ ok: true });
    return new Response(body, { status: 200 });
  });
  vi.stubGlobal('fetch', fetchStub);

  // The env must be primed BEFORE the barrel import further down:
  // registration happens at import time.
  Object.assign(process.env, {
    GITHUB_WEBHOOK_SECRET: 'stack-secret',
    PR_FACTORY_SLACK_CHANNEL_ID: WORKER_CHANNEL,
    PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID: SUPERVISOR_CHANNEL,
    SLACK_BOT_TOKEN: 'xoxb-stack-test',
    WEBHOOK_PORT: String(PORT),
  });

  // Fresh test database with the full migration barrel applied.
  const database = await import('./db/index.js');
  db = database.initTestDb();
  database.runMigrations(db);
  closeDb = database.closeDb;

  // The operator-created tester agent group has to exist before boot so the
  // composed bootstrap wires all three instances.
  const testerGroup = {
    id: 'ag-stack-tester',
    name: 'PR Tester',
    folder: 'pr-tester',
    agent_provider: null,
    created_at: new Date().toISOString(),
  };
  database.createAgentGroup(testerGroup);

  // The real modules barrel, imported only for its side effects.
  await import('./modules/index.js');
  const delivery = await import('./delivery.js');
  ({ stopWebhookServer } = await import('./webhook-server.js'));

  // Installing the adapter fires onDeliveryAdapterReady, which runs the real
  // pr-factory bootstrap against the freshly migrated DB.
  delivery.setDeliveryAdapter({
    deliver: async () => 'plat-msg-stack',
  });

  // Adapter-ready callbacks are async; give them a moment to settle.
  await new Promise<void>((resolve) => {
    setTimeout(resolve, 50);
  });
});
|
||||
|
||||
afterAll(async () => {
  // The handles may be unset if beforeAll failed part-way, hence the `?.`.
  await stopWebhookServer?.();
  closeDb?.();
  vi.unstubAllGlobals();

  // Remove every variable beforeAll primed so later suites see a clean env.
  const primedEnv = [
    'GITHUB_WEBHOOK_SECRET',
    'PR_FACTORY_SLACK_CHANNEL_ID',
    'PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID',
    'SLACK_BOT_TOKEN',
    'WEBHOOK_PORT',
  ];
  for (const key of primedEnv) {
    delete process.env[key];
  }

  fs.rmSync(TEST_DIR, { recursive: true, force: true });
});
|
||||
|
||||
describe('pr-factory recipe — composed stack', () => {
  it('fresh-DB migration chain: both component migrations compose with the core train, in order', () => {
    interface RecordedMigration {
      name: string;
      version: number;
    }
    const recorded = db
      .prepare('SELECT name, version FROM schema_version ORDER BY version')
      .all() as RecordedMigration[];

    // Recorded version of a migration; fails the test if it was never applied.
    function versionOf(name: string): number {
      const row = recorded.find((candidate) => candidate.name === name);
      expect(row, `migration '${name}' recorded in schema_version`).toBeDefined();
      return row!.version;
    }

    // Ordering: the slack-bots upgrade shim precedes the messaging-group
    // instance migration, which precedes pr-factory-core's table migration.
    expect(versionOf('module-slack-bots-bot-id-to-instance')).toBeLessThan(versionOf('messaging-group-instance'));
    expect(versionOf('messaging-group-instance')).toBeLessThan(versionOf('module-pr-factory-pr-threads-v2'));

    // Shape of the composed schema.
    const columnNames = (pragma: string): string[] =>
      (db.pragma(pragma) as Array<{ name: string }>).map((column) => column.name);

    const mgCols = columnNames('table_info(messaging_groups)');
    expect(mgCols).toContain('instance');

    const prCols = columnNames('table_info(pr_threads)');
    expect(prCols).toContain('repo_full_name');
    expect(prCols).not.toContain('bot_id');
  });

  it('bootstrap on the composed tree: worker, supervisor, and tester instances coexist on one channel', async () => {
    const { getMessagingGroupByPlatform } = await import('./db/messaging-groups.js');
    const { getAgentGroupByFolder } = await import('./db/agent-groups.js');
    const { WORKER_FOLDER } = await import('./modules/pr-factory/bootstrap.js');
    const { SUPERVISOR_INSTANCE } = await import('./channels/slack-supervisor.js');
    const { TESTER_INSTANCE } = await import('./channels/slack-tester.js');

    const slackGroup = (platformId: string, instance: string) =>
      getMessagingGroupByPlatform('slack', platformId, instance);

    // The PR channel carries three rows with the same platform_id that differ
    // only by instance, so each exact lookup must resolve to its own row.
    const worker = slackGroup(WORKER_PLATFORM_ID, 'slack');
    const supervisor = slackGroup(WORKER_PLATFORM_ID, SUPERVISOR_INSTANCE);
    const tester = slackGroup(WORKER_PLATFORM_ID, TESTER_INSTANCE);
    for (const row of [worker, supervisor, tester]) {
      expect(row).toBeDefined();
    }
    const distinctIds = new Set([worker!.id, supervisor!.id, tester!.id]);
    expect(distinctIds.size).toBe(3);

    // The supervisor's admin channel lives under its own platform_id.
    const admin = slackGroup(`slack:${SUPERVISOR_CHANNEL}`, SUPERVISOR_INSTANCE);
    expect(admin).toBeDefined();

    // Bootstrap also created the worker agent group on the composed schema.
    const workerAg = getAgentGroupByFolder(WORKER_FOLDER);
    expect(workerAg).toBeDefined();
  });

  it('exactly one module registers on the single-slot delivery file transform', () => {
    // delivery.ts owns the slot (declaration + application); it is not a registrant.
    const slotOwner = path.join('src', 'delivery.ts');
    const registrationCall = /\bregisterFileTransform\s*\(/;

    // Depth-first scan returning repo-relative paths of non-test .ts files
    // that call registerFileTransform.
    const findRegistrants = (dir: string): string[] =>
      fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
        const full = path.join(dir, entry.name);
        if (entry.isDirectory()) return findRegistrants(full);

        const isProductionSource = entry.name.endsWith('.ts') && !entry.name.endsWith('.test.ts');
        if (!isProductionSource) return [];

        const relative = path.relative(REPO_ROOT, full);
        if (relative === slotOwner) return [];

        return registrationCall.test(fs.readFileSync(full, 'utf8')) ? [relative] : [];
      });

    const registrants = findRegistrants(path.join(REPO_ROOT, 'src'));
    expect(registrants).toEqual([path.join('src', 'modules', 'pr-factory', 'slack-canvas.ts')]);
  });

  it('every skill manifest mirror is in sync (sync-skill-files.sh --all --check)', () => {
    const script = path.join(REPO_ROOT, 'scripts', 'sync-skill-files.sh');
    const { stderr, status } = spawnSync('bash', [script, '--all', '--check'], { encoding: 'utf8' });

    // stderr is asserted first so a failure shows the drift listing itself.
    expect(stderr).toBe('');
    expect(status).toBe(0);
  });
});
|
||||
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* Skill files.txt manifest / mirror drift guard.
|
||||
*
|
||||
* Every skill that owns in-tree code lists its files in its skill folder's
|
||||
* `files.txt` (one repo-relative path per line) and carries a generated
|
||||
* mirror under `<skill-folder>/files/`, produced by
|
||||
* `scripts/sync-skill-files.sh <skill-path>`. Skill folders live at any of
|
||||
* three layers:
|
||||
*
|
||||
* .claude/skills/<name>/ (top-level skills)
|
||||
* .claude/skills/recipes/<recipe>/ (a recipe's own files)
|
||||
* .claude/skills/recipes/<recipe>/skills/<name>/ (recipe components)
|
||||
*
|
||||
* The in-tree file is canonical. This test asserts, for every manifest at
|
||||
* every layer:
|
||||
* 1. every listed path exists in the tree, and
|
||||
* 2. each listed file matches its mirror byte-for-byte (re-run the sync
|
||||
* script after editing canon — a missing files/ mirror is a failure,
|
||||
* not a skip).
|
||||
*
|
||||
* A fixture-driven suite additionally guards the discovery and the script's
|
||||
* `--all` glob: dropping any layer from the scan goes red even while the
|
||||
* repo has no manifests at that layer yet.
|
||||
*/
|
||||
import { spawnSync } from 'child_process';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
import { describe, it, expect, afterAll } from 'vitest';
|
||||
|
||||
/** Repository root: the parent of the directory that contains this file. */
const REPO_ROOT = path.resolve(fileURLToPath(import.meta.url), '..', '..');

/** The real sync script; the fixture suite copies it into a throwaway repo. */
const SYNC_SCRIPT = path.resolve(REPO_ROOT, 'scripts', 'sync-skill-files.sh');
|
||||
|
||||
/** One parsed `files.txt` manifest together with where its mirror lives. */
interface Manifest {
  /** Skill folder, as a path relative to `.claude/skills/`. */
  skill: string;
  /** Directory holding the generated mirror (`<skill-folder>/files`). */
  mirrorDir: string;
  /** Listed paths, with comments and blank lines already stripped. */
  files: string[];
}
|
||||
|
||||
/**
 * Lists candidate skill folders at all three layers, as paths relative to
 * `skillsDir`: every top-level entry except `recipes`, then for each recipe
 * the recipe folder itself followed by the entries of its `skills/`
 * subfolder. Entries are not checked for being directories or for carrying
 * a manifest; callers filter on `files.txt`.
 *
 * @param skillsDir Absolute path of the `.claude/skills` directory.
 * @returns Relative folder paths, or an empty array if `skillsDir` is absent.
 */
function skillDirs(skillsDir: string): string[] {
  if (!fs.existsSync(skillsDir)) return [];

  const topLevel = fs.readdirSync(skillsDir).filter((entry) => entry !== 'recipes');

  const recipesRoot = path.join(skillsDir, 'recipes');
  if (!fs.existsSync(recipesRoot)) return topLevel;

  const recipeLayers = fs.readdirSync(recipesRoot).flatMap((recipe) => {
    const recipeRel = path.join('recipes', recipe);
    const componentsRoot = path.join(recipesRoot, recipe, 'skills');
    const components = fs.existsSync(componentsRoot)
      ? fs.readdirSync(componentsRoot).map((component) => path.join(recipeRel, 'skills', component))
      : [];
    return [recipeRel, ...components];
  });

  return [...topLevel, ...recipeLayers];
}
|
||||
|
||||
/**
 * Collects every `files.txt` manifest under `<root>/.claude/skills`.
 *
 * Folders without a `files.txt` are ignored. Each manifest line has its
 * `#`-comment removed and is trimmed; lines left empty are dropped.
 *
 * @param root Repository (or fixture) root directory.
 * @returns One entry per skill folder that carries a manifest.
 */
function readManifests(root: string): Manifest[] {
  const skillsDir = path.join(root, '.claude', 'skills');
  const manifests: Manifest[] = [];

  for (const rel of skillDirs(skillsDir)) {
    const manifestPath = path.join(skillsDir, rel, 'files.txt');
    if (!fs.existsSync(manifestPath)) continue;

    const files: string[] = [];
    for (const rawLine of fs.readFileSync(manifestPath, 'utf8').split('\n')) {
      const entry = rawLine.replace(/#.*$/, '').trim();
      if (entry.length > 0) files.push(entry);
    }

    manifests.push({
      skill: rel,
      mirrorDir: path.join(skillsDir, rel, 'files'),
      files,
    });
  }

  return manifests;
}
|
||||
|
||||
describe('skill file manifests', () => {
  // Manifests are read at collection time so each skill gets its own test case.
  const manifests = readManifests(REPO_ROOT);
  const perSkill = manifests.map((m) => [m.skill, m] as const);

  it('scans the skills directory', () => {
    const skillsDir = path.join(REPO_ROOT, '.claude', 'skills');
    expect(fs.existsSync(skillsDir)).toBe(true);
  });

  it.each(perSkill)('%s: every listed path exists in the tree', (_skill, manifest) => {
    const missing: string[] = [];
    for (const listed of manifest.files) {
      if (!fs.existsSync(path.join(REPO_ROOT, listed))) missing.push(listed);
    }
    expect(missing).toEqual([]);
  });

  it.each(perSkill)('%s: files/ mirror matches the in-tree canon byte-for-byte', (_skill, manifest) => {
    // Each problem is reported with the exact command that repairs it.
    const drifted = manifest.files.flatMap((f) => {
      const canon = path.join(REPO_ROOT, f);
      const mirror = path.join(manifest.mirrorDir, f);
      if (!fs.existsSync(mirror)) {
        return [`${f} (mirror missing — run scripts/sync-skill-files.sh ${manifest.skill})`];
      }
      const identical = fs.readFileSync(canon).equals(fs.readFileSync(mirror));
      return identical ? [] : [`${f} (drifted — run scripts/sync-skill-files.sh ${manifest.skill})`];
    });
    expect(drifted).toEqual([]);
  });
});
|
||||
|
||||
describe('skill-sync infra covers all three skill layers (fixture)', () => {
  // Throwaway repo carrying one manifest per layer. The sync script derives
  // REPO_ROOT from its own location, so a copy is placed inside the fixture.
  const fixtureRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-sync-fixture-'));
  const skillsRoot = path.join(fixtureRoot, '.claude', 'skills');
  const topDir = path.join(skillsRoot, 'demo-top');
  const recipeDir = path.join(skillsRoot, 'recipes', 'demo-recipe');
  const nestedDir = path.join(skillsRoot, 'recipes', 'demo-recipe', 'skills', 'demo-component');
  const fixtureScript = path.join(fixtureRoot, 'scripts', 'sync-skill-files.sh');

  fs.mkdirSync(path.join(fixtureRoot, 'scripts'), { recursive: true });
  fs.copyFileSync(SYNC_SCRIPT, fixtureScript);

  // Canonical in-tree files, one per layer.
  const srcDir = path.join(fixtureRoot, 'src');
  fs.mkdirSync(srcDir, { recursive: true });
  fs.writeFileSync(path.join(srcDir, 'top-canon.ts'), 'export const layer = "top";\n');
  fs.writeFileSync(path.join(srcDir, 'recipe-canon.ts'), 'export const layer = "recipe";\n');
  fs.writeFileSync(path.join(srcDir, 'nested-canon.ts'), 'export const layer = "nested";\n');

  // One skill folder per layer, each listing its own canonical file.
  const skillFolders: Array<[dir: string, manifest: string]> = [
    [topDir, 'src/top-canon.ts\n'],
    [recipeDir, 'src/recipe-canon.ts\n'],
    [nestedDir, 'src/nested-canon.ts\n'],
  ];
  for (const [dir, manifest] of skillFolders) {
    fs.mkdirSync(dir, { recursive: true });
    fs.writeFileSync(path.join(dir, 'files.txt'), manifest);
  }

  afterAll(() => {
    fs.rmSync(fixtureRoot, { recursive: true, force: true });
  });

  /** Runs the fixture's copy of the sync script and returns exit status + stderr. */
  function runSync(...args: string[]): { status: number | null; stderr: string } {
    const { status, stderr } = spawnSync('bash', [fixtureScript, ...args], { encoding: 'utf8' });
    return { status, stderr };
  }

  it('discovery sees manifests at all three layers', () => {
    const discovered = readManifests(fixtureRoot)
      .map((m) => m.skill)
      .sort();
    expect(discovered).toEqual([
      'demo-top',
      path.join('recipes', 'demo-recipe'),
      path.join('recipes', 'demo-recipe', 'skills', 'demo-component'),
    ]);
  });

  it('--all syncs mirrors at all three layers', () => {
    const res = runSync('--all');
    expect(res.status).toBe(0);

    const readMirror = (skillDir: string, file: string): string =>
      fs.readFileSync(path.join(skillDir, 'files', 'src', file), 'utf8');

    expect(readMirror(topDir, 'top-canon.ts')).toBe('export const layer = "top";\n');
    expect(readMirror(recipeDir, 'recipe-canon.ts')).toBe('export const layer = "recipe";\n');
    expect(readMirror(nestedDir, 'nested-canon.ts')).toBe('export const layer = "nested";\n');
  });

  it('--all --check flags drift at the nested layer', () => {
    // Edit the canon after the previous test synced it; the mirror is now stale.
    fs.appendFileSync(path.join(fixtureRoot, 'src', 'nested-canon.ts'), '// drift\n');
    const drift = runSync('--all', '--check');
    expect(drift.status).toBe(1);
    expect(drift.stderr).toContain('DRIFTED: src/nested-canon.ts');

    // Re-syncing just the nested skill by path makes --check pass again.
    const resync = runSync('recipes/demo-recipe/skills/demo-component');
    expect(resync.status).toBe(0);
    expect(runSync('--all', '--check').status).toBe(0);
  });
});
|
||||
@@ -0,0 +1,45 @@
|
||||
# Remove gh-action-approval
|
||||
|
||||
Reverses every change the apply made. After removal, `credentialed_gh` calls degrade gracefully: core's seam notifies the agent that the component is not installed.
|
||||
|
||||
## 1. Delete the copied files
|
||||
|
||||
```bash
rm -f src/modules/pr-factory/gh-action-approval.ts
rm -f src/modules/pr-factory/gh-users.sample.json
rm -f src/modules/pr-factory/gh-action.test.ts
```
|
||||
|
||||
## 2. Delete the barrel line
|
||||
|
||||
In `src/modules/index.ts`, delete the line `import './pr-factory/gh-action-approval.js';`.
|
||||
|
||||
## 3. Remove the environment line
|
||||
|
||||
Delete `PR_FACTORY_GH_REPO_ALLOWLIST` from `.env` if present.
|
||||
|
||||
## 4. Operator data
|
||||
|
||||
`data/gh-users.json` maps real chat handles to gh logins — delete it unless the install will re-apply this component:
|
||||
|
||||
```bash
rm -f data/gh-users.json
```
|
||||
|
||||
The mapped gh accounts' host logins (`gh auth login`, `~/.config/gh/hosts.yml`) are operator-managed; revoke or keep them per your credential policy.
|
||||
|
||||
## 5. Pending cards
|
||||
|
||||
Any open `pr_gh` approval cards become dead after removal (the handler is gone). Resolve or dismiss outstanding cards before restarting, or delete the rows through the sanctioned query wrapper — `pnpm exec tsx scripts/q.ts data/v2.db "DELETE FROM pending_approvals WHERE action = 'pr_gh';"` — and accept the stale Slack cards.
|
||||
|
||||
## 6. Restart and validate
|
||||
|
||||
> **Skip this step during full-recipe removal.** When tearing down the whole PR Factory, the remaining components still reference seams that are being removed — a build mid-teardown is expected to be red. Only the recipe-level validation at the end binds. Run the block below only when removing `gh-action-approval` in isolation.
|
||||
|
||||
```bash
launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS
# systemctl --user restart nanoclaw # Linux
pnpm run build && pnpm test
```
|
||||
|
||||
The build and test run should be all green, with the gh-action test no longer part of the run.
|
||||
@@ -0,0 +1,113 @@
|
||||
---
|
||||
name: gh-action-approval
|
||||
description: PR Factory component — approval-gated GitHub CLI execution. Registers the executor on pr-factory-core's gh-action seam, posts each agent-proposed `gh` command behind a human approval card, and on approve runs it with the approver's gh credentials (namespaced gh-users mapping) under an optional repo allowlist.
|
||||
---
|
||||
|
||||
# gh-action-approval (PR Factory component)
|
||||
|
||||
The execution half of the PR Factory's credentialed-GitHub flow. `pr-factory-core` owns the `pr_gh` delivery action and a seam (`src/modules/pr-factory/gh-action.ts`); without this component every `credentialed_gh` MCP call answers "component not installed". With it:
|
||||
|
||||
1. The agent's `credentialed_gh` call lands on core's `pr_gh` action and dispatches here through the seam.
|
||||
2. The reason posts as a short preview in the PR's Slack thread, followed by an approval card that shows the reason and the full command(s).
|
||||
3. On **Approve**, the host runs `gh <command>` — sequentially, stopping at the first failure, with merge-failure guidance for branch-protection errors — and reports the output back to the agent.
|
||||
4. The approver's namespaced user id (`<channel>:<handle>`) resolves to a gh account via the operator-created `data/gh-users.json`, so merges and comments are attributed to the human who clicked, not to a bot identity.
|
||||
|
||||
Skill-owned files: `src/modules/pr-factory/gh-action-approval.ts` (the executor + `pr_gh` approval handler), `src/modules/pr-factory/gh-users.sample.json` (mapping template), and `src/modules/pr-factory/gh-action.test.ts` (guard test). Integration surface: one appended barrel line in `src/modules/index.ts`. No new dependencies — `gh` is a host binary prerequisite.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Probe each before applying; stop on a failed probe and do what it names first.
|
||||
|
||||
1. **The `pr-factory-core` component is applied** (this component registers on its seam and reuses its approval plumbing):
|
||||
|
||||
```bash
|
||||
grep -q 'export function setGhActionHandler' src/modules/pr-factory/gh-action.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: apply the `pr-factory-core` component first.
|
||||
|
||||
2. **The GitHub CLI is installed on the host** (commands run via `execFile('gh', ...)`):
|
||||
|
||||
```bash
|
||||
command -v gh && echo OK
|
||||
```
|
||||
|
||||
If it fails: install `gh` (e.g. `brew install gh`) and log in to the default account with `gh auth login`.
|
||||
|
||||
Each step below is idempotent: if the file or line is already present, leave it as is and continue.
|
||||
|
||||
## Apply
|
||||
|
||||
All copy sources are under this component's folder; run every command from the repo root:
|
||||
|
||||
```bash
|
||||
SKILL=.claude/skills/recipes/pr-factory/skills/gh-action-approval
|
||||
```
|
||||
|
||||
### 1. Copy the module and the mapping template
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/gh-action-approval.ts src/modules/pr-factory/gh-action-approval.ts
|
||||
cp $SKILL/files/src/modules/pr-factory/gh-users.sample.json src/modules/pr-factory/gh-users.sample.json
|
||||
```
|
||||
|
||||
### 2. Append the modules-barrel line (`src/modules/index.ts`)
|
||||
|
||||
After the `import './pr-factory/index.js';` line, append (skip if already present):
|
||||
|
||||
```typescript
|
||||
import './pr-factory/gh-action-approval.js';
|
||||
```
|
||||
|
||||
### 3. Copy the guard test
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/gh-action.test.ts src/modules/pr-factory/gh-action.test.ts
|
||||
```
|
||||
|
||||
| Test | Guards |
|
||||
|------|--------|
|
||||
| `src/modules/pr-factory/gh-action.test.ts` | The barrel line via the REAL modules barrel and both registrations through core's read sides: `dispatchGhAction` reaches the installed executor (preview + card + `pending_approvals` row, routed through the messaging group's instance), and `getApprovalHandler('pr_gh')` drives execution against a PATH-shimmed `gh` — tokenization, stop-on-first-failure + merge guidance, the repo-allowlist refusal, the NAMESPACED gh-users lookup with NO bare-id fallback (GH_TOKEN from a HOME-sandboxed hosts.yml), and the fail-soft missing-mapping path |
|
||||
|
||||
## Configuration
|
||||
|
||||
### Approver → gh account mapping (`data/gh-users.json`)
|
||||
|
||||
Operator-created (real user ids are operator data — they live under gitignored `data/`, never in the repo). Start from the shipped sample:
|
||||
|
||||
```bash
|
||||
cp src/modules/pr-factory/gh-users.sample.json data/gh-users.json
|
||||
```
|
||||
|
||||
then edit. Keys are **namespaced** user ids exactly as core's approval flow reports them — `<channel>:<handle>`, e.g.:
|
||||
|
||||
```json
|
||||
{ "slack:U0XXXXXXX": "their-gh-login" }
|
||||
```
|
||||
|
||||
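There is no bare-id fallback: a key like `"U0XXXXXXX"` never matches. Each mapped gh login must be logged in on the host (`gh auth login`; the token is read from `~/.config/gh/hosts.yml`). A missing file, or an unmapped approver, degrades to the default `gh` credentials — the symptom is the host-log line "No gh account mapping for approver". A mapped login whose token is not found in `hosts.yml` also falls back to the default credentials — the symptom is the host-log line "gh account found in mapping but no token in hosts.yml".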
|
||||
|
||||
### Repo allowlist (optional, `.env`)
|
||||
|
||||
```bash
|
||||
PR_FACTORY_GH_REPO_ALLOWLIST=acme/widgets,acme/gadgets
|
||||
```
|
||||
|
||||
When set, any approved command that explicitly references a repo (`-R`/`--repo`, a `repos/owner/name` API path, or a github.com URL) outside the list is refused before execution. Best-effort defense in depth — commands with no recognizable repo reference run against the default gh context, and the human approval card remains the primary gate. Unset = no restriction.
|
||||
|
||||
### Approver roles
|
||||
|
||||
Card clicks from users without a `user_roles` row are silently ignored by core — the role-grant step in `pr-factory-core`'s SKILL.md ("Grant approver roles") covers this component's cards too.
|
||||
|
||||
## Known smell (declared, carried with sign-off)
|
||||
|
||||
**gh credential threading (skill-guidelines anti-pattern #5).** `onGhApproved` maps the approving user to a gh account, reads that account's `oauth_token` out of `~/.config/gh/hosts.yml`, and passes it as `GH_TOKEN` in the subprocess env — credential handling outside the OneCLI gateway. Carried deliberately because it is what attributes merge actions to the human approver. **Redesign direction:** route `gh` through the OneCLI forward proxy (the same mechanism core's handler.ts already uses for api.github.com) with per-approver vault credentials, so no token ever transits the host process env. Do not extend this pattern to new commands or new credential sources.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
```
|
||||
|
||||
All green. Any failure means a step didn't apply cleanly.
|
||||
@@ -0,0 +1,7 @@
|
||||
# gh-action-approval — files this component owns outright. The modules-barrel
|
||||
# line (src/modules/index.ts) is applied as an edit per SKILL.md, not as a
|
||||
# file copy. data/gh-users.json is operator-created from the sample and never
|
||||
# ships.
|
||||
src/modules/pr-factory/gh-action-approval.ts
|
||||
src/modules/pr-factory/gh-users.sample.json
|
||||
src/modules/pr-factory/gh-action.test.ts
|
||||
+322
@@ -0,0 +1,322 @@
|
||||
/**
|
||||
* gh-action-approval component — human gate for agent-initiated GitHub CLI
|
||||
* commands. Registers on pr-factory-core's gh-action seam at import time.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Agent calls the `credentialed_gh` MCP tool with raw command string(s)
|
||||
* 2. System action `pr_gh` lands here via core's seam (gh-action.ts)
|
||||
* 3. Host posts the reason (description) as a short preview in the Slack thread
|
||||
* 4. Host posts an approval card
|
||||
* 5. Human clicks Approve → host executes `gh <command>` with the
|
||||
* approver's credentials (see gh-users mapping below)
|
||||
* 6. Human clicks Reject → action dropped, agent notified
|
||||
*
|
||||
* KNOWN SMELL (declared in SKILL.md, carried with sign-off): command
|
||||
* execution threads the approver's gh oauth_token from
|
||||
* ~/.config/gh/hosts.yml into the subprocess env as GH_TOKEN — credential
|
||||
* handling outside the OneCLI gateway. It is what attributes merge actions
|
||||
* to the human approver. Redesign direction: route `gh` through the OneCLI
|
||||
* forward proxy with per-approver vault credentials so no token transits
|
||||
* the host process env. Do not extend this pattern to new commands or new
|
||||
* credential sources.
|
||||
*/
|
||||
import { execFile } from 'child_process';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { normalizeOptions, type RawOption } from '../../channels/ask-question.js';
|
||||
import { DATA_DIR } from '../../config.js';
|
||||
import { readEnvFile } from '../../env.js';
|
||||
import { getMessagingGroup } from '../../db/messaging-groups.js';
|
||||
import { getPrThreadBySession } from '../../db/pr-threads.js';
|
||||
import { createPendingApproval, updatePendingApprovalPlatformMessageId } from '../../db/sessions.js';
|
||||
import { getDeliveryAdapter } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
import { dismissStaleApprovals } from './dismiss-approvals.js';
|
||||
import { setGhActionHandler } from './gh-action.js';
|
||||
import { registerApprovalHandler, notifyAgent } from '../approvals/primitive.js';
|
||||
import type { ApprovalHandlerContext } from '../approvals/primitive.js';
|
||||
import type { Session } from '../../types.js';
|
||||
import { markAwaitingApproval, clearAwaitingApproval } from './reactions.js';
|
||||
|
||||
/**
 * Builds the two buttons of a `pr_gh` approval card.
 *
 * @param description Human-readable summary of the proposed action; it is
 *   echoed into the label shown once a button has been selected.
 * @returns The Approve option followed by the Reject option.
 */
function approvalOptions(description: string): RawOption[] {
  const approve: RawOption = {
    label: 'Approve',
    selectedLabel: `✅ ${description}`,
    value: 'approve',
  };
  const reject: RawOption = {
    label: 'Reject',
    selectedLabel: `❌ Rejected — ${description}`,
    value: 'reject',
  };
  return [approve, reject];
}
|
||||
|
||||
// Approver → gh CLI account mapping. Operator config at data/gh-users.json,
|
||||
// keyed by NAMESPACED user ids exactly as core's approval flow reports them
|
||||
// (`<channel>:<handle>`, e.g. {"slack:U0XXXXXXX": "their-gh-login"} — see
|
||||
// gh-users.sample.json). There is deliberately no bare-id fallback: an
|
||||
// unmapped approver always uses the default gh credentials. Read lazily and
|
||||
// fail-soft: a missing or malformed file means every approver falls back to
|
||||
// the default credentials — it must never crash module import.
|
||||
let ghUserMapCache: Record<string, string> | null = null;

/**
 * Returns the approver → gh login mapping read from `gh-users.json` under
 * DATA_DIR. The file is read on first use and the result is cached for the
 * lifetime of the process.
 *
 * Fail-soft: a missing, unreadable or unparsable file yields an empty map.
 * The parsed value is validated rather than trusted — anything that is not a
 * plain JSON object (e.g. `null` or an array) also yields an empty map, and
 * entries whose value is not a non-empty string are dropped — so callers can
 * always index the result safely.
 *
 * @returns The (possibly empty) mapping keyed by namespaced user id.
 */
function getGhUserMap(): Record<string, string> {
  if (ghUserMapCache) return ghUserMapCache;

  const ghUsersPath = path.join(DATA_DIR, 'gh-users.json');
  const map: Record<string, string> = {};
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(ghUsersPath, 'utf8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      for (const [userId, login] of Object.entries(parsed)) {
        if (typeof login === 'string' && login) map[userId] = login;
      }
    } else {
      log.warn('pr-factory: data/gh-users.json is not a JSON object — gh commands use default credentials');
    }
    // eslint-disable-next-line no-catch-all/no-catch-all -- fail-soft by contract: no/bad mapping file degrades to default credentials
  } catch {
    log.debug('pr-factory: no gh-users mapping at data/gh-users.json — gh commands use default credentials');
  }

  ghUserMapCache = map;
  return map;
}
|
||||
|
||||
/**
 * Generates an identifier of the form `<prefix>-<epoch ms>-<random base36>`.
 *
 * @param prefix Leading segment of the identifier.
 * @returns The generated identifier string.
 */
function genId(prefix: string): string {
  const timestamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  return [prefix, timestamp, suffix].join('-');
}
|
||||
|
||||
// Optional repo allowlist for approved gh commands
|
||||
// (PR_FACTORY_GH_REPO_ALLOWLIST, comma-separated `owner/name` entries).
|
||||
// Best-effort defense in depth — the human approval card is the primary
|
||||
// gate. When set, any command that explicitly references a repo (an
|
||||
// `-R`/`--repo` value, a `repos/owner/name` API path, or a github.com URL)
|
||||
// outside the list is refused before execution. Commands with no
|
||||
// recognizable repo reference run against the default gh context and are
|
||||
// not blocked. Unset = no restriction.
|
||||
// The process environment takes precedence over the value from readEnvFile;
// an unset/empty value produces an empty list (no restriction).
const allowlistEnv = readEnvFile(['PR_FACTORY_GH_REPO_ALLOWLIST']);

const REPO_ALLOWLIST: string[] = [];
for (const entry of (
  process.env.PR_FACTORY_GH_REPO_ALLOWLIST ||
  allowlistEnv.PR_FACTORY_GH_REPO_ALLOWLIST ||
  ''
).split(',')) {
  // Entries are compared case-insensitively, so store them lowercased.
  const repo = entry.trim().toLowerCase();
  if (repo) REPO_ALLOWLIST.push(repo);
}
|
||||
|
||||
/**
 * Extracts every `owner/name` repo reference that a raw gh command string
 * names explicitly. Recognized forms:
 *   - a `-R` / `--repo` flag value (`-R o/r`, `-R=o/r`, `--repo o/r`,
 *     `--repo=o/r`, and the attached shorthand `-Ro/r`), optionally quoted;
 *   - a `repos/owner/name` API path;
 *   - a `github.com/owner/name` URL or `github.com:owner/name` SSH remote.
 *
 * A trailing `.git` is dropped so a clone URL compares equal to the plain
 * `owner/name` form.
 *
 * @param command The raw command string as proposed by the agent.
 * @returns Lowercased, de-duplicated refs in order of discovery; empty when
 *   the command names no repo.
 */
function repoRefsIn(command: string): string[] {
  const patterns = [
    /(?:^|\s)(?:-R[=\s]*|--repo[=\s]+)["']?([\w.-]+\/[\w.-]+)/g,
    /repos\/([\w.-]+\/[\w.-]+)/g,
    /github\.com[/:]([\w.-]+\/[\w.-]+)/g,
  ];

  const refs = new Set<string>();
  for (const pattern of patterns) {
    for (const m of command.matchAll(pattern)) {
      refs.add(m[1].toLowerCase().replace(/\.git$/, ''));
    }
  }
  return [...refs];
}
|
||||
|
||||
/**
 * Checks a command against the repo allowlist.
 *
 * @param command The raw gh command string.
 * @returns The first repo reference found in the command that is not on the
 *   allowlist, or `null` when the allowlist is empty or nothing violates it.
 */
function repoViolation(command: string): string | null {
  // An empty allowlist means "no restriction".
  if (REPO_ALLOWLIST.length === 0) return null;

  const offending = repoRefsIn(command).find((ref) => !REPO_ALLOWLIST.includes(ref));
  return offending ?? null;
}
|
||||
|
||||
/**
 * Looks up the oauth token of a gh CLI account in `~/.config/gh/hosts.yml`.
 *
 * Only the `oauth_token:` line directly under the `<account>:` key is parsed,
 * using a regex instead of a YAML parser. The account name is escaped before
 * being placed in the pattern, so it is matched literally — a name containing
 * regex metacharacters (e.g. `a.b`, `.*`) cannot select another account's
 * token.
 *
 * @param ghAccount The gh login to look up.
 * @returns The token, or `null` when the account is empty, the file cannot be
 *   read, or no matching entry exists.
 */
function getGhToken(ghAccount: string): string | null {
  if (!ghAccount) return null;
  try {
    const hostsPath = path.join(os.homedir(), '.config', 'gh', 'hosts.yml');
    const content = fs.readFileSync(hostsPath, 'utf8');
    // Escape regex metacharacters so the account is matched verbatim.
    const account = ghAccount.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Match: "  <account>:\n    oauth_token: <token>"
    const re = new RegExp(`^\\s+${account}:\\s*\\n\\s+oauth_token:\\s*(.+)$`, 'm');
    const m = content.match(re);
    return m ? m[1].trim() : null;
    // eslint-disable-next-line no-catch-all/no-catch-all -- fail-soft by contract: unreadable hosts.yml degrades to default credentials
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Runs the `gh` binary with the given arguments (30 s timeout).
 *
 * @param args Arguments passed to `gh` as-is; no shell is involved.
 * @param env Extra environment variables layered over `process.env`.
 * @returns Resolves with trimmed stdout on success. Rejects with an `Error`
 *   whose message is the trimmed stderr, else the trimmed stdout, else the
 *   underlying error's message.
 */
function gh(args: string[], env?: Record<string, string>): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    execFile('gh', args, { timeout: 30_000, env: { ...process.env, ...env } }, (err, stdout, stderr) => {
      if (!err) {
        resolve(stdout.trim());
        return;
      }
      const failure = stderr?.trim() || stdout?.trim() || err.message;
      reject(new Error(failure));
    });
  });
}
|
||||
|
||||
// ── pr_gh executor (called by core through the gh-action seam) ──
|
||||
|
||||
/**
 * `pr_gh` executor: turns an agent-proposed gh action into a preview message
 * plus an approval card, and records the pending approval.
 *
 * Steps, in order: validate the input, post the description as a preview in
 * the session's thread, dismiss stale approval cards, insert the
 * `pending_approvals` row, post the approval card (description + commands),
 * then mark the session as awaiting approval and log the proposal.
 *
 * Input is validated up front: `commands` (array) or the legacy `command`
 * (string) must yield at least one command, every command must be a
 * non-blank string, and `description` must be a non-empty string. Invalid
 * input is reported to the agent and nothing is posted or stored.
 *
 * @param content Action payload from the agent (`commands` | `command`, `description`).
 * @param session Session the action belongs to; supplies the messaging group and thread.
 */
async function handleGh(content: Record<string, unknown>, session: Session): Promise<void> {
  const { description } = content;

  // Accept either `commands` (array) or `command` (string) for backwards compat.
  const proposed: unknown[] =
    Array.isArray(content.commands) && content.commands.length > 0
      ? content.commands
      : typeof content.command === 'string' && content.command
        ? [content.command]
        : [];
  const commands = proposed.filter((c): c is string => typeof c === 'string' && c.trim() !== '');

  if (
    commands.length === 0 ||
    commands.length !== proposed.length ||
    typeof description !== 'string' ||
    !description
  ) {
    notifyAgent(session, 'pr_gh requires command(s) and description.');
    return;
  }

  const mg = session.messaging_group_id ? getMessagingGroup(session.messaging_group_id) : undefined;
  if (!mg) {
    log.warn('pr_gh: messaging group not found', { sessionId: session.id });
    return;
  }

  const adapter = getDeliveryAdapter();
  if (!adapter) {
    log.warn('pr_gh: no delivery adapter');
    return;
  }

  const threadId = session.thread_id;

  const preview = `\n━━━ GitHub ━━━━━━━━━━━━━━━━━━━\n\n${description}`;

  // Post the preview text in the thread (short — full commands are in the
  // approval card). The instance arg routes through the bot identity that
  // owns this messaging group — under exact-instance dispatch an omitted
  // instance would post through the default bot.
  await adapter.deliver(
    mg.channel_type,
    mg.platform_id,
    threadId,
    'chat',
    JSON.stringify({ text: preview }),
    undefined,
    mg.instance,
  );

  // Dismiss any existing approval cards in this thread before posting a new one
  await dismissStaleApprovals(session);

  // Post approval card
  const approvalId = genId('appr-gh');
  const options = approvalOptions(description);
  const normalizedOptions = normalizeOptions(options);

  createPendingApproval({
    approval_id: approvalId,
    session_id: session.id,
    request_id: approvalId,
    action: 'pr_gh',
    payload: JSON.stringify({ commands, description }),
    created_at: new Date().toISOString(),
    title: 'GitHub CLI',
    options_json: JSON.stringify(normalizedOptions),
  });

  // Triple backticks inside a command would close the card's code fence early.
  const commandBlock = commands.map((c) => c.replace(/```/g, "'''")).join('\n');
  const platformMsgId = await adapter.deliver(
    mg.channel_type,
    mg.platform_id,
    threadId,
    'chat-sdk',
    JSON.stringify({
      type: 'ask_question',
      questionId: approvalId,
      title: 'GitHub CLI',
      question: `${description}\n\`\`\`\n${commandBlock}\n\`\`\``,
      options,
    }),
    undefined,
    mg.instance,
  );
  if (platformMsgId) updatePendingApprovalPlatformMessageId(approvalId, platformMsgId);

  await markAwaitingApproval(session);
  const prThread = getPrThreadBySession(session.id);
  if (prThread) prLog(prThread.pr_number, prThread.repo_full_name, 'gh_action_proposed', { commands, description });
  log.info('GH action approval card posted', { approvalId, commands, sessionId: session.id });
}
|
||||
|
||||
// ── Approval handler (fires when human clicks Approve) ──

/**
 * Splits a raw gh command string into the argv passed to the `gh` binary.
 *
 * A leading `gh ` is dropped (execFile invokes the binary directly).
 * Whitespace separates arguments except inside single or double quotes, and
 * every quoted segment is unquoted wherever it sits in the argument — so both
 * `--body "a b"` and `--body="a b"` yield a value without quote characters.
 */
function tokenizeGhCommand(command: string): string[] {
  const argsStr = command.replace(/^\s*gh\s+/, '');
  const tokens = argsStr.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) ?? [];
  return tokens.map((token) =>
    token.replace(/"([^"]*)"|'([^']*)'/g, (_match, dq?: string, sq?: string) => dq ?? sq ?? ''),
  );
}

/**
 * `pr_gh` approval handler: executes the approved gh command(s) and reports
 * the outcome through `ctx.notify`.
 *
 * - Commands come from `payload.commands` (array) or the legacy
 *   `payload.command` (string). A payload without at least one command, or
 *   with a non-string/blank command, is refused and nothing is executed.
 * - If the approver's namespaced id is in the gh-users map and that account
 *   has a token in hosts.yml, the token is passed to the subprocess as
 *   `GH_TOKEN`; otherwise the commands run with the default gh credentials.
 * - Commands run sequentially and stop at the first failure or repo-allowlist
 *   refusal. A failed `pr merge` gets extra guidance appended for the agent.
 *
 * @param ctx Approval context: session, stored payload, approver id, notify callback.
 */
async function onGhApproved(ctx: ApprovalHandlerContext): Promise<void> {
  await clearAwaitingApproval(ctx.session);
  const prThread = getPrThreadBySession(ctx.session.id);

  // Support both `commands` (array) and legacy `command` (string)
  const rawCommands: unknown[] = Array.isArray(ctx.payload.commands)
    ? ctx.payload.commands
    : [ctx.payload.command];
  const commands = rawCommands.filter((c): c is string => typeof c === 'string' && c.trim() !== '');
  if (commands.length === 0 || commands.length !== rawCommands.length) {
    log.warn('pr_gh approval payload has no valid command list', { userId: ctx.userId });
    ctx.notify('pr_gh approval could not run: the approved payload does not contain valid gh command(s).');
    return;
  }

  // Resolve the approver's GitHub credentials. ctx.userId is namespaced
  // (`<channel>:<handle>`) and the mapping keys are too — exact match only.
  const ghAccount = getGhUserMap()[ctx.userId];
  const env: Record<string, string> = {};
  if (ghAccount) {
    const token = getGhToken(ghAccount);
    if (token) {
      env.GH_TOKEN = token;
      log.info('gh commands will run as', { ghAccount, userId: ctx.userId });
    } else {
      log.warn('gh account found in mapping but no token in hosts.yml', { ghAccount, userId: ctx.userId });
    }
  } else {
    log.warn('No gh account mapping for approver — using default credentials', { userId: ctx.userId });
  }

  // Execute commands sequentially — stop on first failure
  const results: string[] = [];
  for (const command of commands) {
    const violation = repoViolation(command);
    if (violation) {
      log.warn('gh command refused by repo allowlist', { command, repo: violation, userId: ctx.userId });
      results.push(`\`${command}\` refused: repo \`${violation}\` is not in PR_FACTORY_GH_REPO_ALLOWLIST.`);
      break; // Stop — same contract as a failed command
    }

    const cleanArgs = tokenizeGhCommand(command);

    try {
      const output = await gh(cleanArgs, env);
      if (prThread)
        prLog(prThread.pr_number, prThread.repo_full_name, 'gh_command_executed', {
          command,
          ghAccount: ghAccount || 'default',
        });
      log.info('gh command executed', { command, ghAccount: ghAccount || 'default' });
      results.push(output ? `\`${command}\` succeeded:\n\`\`\`\n${output}\n\`\`\`` : `\`${command}\` succeeded.`);
      // eslint-disable-next-line no-catch-all/no-catch-all -- a failed gh command is reported to the agent, never thrown past the loop
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      if (prThread) prLog(prThread.pr_number, prThread.repo_full_name, 'gh_command_failed', { command, error: msg });
      log.error('gh command failed', {
        command,
        err,
        prNumber: prThread?.pr_number,
        repo: prThread?.repo_full_name,
        category: 'gh-command',
      });

      // Detect merge failures and provide actionable guidance
      const isMergeCmd = /\bpr\s+merge\b/.test(command);
      if (isMergeCmd) {
        results.push(
          [
            `\`${command}\` failed: ${msg}`,
            '',
            'This is likely due to branch protection rules or auto-merge being disabled on the repository.',
            'Post a message in the PR thread informing the author that tests passed but the merge must be performed manually by a maintainer with merge permissions.',
            'Do NOT attempt alternative merge strategies — the blocker is repository-level, not command-level.',
          ].join('\n'),
        );
      } else {
        results.push(`\`${command}\` failed: ${msg}`);
      }
      break; // Stop on first failure
    }
  }

  ctx.notify(results.join('\n\n'));
}
|
||||
|
||||
setGhActionHandler(handleGh);
|
||||
registerApprovalHandler('pr_gh', onGhApproved);
|
||||
+305
@@ -0,0 +1,305 @@
|
||||
/**
|
||||
* gh-action-approval component guard — the modules-barrel line (`import
|
||||
* './pr-factory/gh-action-approval.js'` in src/modules/index.ts) and both
|
||||
* registrations it performs: the executor on core's gh-action seam
|
||||
* (setGhActionHandler) and the `pr_gh` approval handler
|
||||
* (registerApprovalHandler).
|
||||
*
|
||||
* Imports the REAL modules barrel and drives both registrations through
|
||||
* core's read sides: `dispatchGhAction` (the seam core's pr_gh delivery
|
||||
* action calls — falls back to an agent notification when the component is
|
||||
* absent, so the approval-card assertions go red if the barrel line is
|
||||
* deleted) and `getApprovalHandler('pr_gh')` against a PATH-shimmed fake
|
||||
* `gh` binary.
|
||||
*
|
||||
* The approval-handler cases pin: argument tokenization (quote-aware split,
|
||||
* leading `gh ` stripped), sequential stop-on-first-failure, the
|
||||
* merge-failure guidance branch, the optional PR_FACTORY_GH_REPO_ALLOWLIST
|
||||
* refusal, and the NAMESPACED gh-users mapping contract — keys are
|
||||
* `<channel>:<handle>` exactly as core reports approver ids, with NO
|
||||
* bare-id fallback, and the mapped account's token (from a HOME-sandboxed
|
||||
* ~/.config/gh/hosts.yml) reaches the subprocess as GH_TOKEN. A missing
|
||||
* data/gh-users.json must degrade to default credentials, never crash.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi, type Mock } from 'vitest';
|
||||
|
||||
vi.mock('../../container-runner.js', () => ({
|
||||
wakeContainer: vi.fn().mockResolvedValue(undefined),
|
||||
isContainerRunning: vi.fn().mockReturnValue(false),
|
||||
killContainer: vi.fn(),
|
||||
buildAgentGroupImage: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
vi.mock('../../config.js', async () => {
|
||||
const actual = await vi.importActual<typeof import('../../config.js')>('../../config.js');
|
||||
return {
|
||||
...actual,
|
||||
DATA_DIR: '/tmp/nanoclaw-test-prf-ghaction/data',
|
||||
GROUPS_DIR: '/tmp/nanoclaw-test-prf-ghaction/groups',
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock('./activity-log.js', () => ({ prLog: vi.fn() }));
|
||||
|
||||
import type { ApprovalHandler, ApprovalHandlerContext } from '../approvals/primitive.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-ghaction';
|
||||
const GH_LOG = path.join(TEST_DIR, 'gh-calls.log');
|
||||
const GH_TOKEN_LOG = path.join(TEST_DIR, 'gh-tokens.log');
|
||||
const GH_FAIL_FLAG = path.join(TEST_DIR, 'gh-fail-flag');
|
||||
const ORIGINAL_CWD = process.cwd();
|
||||
const ORIGINAL_PATH = process.env.PATH;
|
||||
const ORIGINAL_HOME = process.env.HOME;
|
||||
|
||||
let ghHandler: ApprovalHandler;
|
||||
let dispatchGhAction: typeof import('./gh-action.js').dispatchGhAction;
|
||||
let closeDbFn: () => void;
|
||||
|
||||
const session: Session = {
|
||||
id: 'sess-gh',
|
||||
agent_group_id: 'ag-1',
|
||||
messaging_group_id: 'mg-gh',
|
||||
thread_id: 'slack:C0GH:42.1',
|
||||
agent_provider: null,
|
||||
status: 'active',
|
||||
container_status: 'idle',
|
||||
last_active: null,
|
||||
created_at: new Date().toISOString(),
|
||||
};
|
||||
|
||||
/**
 * Builds an approval-handler context for the shared test session.
 *
 * @param payload Approval payload handed to the handler.
 * @param userId Namespaced approver id; defaults to `slack:U0GOOD`.
 * @returns The context, with `notify` replaced by a fresh vitest mock.
 */
function ctx(
  payload: Record<string, unknown>,
  userId = 'slack:U0GOOD',
): ApprovalHandlerContext & { notify: Mock<(text: string) => void> } {
  const notify = vi.fn<(text: string) => void>();
  return { session, payload, userId, notify };
}
|
||||
|
||||
/**
 * Reads the fake gh's call log.
 *
 * @returns One entry per non-empty log line with trailing whitespace
 *   removed, or an empty array when the log file does not exist.
 */
function ghCalls(): string[] {
  if (!fs.existsSync(GH_LOG)) return [];

  const recorded: string[] = [];
  for (const line of fs.readFileSync(GH_LOG, 'utf8').trim().split('\n')) {
    if (line) recorded.push(line.replace(/\s+$/, ''));
  }
  return recorded;
}
|
||||
|
||||
/**
 * Reads the fake gh's token log.
 *
 * @returns One entry per non-empty log line, or an empty array when the log
 *   file is missing or empty. Blank lines are filtered out so an empty log
 *   reads as "no calls" rather than as a single empty entry.
 */
function ghTokens(): string[] {
  if (!fs.existsSync(GH_TOKEN_LOG)) return [];
  return fs.readFileSync(GH_TOKEN_LOG, 'utf8').trim().split('\n').filter(Boolean);
}
|
||||
|
||||
beforeAll(async () => {
|
||||
fs.rmSync(TEST_DIR, { recursive: true, force: true });
|
||||
fs.mkdirSync(path.join(TEST_DIR, 'bin'), { recursive: true });
|
||||
fs.mkdirSync(path.join(TEST_DIR, 'data'), { recursive: true });
|
||||
fs.mkdirSync(path.join(TEST_DIR, 'home', '.config', 'gh'), { recursive: true });
|
||||
|
||||
// Fake gh: logs its argv tab-separated and its GH_TOKEN env, fails when
|
||||
// the flag file exists.
|
||||
const shim = [
|
||||
'#!/bin/sh',
|
||||
`LOG="${GH_LOG}"`,
|
||||
'out=""',
|
||||
'for a in "$@"; do out="$out$a\t"; done',
|
||||
'printf \'%s\\n\' "$out" >> "$LOG"',
|
||||
`printf 'TOKEN=%s\\n' "$GH_TOKEN" >> "${GH_TOKEN_LOG}"`,
|
||||
`if [ -e "${GH_FAIL_FLAG}" ]; then echo "merge blocked by branch protection" >&2; exit 1; fi`,
|
||||
'echo "gh-shim-ok"',
|
||||
].join('\n');
|
||||
fs.writeFileSync(path.join(TEST_DIR, 'bin', 'gh'), shim, { mode: 0o755 });
|
||||
process.env.PATH = `${path.join(TEST_DIR, 'bin')}:${process.env.PATH}`;
|
||||
|
||||
// HOME-sandboxed hosts.yml: the mapped account's oauth_token lives here.
|
||||
process.env.HOME = path.join(TEST_DIR, 'home');
|
||||
fs.writeFileSync(
|
||||
path.join(TEST_DIR, 'home', '.config', 'gh', 'hosts.yml'),
|
||||
[
|
||||
'github.com:',
|
||||
' users:',
|
||||
' mapped-gh-login:',
|
||||
' oauth_token: gho_test_token_123',
|
||||
' git_protocol: https',
|
||||
'',
|
||||
].join('\n'),
|
||||
);
|
||||
|
||||
// NAMESPACED mapping (D6): a correctly namespaced key for U0GOOD, plus a
|
||||
// legacy BARE key for U0BARE — which must NOT match (no strip-fallback).
|
||||
fs.writeFileSync(
|
||||
path.join(TEST_DIR, 'data', 'gh-users.json'),
|
||||
JSON.stringify({ 'slack:U0GOOD': 'mapped-gh-login', U0BARE: 'mapped-gh-login' }),
|
||||
);
|
||||
|
||||
// readEnvFile resolves .env from cwd — run from a dir guaranteed to have
|
||||
// none, so a developer's real .env can't leak into the module's env reads.
|
||||
process.chdir(TEST_DIR);
|
||||
// Core must load inert (this component registers regardless of the gate).
|
||||
for (const k of ['GITHUB_WEBHOOK_SECRET', 'PR_FACTORY_SLACK_CHANNEL_ID', 'SLACK_BOT_TOKEN']) delete process.env[k];
|
||||
// The allowlist is read at module load: prime it BEFORE the barrel import.
|
||||
process.env.PR_FACTORY_GH_REPO_ALLOWLIST = 'acme/widgets';
|
||||
|
||||
const dbMod = await import('../../db/index.js');
|
||||
const db = dbMod.initTestDb();
|
||||
dbMod.runMigrations(db);
|
||||
dbMod.createAgentGroup({ id: 'ag-1', name: 'W', folder: 'w', agent_provider: null, created_at: session.created_at });
|
||||
dbMod.createMessagingGroup({
|
||||
id: 'mg-gh',
|
||||
channel_type: 'slack',
|
||||
platform_id: 'slack:C0GH',
|
||||
name: null,
|
||||
is_group: 1,
|
||||
unknown_sender_policy: 'public',
|
||||
created_at: session.created_at,
|
||||
});
|
||||
const { createSession } = await import('../../db/sessions.js');
|
||||
createSession(session);
|
||||
const { initSessionFolder } = await import('../../session-manager.js');
|
||||
initSessionFolder('ag-1', 'sess-gh');
|
||||
closeDbFn = dbMod.closeDb;
|
||||
|
||||
await import('../index.js'); // the REAL modules barrel — the line under guard lives here
|
||||
const { getApprovalHandler } = await import('../approvals/primitive.js');
|
||||
ghHandler = getApprovalHandler('pr_gh')!;
|
||||
expect(ghHandler, 'pr_gh approval handler not registered — barrel line missing?').toBeDefined();
|
||||
dispatchGhAction = (await import('./gh-action.js')).dispatchGhAction;
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
closeDbFn?.();
|
||||
process.chdir(ORIGINAL_CWD);
|
||||
process.env.PATH = ORIGINAL_PATH;
|
||||
process.env.HOME = ORIGINAL_HOME;
|
||||
delete process.env.PR_FACTORY_GH_REPO_ALLOWLIST;
|
||||
fs.rmSync(TEST_DIR, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
fs.rmSync(GH_LOG, { force: true });
|
||||
fs.rmSync(GH_TOKEN_LOG, { force: true });
|
||||
fs.rmSync(GH_FAIL_FLAG, { force: true });
|
||||
});
|
||||
|
||||
describe('gh-action seam registration (dispatch reaches the installed executor)', () => {
|
||||
it('dispatchGhAction posts the preview + approval card instead of the not-installed fallback', async () => {
|
||||
type Delivered = { kind: string; content: Record<string, unknown>; instance?: string };
|
||||
const delivered: Delivered[] = [];
|
||||
const { setDeliveryAdapter } = await import('../../delivery.js');
|
||||
setDeliveryAdapter({
|
||||
async deliver(_channelType, _platformId, _threadId, kind, content, _files, instance) {
|
||||
delivered.push({ kind, content: JSON.parse(content) as Record<string, unknown>, instance });
|
||||
return `plat-${delivered.length}`;
|
||||
},
|
||||
});
|
||||
|
||||
await dispatchGhAction({ commands: ['gh pr view 42'], description: 'view the PR' }, session);
|
||||
|
||||
expect(delivered).toHaveLength(2);
|
||||
expect(delivered[0].kind).toBe('chat');
|
||||
expect(String(delivered[0].content.text)).toContain('view the PR');
|
||||
expect(delivered[1].kind).toBe('chat-sdk');
|
||||
expect(delivered[1].content.type).toBe('ask_question');
|
||||
expect(String(delivered[1].content.question)).toContain('gh pr view 42');
|
||||
// The card routes through the messaging group's instance (worker default).
|
||||
expect(delivered[1].instance).toBe('slack');
|
||||
|
||||
const { getPendingApprovalsBySessionAction, deletePendingApprovalsBySessionAction } =
|
||||
await import('../../db/sessions.js');
|
||||
const pending = getPendingApprovalsBySessionAction('sess-gh', 'pr_gh');
|
||||
expect(pending).toHaveLength(1);
|
||||
expect(JSON.parse(pending[0].payload!)).toEqual({ commands: ['gh pr view 42'], description: 'view the PR' });
|
||||
deletePendingApprovalsBySessionAction('sess-gh', 'pr_gh');
|
||||
});
|
||||
});
|
||||
|
||||
describe('pr_gh approval handler', () => {
|
||||
it('tokenizes the command (strips leading gh, keeps quoted args whole) and reports success', async () => {
  const approval = ctx({ commands: ['gh pr comment 42 --body "hello world"'], description: 'comment' });

  await ghHandler(approval);

  // Expected record: tab-separated arguments, leading `gh` gone, quoted body kept as one field.
  const recorded = ghCalls();
  expect(recorded).toEqual(['pr\tcomment\t42\t--body\thello world']);

  // A single notification, and it reports success.
  expect(approval.notify).toHaveBeenCalledTimes(1);
  const message = String(approval.notify.mock.calls[0][0]);
  expect(message).toContain('succeeded');
});
|
||||
|
||||
it('maps a NAMESPACED approver id to gh credentials (GH_TOKEN from the sandboxed hosts.yml)', async () => {
  const approverId = 'slack:U0GOOD';
  const approval = ctx({ commands: ['gh pr view 42'], description: 'view' }, approverId);

  await ghHandler(approval);

  // The command ran once, and it ran with the mapped approver's token.
  const recordedCalls = ghCalls();
  expect(recordedCalls).toEqual(['pr\tview\t42']);
  const recordedTokens = ghTokens();
  expect(recordedTokens).toEqual(['TOKEN=gho_test_token_123']);
});
|
||||
|
||||
it('does NOT strip-match a bare legacy key — unmapped approvers run with default credentials', async () => {
  // gh-users.json has the BARE key "U0BARE"; the click reports "slack:U0BARE".
  const approverId = 'slack:U0BARE';
  const approval = ctx({ command: 'gh pr view 42', description: 'view' }, approverId);

  await ghHandler(approval);

  // legacy single `command` string still executes
  const recordedCalls = ghCalls();
  expect(recordedCalls).toEqual(['pr\tview\t42']);

  // No mapping matched, so no approver token was injected.
  const recordedTokens = ghTokens();
  expect(recordedTokens).toEqual(['TOKEN=']);
});
|
||||
|
||||
it('stops on first failure and surfaces merge-failure guidance', async () => {
  // The flag file must exist before the handler runs.
  fs.writeFileSync(GH_FAIL_FLAG, '1');

  const approval = ctx({
    commands: ['gh pr merge 42 --squash', 'gh pr comment 42 --body "after"'],
    description: 'merge then comment',
  });
  await ghHandler(approval);

  // Only the first command ran.
  const recordedCalls = ghCalls();
  expect(recordedCalls).toEqual(['pr\tmerge\t42\t--squash']);

  // The notification reports the failure with guidance and never mentions the skipped command.
  const message = String(approval.notify.mock.calls[0][0]);
  for (const fragment of ['failed', 'branch protection']) {
    expect(message).toContain(fragment);
  }
  expect(message).not.toContain('after');
});
|
||||
|
||||
it('refuses a command referencing a repo outside PR_FACTORY_GH_REPO_ALLOWLIST before executing it', async () => {
  const approval = ctx({
    commands: ['gh api repos/evil/exfil/dispatches', 'gh pr comment 42 --body "after"'],
    description: 'api call',
  });

  await ghHandler(approval);

  // nothing executed
  const recordedCalls = ghCalls();
  expect(recordedCalls).toEqual([]);

  // The refusal names the offending repository.
  const message = String(approval.notify.mock.calls[0][0]);
  for (const fragment of ['refused', 'evil/exfil']) {
    expect(message).toContain(fragment);
  }
});
|
||||
|
||||
it('allowlisted repo references pass', async () => {
  const approval = ctx({ commands: ['gh pr merge 42 -R acme/widgets'], description: 'merge' });

  await ghHandler(approval);

  // The `-R acme/widgets` reference is passed through and the command is recorded as run.
  const recordedCalls = ghCalls();
  expect(recordedCalls).toEqual(['pr\tmerge\t42\t-R\tacme/widgets']);
});
|
||||
|
||||
it('a missing data/gh-users.json degrades to default credentials without crashing', async () => {
  // Fresh module generation with no mapping file: the import and the
  // handler must both survive its absence (lazy + fail-soft read).
  vi.resetModules();
  const mappingFile = path.join(TEST_DIR, 'data', 'gh-users.json');
  fs.rmSync(mappingFile, { force: true });

  // Re-import the DB module after the reset and migrate a test database.
  const freshDb = await import('../../db/index.js');
  freshDb.runMigrations(freshDb.initTestDb());

  try {
    await import('../index.js');
    const approvals = await import('../approvals/primitive.js');
    const handler = approvals.getApprovalHandler('pr_gh')!;
    expect(handler).toBeDefined();

    const approval = ctx({ command: 'gh pr view 7', description: 'view' });
    await handler(approval);

    // The command still runs, with no approver token, and reports success.
    expect(ghCalls()).toEqual(['pr\tview\t7']);
    expect(ghTokens()).toEqual(['TOKEN=']);
    const message = String(approval.notify.mock.calls[0][0]);
    expect(message).toContain('succeeded');
  } finally {
    // Always close the database opened above, even when an assertion fails.
    freshDb.closeDb();
  }
});
|
||||
});
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"slack:U0XXXXXXX": "first-approver-gh-login",
|
||||
"slack:U0YYYYYYY": "second-approver-gh-login"
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
# Remove pr-factory-core
|
||||
|
||||
Reverses every change the apply made. Remove dependent components first: `gh-action-approval`, `vm-test-orchestrator`, and `slack-canvas` all import this component's seam files and break the build once these files are gone.
|
||||
|
||||
## 1. Delete the copied files
|
||||
|
||||
```bash
|
||||
rm -rf src/modules/pr-factory
|
||||
rm -f src/db/pr-threads.ts
|
||||
rm -f src/db/migrations/module-pr-factory-pr-threads-v2.ts
|
||||
rm -f src/db/pr-threads.test.ts
|
||||
rm -f src/db/sessions-approval-helpers.test.ts
|
||||
rm -f src/modules/approvals/response-handler-reject.test.ts
|
||||
rm -f container/agent-runner/src/mcp-tools/pr-factory.ts
|
||||
rm -f container/agent-runner/src/mcp-tools/pr-factory-registration.test.ts
|
||||
rm -f container/agent-runner/src/mcp-tools/pr-factory-tools.test.ts
|
||||
```
|
||||
|
||||
(`rm -rf src/modules/pr-factory` removes the module's five tests along with the sources.)
|
||||
|
||||
## 2. Delete the barrel lines
|
||||
|
||||
- `src/modules/index.ts` — delete the line `import './pr-factory/index.js';`
|
||||
- `container/agent-runner/src/mcp-tools/index.ts` — delete the line `import './pr-factory.js';`
|
||||
- `src/db/migrations/index.ts` — delete the `modulePrFactoryPrThreadsV2` import line and its entry in the `migrations` array.
|
||||
|
||||
## 3. Delete the sessions.ts helpers
|
||||
|
||||
In `src/db/sessions.ts`, delete the four functions the apply appended: `getPendingApprovalsBySessionAction`, `getPendingApprovalsBySession`, `updatePendingApprovalPlatformMessageId`, `deletePendingApprovalsBySessionAction`. (First confirm nothing else now imports them: `grep -rn 'PendingApprovalsBySession\|updatePendingApprovalPlatformMessageId' src/ --include='*.ts'` should return only sessions.ts.)
|
||||
|
||||
## 4. Uninstall the dependency
|
||||
|
||||
```bash
|
||||
pnpm remove undici
|
||||
```
|
||||
|
||||
## 5. Remove the environment lines
|
||||
|
||||
Delete from `.env` (the GitHub webhook then 404s — also delete the webhook in the GitHub repo settings):
|
||||
|
||||
```
|
||||
GITHUB_WEBHOOK_SECRET
|
||||
PR_FACTORY_SLACK_CHANNEL_ID
|
||||
PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID
|
||||
PR_FACTORY_DEFAULT_REPO
|
||||
PR_FACTORY_REPO_MIRROR_DIR
|
||||
PR_FACTORY_REVIEW_SKILL
|
||||
```
|
||||
|
||||
## 6. Data and DB choreography (read before deleting anything)
|
||||
|
||||
- **Migration row**: `module-pr-factory-pr-threads-v2` stays recorded in `schema_version` forever, harmlessly — the runner dedupes by name and nothing re-reads it. Do NOT delete the row; a future re-apply relies on name-keyed dedupe semantics either way.
|
||||
- **`pr_threads` table**: dropping it is data-destructive (it is the only index from PR numbers to threads/sessions). Leave it in place unless you are certain the install never returns to PR Factory; then drop it through the sanctioned query wrapper — `pnpm exec tsx scripts/q.ts data/v2.db "DROP TABLE pr_threads;"` — in a maintenance window, never while the host is running (and never via the `sqlite3` binary).
|
||||
- **Agent groups / messaging groups / wirings**: bootstrap-created rows (`pr-factory-worker`, `pr-factory-supervisor` agent groups; the PR-channel messaging groups for the `slack`, `slack-supervisor`, `slack-tester` instances; the supervisor admin-channel group) are operator data — remove them with `pnpm run ncl` group/wiring verbs if desired. The seeded `groups/pr-factory-worker/CLAUDE.local.md` and `groups/pr-factory-supervisor/CLAUDE.local.md` may carry operator edits; archive before deleting.
|
||||
- **Pending approvals**: any open `pr_*` cards become dead clicks after removal. Resolve or dismiss outstanding cards before stopping the module, or delete the rows through the sanctioned query wrapper — `pnpm exec tsx scripts/q.ts data/v2.db "DELETE FROM pending_approvals WHERE action LIKE 'pr_%';"` — and accept the stale Slack cards.
|
||||
- **Activity logs**: `data/pr-activity/` is append-only operator data; keep or archive.
|
||||
|
||||
## 7. Rebuild and restart
|
||||
|
||||
```bash
|
||||
./container/build.sh
|
||||
launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS
|
||||
# systemctl --user restart nanoclaw # Linux
|
||||
```
|
||||
|
||||
## 8. Validate
|
||||
|
||||
> **Skip this step during full-recipe removal.** When you are removing the whole PR Factory (per the recipe REMOVE.md), the other components are still mid-teardown and reference seams this component owns — a build here is expected to be red. Only the recipe-level validation at the end binds. Run the block below only when removing `pr-factory-core` in isolation.
|
||||
|
||||
```bash
|
||||
pnpm run build && pnpm test
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit
|
||||
(cd container/agent-runner && bun test)  # subshell: cwd stays at the repo root and a failing `bun test` keeps its exit status
|
||||
```
|
||||
|
||||
All suites green, with the pr-factory tests gone from the run.
|
||||
@@ -0,0 +1,300 @@
|
||||
---
|
||||
name: pr-factory-core
|
||||
description: PR Factory component — the engine. GitHub pull_request webhook → per-PR Slack thread + worker agent session (triage/review/test-plan via default group instructions), supervisor bootstrap with skill-edit approval flow, send-to-testing approval gate, test-result coordination, pr_threads index, and four container MCP tools. Ships seams for the optional gh-action-approval, vm-test-orchestrator, and slack-canvas components.
|
||||
---
|
||||
|
||||
# pr-factory-core (PR Factory component)
|
||||
|
||||
The PR Factory engine, as a host module (`src/modules/pr-factory/`) plus a container MCP-tool module:
|
||||
|
||||
- **GitHub webhook** (`webhook.ts`) — mounts `/webhook/github` on core's shared webhook server via `registerWebhookHandler`, verifies HMAC-SHA256 (`GITHUB_WEBHOOK_SECRET`), filters `pull_request` events (opened / synchronize / closed / ready_for_review / converted_to_draft).
|
||||
- **Per-PR sessions** (`handler.ts`) — each opened PR gets a Slack thread (posted by the worker bot), a `pr_threads` row, and a per-thread session under the **PR Factory Worker** agent group, seeded with the diff and a `[PR_CONTEXT: …]` trigger. `synchronize` kills + re-creates the session in the same thread. GitHub reads ride the OneCLI gateway proxy when available (vault-stored PAT, never an env var) and fall back to direct unauthenticated calls.
|
||||
- **Bootstrap** (`bootstrap.ts`) — idempotent, self-correcting setup: worker agent group (default triage/review/test-plan instructions seeded into `groups/pr-factory-worker/CLAUDE.local.md`), default-instance worker messaging group, optional supervisor agent group with `slack-supervisor`-instance messaging groups (admin channel + PR channel), and a `slack-tester`-instance messaging group when the operator-created `pr-tester` agent group exists.
|
||||
- **Approval flows** — send-to-testing (`testing-approval.ts`, plan file → card → human gate), retry-after-failure, and supervisor skill edits (`skill-edit-approval.ts`, diff preview → card → write on approve). One active card per thread (`dismiss-approvals.ts`); a 👀 reaction marks awaiting-approval threads and clears on resolution — the reject path is observed through core's `registerApprovalResolvedHandler` hook.
|
||||
- **Coordination** (`orchestrator.ts`) — wakes the tester agent when a VM is ready, enforces a 30-minute run timeout, posts results, wakes the worker to propose merge (PASS) or analyze (FAIL/PARTIAL).
|
||||
- **Container MCP tools** (`container/agent-runner/src/mcp-tools/pr-factory.ts`) — `propose_skill_edit`, `send_to_testing`, `credentialed_gh`, `submit_test_results`. Each emits a `pr_*` system action via messages_out; the host module registers the four matching delivery actions.
|
||||
|
||||
Inert without `GITHUB_WEBHOOK_SECRET`: the module loads (approval handlers bind) but registers no delivery actions and mounts no webhook.
|
||||
|
||||
## Component seams (cross-component contract)
|
||||
|
||||
Core degrades gracefully where a sibling component owns the capability. Keep these exports stable — the named components register against them at import time:
|
||||
|
||||
| Seam (core file) | Exports | Registered by | Without it |
|
||||
|---|---|---|---|
|
||||
| `gh-action.ts` | `setGhActionHandler`, `GhActionHandler` | `gh-action-approval` | `credentialed_gh` calls notify the agent that the component is missing |
|
||||
| `test-orchestration.ts` | `registerTestOrchestrator`, `TestOrchestratorModule`, `TestRun`, `OrchestratorCallbacks` | `vm-test-orchestrator` | approved test plans answer "no test orchestrator installed"; orchestrator init is skipped |
|
||||
| `canvas.ts` | `registerCanvasProvider`, `createCanvas`, `CanvasResult` | `slack-canvas` | test plans/results post as plain text + `.md` file upload |
|
||||
|
||||
Core also imports `SUPERVISOR_INSTANCE` / `TESTER_INSTANCE` from the `slack-bots` component's adapters — that component must be applied first.
|
||||
|
||||
Integration surface: one appended line in `src/modules/index.ts`, one inserted line in `container/agent-runner/src/mcp-tools/index.ts`, one import + array entry in `src/db/migrations/index.ts`, a four-helper append in `src/db/sessions.ts`, and one pinned dependency (`undici`). Everything else is added files.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Probe each before applying; stop on a failed probe and do what it names first.
|
||||
|
||||
1. **`/add-slack` is applied** (worker bot):
|
||||
|
||||
```bash
|
||||
test -f src/channels/slack.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: run `/add-slack`, then return here.
|
||||
|
||||
2. **The `slack-bots` component is applied** (instance constants this module imports):
|
||||
|
||||
```bash
|
||||
grep -q "SUPERVISOR_INSTANCE = 'slack-supervisor'" src/channels/slack-supervisor.ts \
|
||||
&& grep -q "TESTER_INSTANCE = 'slack-tester'" src/channels/slack-tester.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: apply the `slack-bots` component first.
|
||||
|
||||
3. **Core ships the approval-resolved hook**:
|
||||
|
||||
```bash
|
||||
grep -q 'registerApprovalResolvedHandler' src/modules/approvals/primitive.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: **stop — core is missing the approval-resolved hook; update to nanoclaw ≥ 2.1.11 first.** This skill makes no core edits to substitute for it.
|
||||
|
||||
4. **Core ships the delivery-action getter**:
|
||||
|
||||
```bash
|
||||
grep -q 'export function getDeliveryAction' src/delivery.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: **stop — core is missing the delivery-action getter; update to nanoclaw ≥ 2.1.11 first.**
|
||||
|
||||
5. **Core ships the raw webhook registry**:
|
||||
|
||||
```bash
|
||||
grep -q 'export function registerWebhookHandler' src/webhook-server.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: **stop — core is missing the raw webhook registry; update to nanoclaw ≥ 2.1.11 first.** This skill mounts `/webhook/github` through that registry and makes no webhook-server edits.
|
||||
|
||||
6. **Core ships the channel-instance substrate**:
|
||||
|
||||
```bash
|
||||
test -f src/db/migrations/016-messaging-group-instance.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: **stop — core is missing the channel-instance substrate; update to nanoclaw ≥ 2.1.11 first.**
|
||||
|
||||
Each step below is idempotent: if the file already contains the patched form, leave it as is and continue.
|
||||
|
||||
## Apply
|
||||
|
||||
All copy sources are under this component's folder; run every command from the repo root:
|
||||
|
||||
```bash
|
||||
SKILL=.claude/skills/recipes/pr-factory/skills/pr-factory-core
|
||||
```
|
||||
|
||||
### 1. Copy the host module
|
||||
|
||||
```bash
|
||||
mkdir -p src/modules/pr-factory
|
||||
for f in index bootstrap handler webhook orchestrator testing-approval skill-edit-approval \
|
||||
dismiss-approvals reactions activity-log defaults supervisor \
|
||||
worker-instructions canvas test-orchestration gh-action; do
|
||||
cp $SKILL/files/src/modules/pr-factory/$f.ts src/modules/pr-factory/$f.ts
|
||||
done
|
||||
```
|
||||
|
||||
### 2. Copy the DB layer
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/db/pr-threads.ts src/db/pr-threads.ts
|
||||
cp $SKILL/files/src/db/migrations/module-pr-factory-pr-threads-v2.ts src/db/migrations/module-pr-factory-pr-threads-v2.ts
|
||||
```
|
||||
|
||||
### 3. Register the migration (`src/db/migrations/index.ts`)
|
||||
|
||||
**3a.** Append to the import block (skip if already present):
|
||||
|
||||
```typescript
|
||||
import { modulePrFactoryPrThreadsV2 } from './module-pr-factory-pr-threads-v2.js';
|
||||
```
|
||||
|
||||
**3b.** Append `modulePrFactoryPrThreadsV2,` as the **last** entry of the `migrations` array. The migration name carries a `-v2` suffix deliberately: the runner dedupes by name, and installs upgraded from the legacy bot_id substrate already have `'module-pr-factory-pr-threads'` recorded — the new name is what makes the bot_id-column drop run there. Never rename it back.
|
||||
|
||||
### 4. Append the pending-approvals helpers (`src/db/sessions.ts`)
|
||||
|
||||
Insert after `getPendingApprovalsByAction` (skip any helper already present):
|
||||
|
||||
```typescript
|
||||
export function getPendingApprovalsBySessionAction(sessionId: string, action: string): PendingApproval[] {
|
||||
return getDb()
|
||||
.prepare('SELECT * FROM pending_approvals WHERE session_id = ? AND action = ?')
|
||||
.all(sessionId, action) as PendingApproval[];
|
||||
}
|
||||
|
||||
export function getPendingApprovalsBySession(sessionId: string): PendingApproval[] {
|
||||
return getDb()
|
||||
.prepare('SELECT * FROM pending_approvals WHERE session_id = ? AND status = ?')
|
||||
.all(sessionId, 'pending') as PendingApproval[];
|
||||
}
|
||||
|
||||
export function updatePendingApprovalPlatformMessageId(approvalId: string, platformMessageId: string): void {
|
||||
getDb()
|
||||
.prepare('UPDATE pending_approvals SET platform_message_id = ? WHERE approval_id = ?')
|
||||
.run(platformMessageId, approvalId);
|
||||
}
|
||||
|
||||
export function deletePendingApprovalsBySessionAction(sessionId: string, action: string): number {
|
||||
const result = getDb()
|
||||
.prepare('DELETE FROM pending_approvals WHERE session_id = ? AND action = ?')
|
||||
.run(sessionId, action);
|
||||
return result.changes;
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Append the modules-barrel line (`src/modules/index.ts`)
|
||||
|
||||
(Skip if already present.)
|
||||
|
||||
```typescript
|
||||
import './pr-factory/index.js';
|
||||
```
|
||||
|
||||
### 6. Container MCP tools
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/container/agent-runner/src/mcp-tools/pr-factory.ts container/agent-runner/src/mcp-tools/pr-factory.ts
|
||||
```
|
||||
|
||||
In `container/agent-runner/src/mcp-tools/index.ts`, insert a side-effect import **before** the `startMcpServer` import line (skip if already present):
|
||||
|
||||
```typescript
|
||||
import './pr-factory.js';
|
||||
```
|
||||
|
||||
### 7. Install the host dependency
|
||||
|
||||
```bash
|
||||
pnpm add undici@8.1.0 --save-exact
|
||||
```
|
||||
|
||||
(`handler.ts` needs undici's own `fetch` + `ProxyAgent` — Node's built-in fetch rejects an external dispatcher. Keep the pin exact.)
|
||||
|
||||
### 8. Copy the guard tests
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/bootstrap.test.ts src/modules/pr-factory/bootstrap.test.ts
|
||||
cp $SKILL/files/src/modules/pr-factory/webhook.test.ts src/modules/pr-factory/webhook.test.ts
|
||||
cp $SKILL/files/src/modules/pr-factory/registration.test.ts src/modules/pr-factory/registration.test.ts
|
||||
cp $SKILL/files/src/modules/pr-factory/handler.test.ts src/modules/pr-factory/handler.test.ts
|
||||
cp $SKILL/files/src/modules/pr-factory/orchestrator.test.ts src/modules/pr-factory/orchestrator.test.ts
|
||||
cp $SKILL/files/src/modules/approvals/response-handler-reject.test.ts src/modules/approvals/response-handler-reject.test.ts
|
||||
cp $SKILL/files/src/db/pr-threads.test.ts src/db/pr-threads.test.ts
|
||||
cp $SKILL/files/src/db/sessions-approval-helpers.test.ts src/db/sessions-approval-helpers.test.ts
|
||||
cp $SKILL/files/container/agent-runner/src/mcp-tools/pr-factory-registration.test.ts container/agent-runner/src/mcp-tools/pr-factory-registration.test.ts
|
||||
cp $SKILL/files/container/agent-runner/src/mcp-tools/pr-factory-tools.test.ts container/agent-runner/src/mcp-tools/pr-factory-tools.test.ts
|
||||
```
|
||||
|
||||
| Test | Guards |
|
||||
|------|--------|
|
||||
| `src/modules/pr-factory/registration.test.ts` | The modules-barrel line via the REAL barrel, the four `pr_*` delivery actions + three core approval handlers (read-side registries), the `GITHUB_WEBHOOK_SECRET` env gate, the host-side `PR_FACTORY_DEFAULT_REPO` contract, and the gh-action seam's not-installed fallback |
|
||||
| `src/modules/pr-factory/bootstrap.test.ts` | Bootstrap's consumption of the entity-model writers on the real migrated schema: instance-keyed messaging groups (worker default, supervisor/tester named), wiring modes, instruction seeding, idempotence, foreign-wiring drop, drift self-correction |
|
||||
| `src/modules/pr-factory/webhook.test.ts` | `registerGitHubWebhook`'s mount on core's raw webhook registry over real HTTP: HMAC accept/reject, event filtering, 405, throwing-handler → 500 |
|
||||
| `src/modules/pr-factory/handler.test.ts` | handler's consumption of resolveSession / writeSessionMessage / pr_threads on real session DBs: the PR_CONTEXT trigger contract, the default triage directive, synchronize kill/re-create, draft deferral, redelivery no-op |
|
||||
| `src/modules/pr-factory/orchestrator.test.ts` | The two-DB seam: writeOutboundDirect into worker outbound.db, tester-instance session resolution + inbound trigger + wake, PASS/FAIL verdict branching |
|
||||
| `src/modules/approvals/response-handler-reject.test.ts` | The module's `registerApprovalResolvedHandler` registration through the REAL response handler (with a role-seeded clicking admin — pins the authorization requirement too) |
|
||||
| `src/db/pr-threads.test.ts` | Migration barrel presence (real `runMigrations`), v2 schema shape (no bot column), the legacy-upgrade recreate arm, idempotence, CRUD |
|
||||
| `src/db/sessions-approval-helpers.test.ts` | The four appended sessions.ts helpers |
|
||||
| `container/.../pr-factory-registration.test.ts` | The container mcp-tools barrel line (AST: side-effect import before `startMcpServer()`) |
|
||||
| `container/.../pr-factory-tools.test.ts` | The four tools' messages_out contract: exact `pr_*` action strings (paired with the host registrations), odd-seq, repo-omission, command normalization |
|
||||
|
||||
## Configuration
|
||||
|
||||
### One factory instance serves one repository
|
||||
|
||||
A PR Factory instance serves a **single repository** — the one named in `PR_FACTORY_DEFAULT_REPO`. Point its GitHub webhook at that one repo, and all run state (sessions, VMs, the 30-minute timeouts) is keyed per-PR *within* that repo: PR numbers are unique inside a repo but collide across repos, so a single instance cannot safely serve more than one. To cover multiple repositories, run multiple factory instances (separate channels, bots, and `PR_FACTORY_DEFAULT_REPO` values).
|
||||
|
||||
### Environment (`.env`)
|
||||
|
||||
```bash
|
||||
GITHUB_WEBHOOK_SECRET=<webhook secret> # required — module is inert without it
|
||||
PR_FACTORY_SLACK_CHANNEL_ID=C0XXXXXXX # bare Slack channel id for PR threads
|
||||
PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID=C0YYYYYYY # optional — enables the supervisor
|
||||
PR_FACTORY_DEFAULT_REPO=acme/widgets # repo assumed when MCP calls omit `repo`
|
||||
PR_FACTORY_REPO_MIRROR_DIR=data/repo-mirror # optional — local clone refreshed before each triage
|
||||
PR_FACTORY_REVIEW_SKILL= # optional — see "Review workflow" below
|
||||
```
|
||||
|
||||
`SLACK_BOT_TOKEN` is reused from `/add-slack`; the supervisor's credentials come from the `slack-bots` component.
|
||||
|
||||
If `NANOCLAW_EGRESS_LOCKDOWN` is enabled (default off), worker containers cannot reach GitHub and tester containers cannot reach test VMs — leave it off for PR Factory groups or allowlist those hosts.
|
||||
|
||||
### GitHub webhook
|
||||
|
||||
In the repository (or org) settings, add a webhook: Payload URL `https://your-domain/webhook/github`, content type `application/json`, secret = `GITHUB_WEBHOOK_SECRET`, events: **Pull requests** only.
|
||||
|
||||
### GitHub credential — read-only PAT (required)
|
||||
|
||||
The GitHub credential the factory injects into the **worker** and **tester** containers (and the one the host's diff/stats fetches ride through the OneCLI gateway) **must be a fine-grained, read-only Personal Access Token**. Mint it scoped to the single repository this factory serves (`PR_FACTORY_DEFAULT_REPO`) with exactly these permissions — nothing else:
|
||||
|
||||
- **Contents: Read-only**
|
||||
- **Pull requests: Read-only**
|
||||
- **Metadata: Read-only** (mandatory baseline for any fine-grained token)
|
||||
|
||||
Do **not** grant Contents/Pull-requests write, Administration, or merge. This is the load-bearing security boundary: the worker reads the diff and runs read-only `gh` lookups with this token, but **every write — comment, label, approve, merge, close — goes exclusively through the human-approved `credentialed_gh` path** (the `gh-action-approval` component), which executes under the approving human's own gh identity, never this token. A worker (or a tester, or a prompt-injected PR diff) that tries to write directly is refused by GitHub because the injected token has no write scope. Provisioning a write-capable token here collapses that boundary — the approval card stops being the only thing standing between an autonomous agent and your `main`.
|
||||
|
||||
Store the token in the OneCLI vault with a host pattern for `api.github.com` so the gateway injects it per request; it is never placed in `.env` or passed to the container as an env var. See `init-onecli`.
|
||||
|
||||
### Grant approver roles (required)
|
||||
|
||||
Core's approval-click authorization (`isAuthorizedApprovalClick`) silently ignores card clicks from users without a `user_roles` row — the symptom is a card that does nothing, with only a host-log warning. Grant every human who will click PR Factory approval cards an owner/admin role, e.g.:
|
||||
|
||||
```bash
|
||||
pnpm run ncl roles grant --user 'slack:U0XXXXXXX' --role admin
|
||||
```
|
||||
|
||||
(or the equivalent `grantRole` call from `src/modules/permissions/db/user-roles.ts`). User ids are namespaced `<channel>:<handle>`; the user must already exist in the `users` table (they do after their first message on the channel).
|
||||
|
||||
### Review workflow (operator override point)
|
||||
|
||||
The worker's triage/review/test-plan workflow ships as **default group instructions**, seeded once into `groups/pr-factory-worker/CLAUDE.local.md` on first bootstrap and never overwritten — edit that file to tune trusted contributors, merge policy, and review depth for your repo. Operators who maintain their own container skill instead write it to `container/skills/<skill-name>/` and set `PR_FACTORY_REVIEW_SKILL=<skill-name>`: every PR trigger then opens with `Use the /<skill-name> skill …` and the seeded defaults are ignored. The worker group's container config keeps the default `skills: 'all'`, so a new `container/skills/` folder reaches the worker on its next container start with no config change — only edit the group's `skills` selection if it uses an explicit allowlist instead of `'all'`.
|
||||
|
||||
### `gh` in the worker container (required by the default workflow)
|
||||
|
||||
The default worker workflow runs read-only `gh` lookups (viewing PRs, listing checks, fetching user info) inside the worker container. **The stock agent image does not ship `gh`** — its apt block installs `git`, `curl`, `chromium`, etc., and the Node-CLI block installs `claude-code` / `agent-browser` / `vercel`, but not the GitHub CLI. So either:
|
||||
|
||||
- add `gh` to the worker container (pin it via a new `ARG` in `container/Dockerfile`'s apt or Node-CLI block, then `./container/build.sh`), **or**
|
||||
- replace the default workflow with a review skill (`PR_FACTORY_REVIEW_SKILL`) that uses the GitHub REST API through the OneCLI proxy instead of `gh`.
|
||||
|
||||
Whichever you choose, the read-only GitHub credential (the fine-grained read-only PAT above, injected by the OneCLI gateway) must be reachable from the container so `gh`/REST calls authenticate. Writes still go only through `credentialed_gh` — the in-container `gh` is read-only by token scope.
|
||||
|
||||
### Tester agent group (optional)
|
||||
|
||||
Create an agent group with folder `pr-tester` (its instructions describe YOUR test environment, so they don't ship here). On the next bootstrap run the module wires it to the PR channel under the `slack-tester` instance. Test execution additionally requires the `vm-test-orchestrator` component.
|
||||
|
||||
## Finish
|
||||
|
||||
Rebuild the agent image so containers pick up the MCP tools, then restart the host:
|
||||
|
||||
```bash
|
||||
./container/build.sh
|
||||
launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS
|
||||
# systemctl --user restart nanoclaw # Linux
|
||||
```
|
||||
|
||||
## Known smells (declared)
|
||||
|
||||
- **Host-side Slack calls outside the adapter.** `handler.ts` (thread opener) and `reactions.ts` call the Slack web API directly with `SLACK_BOT_TOKEN` instead of going through the channel adapter — the opener must return a `ts` synchronously to key the thread, which the adapter's deliver path doesn't expose. Lives entirely in skill-owned files.
|
||||
- **GitHub credentials.** Host-side GitHub reads ride the OneCLI gateway (vault PAT injected per request — the sanctioned path) with a documented unauthenticated fallback. The `gh-action-approval` component's command execution threads gh tokens via process env from `~/.config/gh/hosts.yml` — declared in that component, with OneCLI as the stated direction.
|
||||
- **Single-slot seams.** `canvas.ts` / `test-orchestration.ts` / `gh-action.ts` hold one provider each — a second registrant clobbers the first. Acceptable while exactly one component implements each; revisit if that changes.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit
|
||||
(cd container/agent-runner && bun test)  # subshell: cwd stays at the repo root and a failing `bun test` keeps its exit status
|
||||
```
|
||||
|
||||
All suites green. Any failure means a step didn't apply cleanly.
|
||||
@@ -0,0 +1,35 @@
|
||||
# pr-factory-core — files this component owns outright: the host module,
|
||||
# the pr_threads DB layer + migration, the container MCP tools, and the ten
|
||||
# guard tests. The two barrel lines (src/modules/index.ts, container
|
||||
# mcp-tools/index.ts), the migrations-barrel insert, the sessions.ts
|
||||
# four-helper append, and the undici dependency are applied as edits per
|
||||
# SKILL.md, not as file copies.
|
||||
src/modules/pr-factory/index.ts
|
||||
src/modules/pr-factory/bootstrap.ts
|
||||
src/modules/pr-factory/handler.ts
|
||||
src/modules/pr-factory/webhook.ts
|
||||
src/modules/pr-factory/orchestrator.ts
|
||||
src/modules/pr-factory/testing-approval.ts
|
||||
src/modules/pr-factory/skill-edit-approval.ts
|
||||
src/modules/pr-factory/dismiss-approvals.ts
|
||||
src/modules/pr-factory/reactions.ts
|
||||
src/modules/pr-factory/activity-log.ts
|
||||
src/modules/pr-factory/defaults.ts
|
||||
src/modules/pr-factory/supervisor.ts
|
||||
src/modules/pr-factory/worker-instructions.ts
|
||||
src/modules/pr-factory/canvas.ts
|
||||
src/modules/pr-factory/test-orchestration.ts
|
||||
src/modules/pr-factory/gh-action.ts
|
||||
src/db/pr-threads.ts
|
||||
src/db/migrations/module-pr-factory-pr-threads-v2.ts
|
||||
container/agent-runner/src/mcp-tools/pr-factory.ts
|
||||
src/modules/pr-factory/bootstrap.test.ts
|
||||
src/modules/pr-factory/webhook.test.ts
|
||||
src/modules/pr-factory/registration.test.ts
|
||||
src/modules/pr-factory/handler.test.ts
|
||||
src/modules/pr-factory/orchestrator.test.ts
|
||||
src/modules/approvals/response-handler-reject.test.ts
|
||||
src/db/pr-threads.test.ts
|
||||
src/db/sessions-approval-helpers.test.ts
|
||||
container/agent-runner/src/mcp-tools/pr-factory-registration.test.ts
|
||||
container/agent-runner/src/mcp-tools/pr-factory-tools.test.ts
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* pr-factory-core guard — the mcp-tools barrel line.
|
||||
*
|
||||
* The handlers are behavior-tested in pr-factory-tools.test.ts, but that does
|
||||
* not prove the module is registered: delete the barrel import and the four
|
||||
* tools simply never appear, yet the handler test stays green. The barrel
|
||||
* (mcp-tools/index.ts) calls startMcpServer() at import, so it can't be
|
||||
* imported in a test — per the add-atomic-chat-tool precedent the
|
||||
* registration is asserted structurally: a side-effect ImportDeclaration of
|
||||
* './pr-factory.js' must exist BEFORE the startMcpServer() statement.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import ts from 'typescript';
|
||||
|
||||
/**
 * Reads the sibling barrel (`index.ts` next to this test) and parses it into a
 * TypeScript AST. The barrel is only parsed, never imported, so nothing in it runs.
 */
function sourceFile(): ts.SourceFile {
  const barrelPath = path.join(import.meta.dir, 'index.ts');
  const barrelText = fs.readFileSync(barrelPath, 'utf8');
  // Final argument = setParentNodes, so getText()/getStart() work on the returned nodes.
  return ts.createSourceFile(barrelPath, barrelText, ts.ScriptTarget.Latest, true);
}
|
||||
|
||||
describe('mcp-tools barrel registers pr-factory', () => {
  const barrel = sourceFile();

  /** Matches any top-level `import … from './pr-factory.js'`, with or without an import clause. */
  const isPrFactoryImport = (stmt: ts.Statement): stmt is ts.ImportDeclaration =>
    ts.isImportDeclaration(stmt) &&
    ts.isStringLiteral(stmt.moduleSpecifier) &&
    stmt.moduleSpecifier.text === './pr-factory.js';

  const prFactoryImport = barrel.statements.find(isPrFactoryImport);

  it("has a side-effect import of './pr-factory.js' (no import clause)", () => {
    expect(prFactoryImport).toBeDefined();
    // A bare `import './pr-factory.js';` has no import clause; a named/default
    // import would not run registerTools the same way the other tool modules do.
    expect(prFactoryImport!.importClause).toBeUndefined();
  });

  it('the import precedes the startMcpServer() call', () => {
    // First top-level expression statement that is a call whose callee text
    // begins with `startMcpServer`.
    const startCall = barrel.statements.find((stmt) => {
      if (!ts.isExpressionStatement(stmt)) return false;
      const expr = stmt.expression;
      return ts.isCallExpression(expr) && expr.expression.getText(barrel).startsWith('startMcpServer');
    });

    expect(startCall).toBeDefined();
    const importPos = prFactoryImport!.getStart(barrel);
    const startPos = startCall!.getStart(barrel);
    expect(importPos).toBeLessThan(startPos);
  });
});
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* pr-factory-core guard — the four PR Factory MCP tool handlers' session-DB
|
||||
* consumption and the exact cross-process action-string contract.
|
||||
*
|
||||
* Each handler writes a kind:'system' row into messages_out whose
|
||||
* JSON.parse(content).action must equal the host-side registerDeliveryAction
|
||||
* key EXACTLY (pairs with src/modules/pr-factory/registration.test.ts on the
|
||||
* host) — a drifted string is a silent "Unknown system action" drop in
|
||||
* production, so it must go red here. Also pins the container's odd-seq
|
||||
* convention, pr_gh's command/commands normalization, and the repo-default
|
||||
* contract: when the agent omits `repo`, the payload omits it too — the HOST
|
||||
* action handlers apply PR_FACTORY_DEFAULT_REPO (the container never sees
|
||||
* that env var, so a container-side default would silently override it).
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
||||
|
||||
import { initTestSessionDb, closeSessionDb } from '../db/connection.js';
|
||||
import { getUndeliveredMessages } from '../db/messages-out.js';
|
||||
import { ghCommand, proposeSkillEdit, sendToTesting, submitTestResults } from './pr-factory.js';
|
||||
|
||||
// Every test gets its own session DB so rows written by one handler call
// never leak into the next test's systemRows() read.
beforeEach(function openSessionDb() {
  initTestSessionDb();
});

afterEach(function releaseSessionDb() {
  closeSessionDb();
});
|
||||
|
||||
/**
 * Snapshot of the undelivered messages_out rows, with each row's JSON content
 * parsed and its `action` field surfaced (stringified) for direct assertions.
 */
function systemRows(): Array<{ seq: number; kind: string; action: string; content: Record<string, unknown> }> {
  const parsedRows: Array<{ seq: number; kind: string; action: string; content: Record<string, unknown> }> = [];
  for (const message of getUndeliveredMessages()) {
    const payload = JSON.parse(message.content) as Record<string, unknown>;
    parsedRows.push({
      seq: message.seq as number,
      kind: message.kind,
      action: String(payload.action),
      content: payload,
    });
  }
  return parsedRows;
}
|
||||
|
||||
describe('pr-factory MCP tools → messages_out contract', () => {
  it('propose_skill_edit emits pr_propose_skill_edit with the full file payload', async () => {
    const proposal = { skill_name: 'my-review-skill', file_name: 'SKILL.md', content: '# v2' };
    await proposeSkillEdit.handler(proposal);

    const emitted = systemRows();
    expect(emitted[0].action).toBe('pr_propose_skill_edit');
    expect(emitted[0].content).toMatchObject(proposal);
  });

  it('send_to_testing emits a bare pr_send_to_testing action', async () => {
    await sendToTesting.handler({});

    const emitted = systemRows();
    const first = emitted[0];
    expect(first.kind).toBe('system');
    expect(first.seq % 2).toBe(1); // container writes odd seq
    expect(first.action).toBe('pr_send_to_testing');
  });

  it('credentialed_gh normalizes a single command string into the commands array', async () => {
    const single = 'gh pr merge 42 --merge';
    await ghCommand.handler({ command: single, description: 'merge it' });

    const first = systemRows()[0];
    expect(first.action).toBe('pr_gh');
    expect(first.content.commands).toEqual([single]);
    expect(first.content.description).toBe('merge it');
  });

  it('credentialed_gh passes a commands array through and errors when neither form is given', async () => {
    const batch = ['gh pr comment 42 --body hi', 'gh pr merge 42 --merge'];
    await ghCommand.handler({ commands: batch, description: 'both' });
    expect(systemRows()[0].content.commands).toEqual(batch);

    const rejected = await ghCommand.handler({ description: 'no commands' });
    expect(rejected.isError).toBe(true);
    expect(systemRows()).toHaveLength(1); // nothing extra written
  });

  it('submit_test_results emits pr_submit_test_results with verdict, requires it, and omits repo unless given', async () => {
    const withoutRepo = { pr_number: 42, verdict: 'PASS', content: '## results' };
    await submitTestResults.handler(withoutRepo);

    const first = systemRows()[0];
    expect(first.action).toBe('pr_submit_test_results');
    expect(first.content).toMatchObject(withoutRepo);
    // Repo absent in the payload → host-side PR_FACTORY_DEFAULT_REPO applies.
    expect('repo' in first.content).toBe(false);

    await submitTestResults.handler({ pr_number: 43, repo: 'acme/widgets', verdict: 'FAIL', content: 'x' });
    const second = systemRows()[1];
    expect(second.content.repo).toBe('acme/widgets');

    const rejected = await submitTestResults.handler({ pr_number: 42, content: 'missing verdict' });
    expect(rejected.isError).toBe(true);
  });
});
|
||||
+203
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
* PR Factory MCP tools — supervisor skill-edit proposals, testing gate,
|
||||
* credentialed GitHub commands, and test-result submission.
|
||||
*
|
||||
* The container can't write to inbound.db (host-owned), so each tool emits
|
||||
* a system action via messages_out and the host's delivery loop dispatches
|
||||
* it (see src/modules/pr-factory/index.ts on the host).
|
||||
*
|
||||
* These tools are visible in every container today; the host action
|
||||
* handlers are only registered when the pr-factory module is enabled,
|
||||
* so calling them from a non-PR-factory agent group is a no-op (host
|
||||
* logs "Unknown system action" and drops the request).
|
||||
*/
|
||||
import { writeMessageOut } from '../db/messages-out.js';
|
||||
import { registerTools } from './server.js';
|
||||
import type { McpToolDefinition } from './types.js';
|
||||
|
||||
/** Writes one diagnostic line to stderr, tagged with the `[mcp-tools]` prefix. */
function log(msg: string): void {
  const line = '[mcp-tools] ' + msg;
  console.error(line);
}
|
||||
|
||||
/** Builds a successful tool result: a single text content item carrying `text` verbatim. */
function ok(text: string) {
  const item = { type: 'text' as const, text };
  return { content: [item] };
}
|
||||
|
||||
/** Builds a failed tool result: the text is prefixed with `Error: ` and `isError` is set. */
function err(text: string) {
  const message = `Error: ${text}`;
  return {
    content: [{ type: 'text' as const, text: message }],
    isError: true,
  };
}
|
||||
|
||||
/**
 * Produces an id of the form `<prefix>-<epoch millis>-<up to 6 base-36 chars>`.
 * The suffix comes from Math.random(), so ids are not guaranteed unique.
 */
function genId(prefix: string): string {
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  return [prefix, stamp, suffix].join('-');
}
|
||||
|
||||
/**
 * `propose_skill_edit` — queues a `pr_propose_skill_edit` system action carrying
 * the skill directory, file name, and full proposed file content.
 *
 * The handler only writes the request to messages_out; whatever happens next
 * (diff, approval, applying the edit) is done by the host, not here.
 */
export const proposeSkillEdit: McpToolDefinition = {
  tool: {
    name: 'propose_skill_edit',
    description:
      'PR Factory supervisor only. Propose an edit to a container skill file. The host posts the diff for human approval — the edit is only applied if the human accepts. Read the current file from /app/skills/ first, then pass the full new content here.',
    inputSchema: {
      type: 'object' as const,
      properties: {
        skill_name: {
          type: 'string',
          description: 'Skill directory name under /app/skills/, e.g. "my-review-skill".',
        },
        file_name: {
          type: 'string',
          description: 'File within the skill directory, e.g. "SKILL.md".',
        },
        content: {
          type: 'string',
          description: 'Full new file content to propose.',
        },
      },
      required: ['skill_name', 'file_name', 'content'],
    },
  },
  async handler(args) {
    const skillName = args.skill_name as string;
    const fileName = args.file_name as string;
    const proposedContent = args.content as string;

    // Any falsy field (missing or empty string) rejects the call before anything is written.
    const anyMissing = [skillName, fileName, proposedContent].some((value) => !value);
    if (anyMissing) return err('skill_name, file_name, and content are required');

    const payload = JSON.stringify({
      action: 'pr_propose_skill_edit',
      skill_name: skillName,
      file_name: fileName,
      content: proposedContent,
    });
    writeMessageOut({ id: genId('sys'), kind: 'system', content: payload });

    const target = `${skillName}/${fileName}`;
    log(`pr_propose_skill_edit: ${target}`);
    return ok(`Skill edit proposed for ${target} — waiting for human approval.`);
  },
};
|
||||
|
||||
/**
 * `send_to_testing` — queues a bare `pr_send_to_testing` system action.
 * Takes no arguments; the payload carries only the action name.
 */
export const sendToTesting: McpToolDefinition = {
  tool: {
    name: 'send_to_testing',
    description:
      'PR Factory worker only. Request that the test plan (previously saved to /workspace/agent/test-plans/) be sent to the orchestrator for execution. The host will post the plan in the thread with an approval card — a human must approve before the plan is forwarded. Call this after writing the test plan file.',
    inputSchema: {
      type: 'object' as const,
      properties: {},
      required: [],
    },
  },
  async handler() {
    const payload = JSON.stringify({ action: 'pr_send_to_testing' });
    writeMessageOut({ id: genId('sys'), kind: 'system', content: payload });

    log('pr_send_to_testing requested');
    return ok('Test plan approval requested — waiting for human to accept.');
  },
};
|
||||
|
||||
/**
 * `credentialed_gh` — queues a `pr_gh` system action with one or more gh CLI
 * command strings and a human-readable description.
 *
 * Input is accepted in two forms and normalized to a `commands` array:
 *  - `commands`: a non-empty array, every entry a non-empty string;
 *  - `command`: a single non-empty string (used only when `commands` is absent or empty).
 *
 * Returns an error result, without writing anything, when `description` is
 * missing, when neither form is supplied, or when `commands` contains an
 * entry that is not a non-empty string.
 */
export const ghCommand: McpToolDefinition = {
  tool: {
    name: 'credentialed_gh',
    description:
      'Run credentialed GitHub CLI commands that take action on a repository (merge, close, comment, label, approve, etc.). Write the full command(s) starting with `gh`, exactly as you would type in a terminal. The host shows the commands to a human for approval — they only execute if approved. When you need multiple commands (e.g. comment then merge), pass them as an array in `commands` — they execute sequentially on a single approval. Do NOT use this for read-only lookups (viewing PRs, listing checks, fetching user info, etc.) — use the `gh` CLI in your shell directly for those. Follow the merge strategy named in your group instructions when merging PRs.',
    inputSchema: {
      type: 'object' as const,
      properties: {
        command: {
          type: 'string',
          description: 'A single gh CLI command. Use `commands` instead when you need multiple commands.',
        },
        commands: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Array of gh CLI commands to execute sequentially on a single approval. E.g. ["gh pr comment 42 --repo org/repo --body \'LGTM\'", "gh pr merge 42 --repo org/repo --merge --delete-branch"].',
        },
        description: {
          type: 'string',
          description: 'Short human-readable explanation of what these commands do, e.g. "comments LGTM and merges the PR".',
        },
      },
      required: ['description'],
    },
  },
  async handler(args) {
    const description = args.description as string;
    if (!description) return err('description is required');

    // Accept either `commands` (array) or `command` (string)
    let commands: string[];
    if (Array.isArray(args.commands) && args.commands.length > 0) {
      const entries = args.commands as unknown[];
      // The array arrives unvalidated: refuse anything that is not a non-empty
      // string rather than forwarding it for approval. This mirrors the
      // single-`command` path, which also treats '' and non-strings as missing.
      const hasInvalidEntry = entries.some((entry) => typeof entry !== 'string' || entry === '');
      if (hasInvalidEntry) return err('commands must be an array of non-empty strings');
      commands = entries as string[];
    } else if (typeof args.command === 'string' && args.command) {
      commands = [args.command];
    } else {
      return err('command or commands is required');
    }

    writeMessageOut({
      id: genId('sys'),
      kind: 'system',
      content: JSON.stringify({ action: 'pr_gh', commands, description }),
    });
    log(`pr_gh: ${commands.join(' && ')}`);
    return ok(
      `Credentialed gh command(s) proposed — waiting for human approval.\n${commands.map((c) => `\`${c}\``).join('\n')}`,
    );
  },
};
|
||||
|
||||
/**
 * `submit_test_results` — queues a `pr_submit_test_results` system action with
 * the PR number, verdict, markdown results, and (only when supplied) the repo.
 *
 * A missing or empty `repo` is left out of the payload entirely so the host
 * can apply its own default; the log line and the reply then both use the
 * default-repo wording, matching what was actually sent.
 *
 * Returns an error result, without writing anything, when `pr_number`,
 * `verdict`, or `content` is falsy.
 */
export const submitTestResults: McpToolDefinition = {
  tool: {
    name: 'submit_test_results',
    description:
      'PR Factory tester only. Submit the results of a test run. Include an explicit verdict (PASS, PARTIAL, or FAIL) and the full test results as markdown.',
    inputSchema: {
      type: 'object' as const,
      properties: {
        pr_number: {
          type: 'number',
          description: 'Pull request number that was tested.',
        },
        repo: {
          type: 'string',
          description:
            'Full repository name, e.g. "acme/widgets". Omit to use the host\'s configured default repo (PR_FACTORY_DEFAULT_REPO).',
        },
        verdict: {
          type: 'string',
          enum: ['PASS', 'PARTIAL', 'FAIL'],
          description:
            'Test verdict: PASS (all must-pass tests passed), PARTIAL (some skipped or should-pass failed), FAIL (must-pass test failed).',
        },
        content: {
          type: 'string',
          description: 'Full markdown test results including individual test outcomes and any error details.',
        },
      },
      required: ['pr_number', 'verdict', 'content'],
    },
  },
  async handler(args) {
    const pr_number = args.pr_number as number;
    // Omitted repo stays omitted — the host applies PR_FACTORY_DEFAULT_REPO.
    // An empty string is folded into "omitted" here, so the payload, the log
    // line, and the reply text all agree on whether a repo was supplied.
    const repo = (args.repo as string | undefined) || undefined;
    const verdict = args.verdict as string;
    const content = args.content as string;
    if (!pr_number || !verdict || !content) return err('pr_number, verdict, and content are required');

    writeMessageOut({
      id: genId('sys'),
      kind: 'system',
      content: JSON.stringify({
        action: 'pr_submit_test_results',
        pr_number,
        ...(repo ? { repo } : {}),
        verdict,
        content,
      }),
    });
    log(`pr_submit_test_results: ${repo ?? '<default repo>'}#${pr_number} verdict=${verdict}`);
    return ok(`Test results submitted for ${repo ?? 'the default repo'}#${pr_number} — verdict: ${verdict}`);
  },
};
|
||||
|
||||
// Module-load side effect: passes the four tool definitions above to registerTools (from ./server.js).
registerTools([proposeSkillEdit, sendToTesting, ghCommand, submitTestResults]);
|
||||
+68
@@ -0,0 +1,68 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
|
||||
import type { Migration } from './index.js';
|
||||
|
||||
/**
 * Migration `module-pr-factory-pr-threads-v2` — brings pr_threads to its v2
 * shape (owned by the pr-factory-core component).
 *
 * `up` handles three starting states:
 *  - No pr_threads table (fresh install): create the table and both indexes.
 *    There is no bot identity column — the delivering bot is resolved per
 *    messaging group via `messaging_groups.instance`, so pr_threads never
 *    needs to record it.
 *  - pr_threads exists without `bot_id`: already v2, nothing to do.
 *  - pr_threads exists with `bot_id` (fork upgrade): installs that ran the
 *    earlier pr-factory substrate carry that column (verified write-only —
 *    every writer set it to NULL and nothing reads it). The table is rebuilt
 *    without it and every row is copied across.
 *
 * Those fork DBs also hold a `'module-pr-factory-pr-threads'` row in
 * schema_version. The runner dedupes migrations by NAME, so this migration
 * uses a NEW name (`-v2`) to make sure it still runs on them.
 */
export const modulePrFactoryPrThreadsV2: Migration = {
  version: 101,
  name: 'module-pr-factory-pr-threads-v2',
  disableForeignKeys: true,
  up(db: Database.Database) {
    const createV2Schema = `
      CREATE TABLE pr_threads (
        channel_id TEXT NOT NULL,
        thread_ts TEXT NOT NULL,
        channel_type TEXT NOT NULL,
        repo_full_name TEXT NOT NULL,
        pr_number INTEGER NOT NULL,
        session_id TEXT NOT NULL,
        created_at TEXT NOT NULL,
        PRIMARY KEY (channel_id, thread_ts)
      );

      CREATE INDEX idx_pr_threads_repo_pr ON pr_threads (repo_full_name, pr_number);
      CREATE INDEX idx_pr_threads_session ON pr_threads (session_id);
    `;

    // Fresh install: no table yet, so just create the v2 schema.
    const tableRow = db.prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'pr_threads'").get() as
      | { name: string }
      | undefined;
    if (tableRow === undefined) {
      db.exec(createV2Schema);
      return;
    }

    // Table exists: only the old bot_id shape needs work.
    const columnNames = (db.prepare('PRAGMA table_info(pr_threads)').all() as Array<{ name: string }>).map(
      (column) => column.name,
    );
    if (!columnNames.includes('bot_id')) return; // already v2 shape

    // Old fork shape — recreate without bot_id, carrying every row over.
    // The rename keeps the old table's indexes (and their names), so they are
    // dropped first or the CREATE INDEX statements in the v2 schema collide.
    const rebuildSteps = [
      `
        DROP INDEX IF EXISTS idx_pr_threads_repo_pr;
        DROP INDEX IF EXISTS idx_pr_threads_session;
        ALTER TABLE pr_threads RENAME TO pr_threads_old;
      `,
      createV2Schema,
      `
        INSERT INTO pr_threads (channel_id, thread_ts, channel_type, repo_full_name, pr_number, session_id, created_at)
        SELECT channel_id, thread_ts, channel_type, repo_full_name, pr_number, session_id, created_at
        FROM pr_threads_old
      `,
      'DROP TABLE pr_threads_old',
    ];
    for (const sql of rebuildSteps) {
      db.exec(sql);
    }
  },
};
|
||||
+146
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* pr-factory-core guard — pr_threads migration registration + DB layer.
|
||||
*
|
||||
* Runs the REAL migration barrel against a fresh DB and asserts the
|
||||
* pr_threads table exists with its composite primary key, both indexes, and
|
||||
* NO bot identity column (delivery identity lives on
|
||||
* messaging_groups.instance) — red if the modulePrFactoryPrThreadsV2 import
|
||||
* or array entry is deleted from src/db/migrations/index.ts. Exercises the
|
||||
* full CRUD surface of src/db/pr-threads.ts against the migrated schema, and
|
||||
* pins the fork-upgrade arm: a bot_id-shaped pr_threads (old recorded
|
||||
* migration name) is recreated without the column, rows preserved.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { closeDb, getDb, initTestDb, runMigrations } from './index.js';
|
||||
import { migrations } from './migrations/index.js';
|
||||
import {
|
||||
createPrThread,
|
||||
deletePrThread,
|
||||
getPrThread,
|
||||
getPrThreadByRepoPr,
|
||||
getPrThreadBySession,
|
||||
updatePrThreadSession,
|
||||
type PrThread,
|
||||
} from './pr-threads.js';
|
||||
|
||||
/** Fixture factory: a complete PrThread with fixed sample values, with `overrides` applied on top. */
function row(overrides: Partial<PrThread> = {}): PrThread {
  const defaults: PrThread = {
    channel_id: 'slack:C0TEST',
    thread_ts: '1700000000.000100',
    channel_type: 'slack',
    repo_full_name: 'acme/widgets',
    pr_number: 42,
    session_id: 'sess-42',
    created_at: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Each test starts from a test DB with the full migration barrel applied.
beforeEach(() => {
  runMigrations(initTestDb());
});

afterEach(() => {
  closeDb();
});
|
||||
|
||||
describe('pr_threads migration', () => {
  it('creates the table with the (channel_id, thread_ts) composite primary key and no bot column', () => {
    type ColumnInfo = { name: string; pk: number };
    const columns = getDb().prepare('PRAGMA table_info(pr_threads)').all() as ColumnInfo[];
    expect(columns.length).toBeGreaterThan(0);

    // Keep only columns with pk > 0 and order them by that value to recover
    // the declared primary-key column order.
    const primaryKey = columns
      .filter(({ pk }) => pk > 0)
      .sort((left, right) => left.pk - right.pk)
      .map(({ name }) => name);
    expect(primaryKey).toEqual(['channel_id', 'thread_ts']);

    const columnNames = columns.map(({ name }) => name);
    const requiredColumns = ['channel_type', 'repo_full_name', 'pr_number', 'session_id', 'created_at'];
    requiredColumns.forEach((required) => {
      expect(columnNames).toContain(required);
    });
    expect(columnNames).not.toContain('bot_id');
  });

  it('creates both lookup indexes', () => {
    const indexRows = getDb()
      .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'pr_threads'")
      .all() as Array<{ name: string }>;
    const indexNames = indexRows.map(({ name }) => name);

    for (const expectedIndex of ['idx_pr_threads_repo_pr', 'idx_pr_threads_session']) {
      expect(indexNames).toContain(expectedIndex);
    }
  });
});
|
||||
|
||||
describe('pr_threads fork upgrade (bot_id-shaped table)', () => {
  it('recreates an old bot_id-shaped pr_threads without the column, preserving rows', () => {
    // Discard the DB opened by beforeEach; this test builds its own starting state.
    closeDb();
    const db = initTestDb();

    // Run everything EXCEPT the v2 migration, then synthesize the old fork
    // state: bot_id-shaped table + the old recorded migration name.
    const withoutV2 = migrations.filter((m) => m.name !== 'module-pr-factory-pr-threads-v2');
    runMigrations(db, withoutV2);

    const oldForkSchemaAndRow = `
      CREATE TABLE pr_threads (
        channel_id TEXT NOT NULL,
        thread_ts TEXT NOT NULL,
        channel_type TEXT NOT NULL,
        bot_id TEXT,
        repo_full_name TEXT NOT NULL,
        pr_number INTEGER NOT NULL,
        session_id TEXT NOT NULL,
        created_at TEXT NOT NULL,
        PRIMARY KEY (channel_id, thread_ts)
      );
      CREATE INDEX idx_pr_threads_repo_pr ON pr_threads (repo_full_name, pr_number);
      CREATE INDEX idx_pr_threads_session ON pr_threads (session_id);
      INSERT INTO pr_threads VALUES
        ('slack:C0OLD', '1700000000.000200', 'slack', NULL, 'acme/widgets', 7, 'sess-old-7', '2025-01-01T00:00:00Z');
    `;
    db.exec(oldForkSchemaAndRow);

    const recordOldMigration = db.prepare(
      "INSERT INTO schema_version (version, name, applied) VALUES (?, 'module-pr-factory-pr-threads', ?)",
    );
    recordOldMigration.run(900, new Date().toISOString());

    // The real barrel now applies ONLY the v2 migration (name-keyed dedupe
    // skips everything else, and the old name never blocks the new one).
    runMigrations(db);

    const columnRows = db.prepare('PRAGMA table_info(pr_threads)').all() as Array<{ name: string }>;
    const columnNames = columnRows.map((c) => c.name);
    expect(columnNames).not.toContain('bot_id');
    expect(getPrThreadByRepoPr('acme/widgets', 7)?.session_id).toBe('sess-old-7');

    // Idempotent: a second pass changes nothing.
    runMigrations(db);
    expect(getPrThreadByRepoPr('acme/widgets', 7)?.session_id).toBe('sess-old-7');
  });
});
|
||||
|
||||
describe('pr-threads CRUD', () => {
  // Key of the default fixture row produced by row().
  const CHANNEL = 'slack:C0TEST';
  const THREAD = '1700000000.000100';
  const REPO = 'acme/widgets';

  it('round-trips create → all three lookups', () => {
    createPrThread(row());

    const byKey = getPrThread(CHANNEL, THREAD);
    const byRepoPr = getPrThreadByRepoPr(REPO, 42);
    const bySession = getPrThreadBySession('sess-42');

    expect(byKey?.pr_number).toBe(42);
    expect(byRepoPr?.session_id).toBe('sess-42');
    expect(bySession?.thread_ts).toBe(THREAD);
    expect(getPrThreadByRepoPr(REPO, 99)).toBeUndefined();
  });

  it('updatePrThreadSession repoints the row to a new session', () => {
    createPrThread(row());
    updatePrThreadSession(CHANNEL, THREAD, 'sess-fresh');

    expect(getPrThreadByRepoPr(REPO, 42)?.session_id).toBe('sess-fresh');
    // The old session id no longer resolves; the new one does.
    expect(getPrThreadBySession('sess-42')).toBeUndefined();
    expect(getPrThreadBySession('sess-fresh')?.pr_number).toBe(42);
  });

  it('deletePrThread removes the row', () => {
    createPrThread(row());
    deletePrThread(CHANNEL, THREAD);

    expect(getPrThread(CHANNEL, THREAD)).toBeUndefined();
    expect(getPrThreadByRepoPr(REPO, 42)).toBeUndefined();
  });
});
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* pr_threads — central index of PR ↔ chat-thread ↔ session.
|
||||
*
|
||||
* With per-PR sessions inside a single PR Factory Worker agent group, this
|
||||
* table maps (chat thread) ↔ (repo, PR#) ↔ (session) without a folder on
|
||||
* disk. The delivering bot identity is not recorded here — outbound identity
|
||||
* is resolved per messaging group via `messaging_groups.instance`.
|
||||
*
|
||||
* Used by:
|
||||
* - pr-factory handler: insert on PR opened; lookup + repoint on synchronize/close
|
||||
* - pr-factory orchestrator: lookup by (repo_full_name, pr_number) when test results land
|
||||
* - pr-factory testing/gh/skill-edit gates + reactions: lookup by (session_id)
|
||||
*/
|
||||
import { getDb } from './connection.js';
|
||||
|
||||
/**
 * One pr_threads row. `(channel_id, thread_ts)` is the table's primary key;
 * `(repo_full_name, pr_number)` and `session_id` are the two indexed lookups.
 */
export interface PrThread {
  /** Channel the thread lives in; first half of the primary key. */
  channel_id: string;
  /** Thread identifier within the channel; second half of the primary key. */
  thread_ts: string;
  /** Channel kind — presumably the same values as `messaging_groups.channel_type`; confirm against callers. */
  channel_type: string;
  /** Repository in "owner/name" form, e.g. "acme/widgets". */
  repo_full_name: string;
  /** Pull request number within `repo_full_name`. */
  pr_number: number;
  /** Session currently bound to this thread; changed by updatePrThreadSession. */
  session_id: string;
  /** Creation timestamp, stored as text (callers in view pass ISO-8601 strings). */
  created_at: string;
}
|
||||
|
||||
/**
 * Inserts one pr_threads row, binding each column from the same-named field
 * of `row`. This is a plain INSERT, so a row that already exists for
 * `(channel_id, thread_ts)` makes the statement fail.
 */
export function createPrThread(row: PrThread): void {
  const insert = getDb().prepare(
    `INSERT INTO pr_threads (channel_id, thread_ts, channel_type, repo_full_name, pr_number, session_id, created_at)
     VALUES (@channel_id, @thread_ts, @channel_type, @repo_full_name, @pr_number, @session_id, @created_at)`,
  );
  insert.run(row);
}
|
||||
|
||||
/** Looks a thread up by its primary key; yields `undefined` when no row matches. */
export function getPrThread(channelId: string, threadTs: string): PrThread | undefined {
  const byKey = getDb().prepare('SELECT * FROM pr_threads WHERE channel_id = ? AND thread_ts = ?');
  return byKey.get(channelId, threadTs) as PrThread | undefined;
}
|
||||
|
||||
/**
 * Looks a thread up by repository and PR number; yields `undefined` when no
 * row matches. Only one row is returned even if several match.
 */
export function getPrThreadByRepoPr(repoFullName: string, prNumber: number): PrThread | undefined {
  const byRepoPr = getDb().prepare('SELECT * FROM pr_threads WHERE repo_full_name = ? AND pr_number = ?');
  return byRepoPr.get(repoFullName, prNumber) as PrThread | undefined;
}
|
||||
|
||||
/**
 * Looks a thread up by the session bound to it; yields `undefined` when no
 * row matches. Only one row is returned even if several match.
 */
export function getPrThreadBySession(sessionId: string): PrThread | undefined {
  const bySession = getDb().prepare('SELECT * FROM pr_threads WHERE session_id = ?');
  return bySession.get(sessionId) as PrThread | undefined;
}
|
||||
|
||||
/**
 * Rebinds the thread identified by `(channelId, threadTs)` to `sessionId`.
 * The result of the UPDATE is not inspected, so a missing row is not reported.
 */
export function updatePrThreadSession(channelId: string, threadTs: string, sessionId: string): void {
  const repoint = getDb().prepare('UPDATE pr_threads SET session_id = ? WHERE channel_id = ? AND thread_ts = ?');
  // Bind order follows the SQL: the new session id first, then the key columns.
  repoint.run(sessionId, channelId, threadTs);
}
|
||||
|
||||
/**
 * Deletes the thread identified by `(channelId, threadTs)`.
 * The result of the DELETE is not inspected, so a missing row is not reported.
 */
export function deletePrThread(channelId: string, threadTs: string): void {
  const remove = getDb().prepare('DELETE FROM pr_threads WHERE channel_id = ? AND thread_ts = ?');
  remove.run(channelId, threadTs);
}
|
||||
+95
@@ -0,0 +1,95 @@
|
||||
/**
|
||||
* pr-factory-core guard — the four pending_approvals helpers appended to
|
||||
* src/db/sessions.ts (getPendingApprovalsBySessionAction,
|
||||
* getPendingApprovalsBySession, updatePendingApprovalPlatformMessageId,
|
||||
* deletePendingApprovalsBySessionAction). pr-factory's approval-card flow
|
||||
* (dismiss-stale, card-id round-trip) consumes exactly these; deleting any
|
||||
* helper goes red here before it breaks the module at runtime.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { closeDb, createAgentGroup, initTestDb, runMigrations } from './index.js';
|
||||
import {
|
||||
createPendingApproval,
|
||||
createSession,
|
||||
deletePendingApprovalsBySessionAction,
|
||||
getPendingApprovalsBySession,
|
||||
getPendingApprovalsBySessionAction,
|
||||
updatePendingApprovalPlatformMessageId,
|
||||
updatePendingApprovalStatus,
|
||||
} from './sessions.js';
|
||||
|
||||
/** Current wall-clock time as an ISO-8601 UTC string. */
function now(): string {
  const current = new Date();
  return current.toISOString();
}
|
||||
|
||||
/**
 * Inserts one pending-approval fixture row for `sessionId`/`action`.
 * `approvalId` is also used as the request id; payload and options are empty JSON.
 */
function seed(approvalId: string, sessionId: string, action: string): void {
  const createdAt = new Date().toISOString();
  createPendingApproval({
    approval_id: approvalId,
    session_id: sessionId,
    request_id: approvalId,
    action,
    payload: '{}',
    created_at: createdAt,
    title: 'Test',
    options_json: '[]',
  });
}
|
||||
|
||||
// Fixture layout for every test: one agent group, two sessions, and three
// approvals — two on sess-a (pr_gh, pr_send_to_testing) and one on sess-b (pr_gh).
beforeEach(() => {
  runMigrations(initTestDb());

  createAgentGroup({ id: 'ag-1', name: 'Agent', folder: 'agent', agent_provider: null, created_at: now() });

  ['sess-a', 'sess-b'].forEach((sessionId) => {
    createSession({
      id: sessionId,
      agent_group_id: 'ag-1',
      messaging_group_id: null,
      thread_id: null,
      agent_provider: null,
      status: 'active',
      container_status: 'idle',
      last_active: null,
      created_at: now(),
    });
  });

  const approvals: Array<[approvalId: string, sessionId: string, action: string]> = [
    ['appr-1', 'sess-a', 'pr_gh'],
    ['appr-2', 'sess-a', 'pr_send_to_testing'],
    ['appr-3', 'sess-b', 'pr_gh'],
  ];
  for (const [approvalId, sessionId, action] of approvals) {
    seed(approvalId, sessionId, action);
  }
});

afterEach(() => {
  closeDb();
});
|
||||
|
||||
describe('pending_approvals helpers', () => {
  /** Approval ids returned for a (session, action) pair, in query order. */
  const idsFor = (sessionId: string, action: string): string[] =>
    getPendingApprovalsBySessionAction(sessionId, action).map((approval) => approval.approval_id);

  it('getPendingApprovalsBySessionAction filters by both session and action', () => {
    expect(idsFor('sess-a', 'pr_gh')).toEqual(['appr-1']);
    expect(getPendingApprovalsBySessionAction('sess-a', 'pr_retry_test')).toEqual([]);
    expect(idsFor('sess-b', 'pr_gh')).toEqual(['appr-3']);
  });

  it('getPendingApprovalsBySession returns only status=pending rows for the session', () => {
    const idsBefore = getPendingApprovalsBySession('sess-a').map((approval) => approval.approval_id);
    expect(idsBefore.sort()).toEqual(['appr-1', 'appr-2']);

    updatePendingApprovalStatus('appr-1', 'approved');

    const idsAfter = getPendingApprovalsBySession('sess-a').map((approval) => approval.approval_id);
    expect(idsAfter).toEqual(['appr-2']);
  });

  it('updatePendingApprovalPlatformMessageId round-trips through the row', () => {
    const messageId = '1700000000.000200';
    updatePendingApprovalPlatformMessageId('appr-2', messageId);

    const stored = getPendingApprovalsBySessionAction('sess-a', 'pr_send_to_testing')[0];
    expect(stored.platform_message_id).toBe(messageId);
  });

  it('deletePendingApprovalsBySessionAction deletes and reports the change count', () => {
    const firstPass = deletePendingApprovalsBySessionAction('sess-a', 'pr_gh');
    expect(firstPass).toBe(1);
    const secondPass = deletePendingApprovalsBySessionAction('sess-a', 'pr_gh');
    expect(secondPass).toBe(0);

    // Other sessions/actions untouched.
    expect(getPendingApprovalsBySessionAction('sess-b', 'pr_gh')).toHaveLength(1);
    expect(getPendingApprovalsBySessionAction('sess-a', 'pr_send_to_testing')).toHaveLength(1);
  });
});
|
||||
+154
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* pr-factory-core guard — the module's approval-resolved registration.
|
||||
*
|
||||
* pr-factory clears the 👀 awaiting-approval reaction (EMOJI_AWAITING,
|
||||
* Slack name 'warning') when an admin REJECTS an approval card. The reject
|
||||
* path resolves in core, so the module observes it via
|
||||
* registerApprovalResolvedHandler — a top-level registration in
|
||||
* src/modules/pr-factory/index.ts that runs even without env (inert mode).
|
||||
*
|
||||
* This drives the REAL handleApprovalsResponse with a reject payload after
|
||||
* importing the real pr-factory module, faking only global fetch. The
|
||||
* clicking user is seeded with an owner role — core's
|
||||
* isAuthorizedApprovalClick gate silently swallows clicks from
|
||||
* non-role-holders, which is itself the documented operator-setup
|
||||
* requirement. Deleting the module's registerApprovalResolvedHandler call
|
||||
* (or its reject filter calling clearAwaitingApproval) goes red. The core
|
||||
* half of the hook is guarded separately in approval-resolved.test.ts.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Replace the container runner's exports with inert vitest stubs.
vi.mock('../../container-runner.js', () => {
  const wakeContainer = vi.fn().mockResolvedValue(undefined);
  const isContainerRunning = vi.fn().mockReturnValue(false);
  const killContainer = vi.fn();
  const buildAgentGroupImage = vi.fn().mockResolvedValue(undefined);
  return { wakeContainer, isContainerRunning, killContainer, buildAgentGroupImage };
});

// Keep the real config, but redirect both data roots into this suite's
// scratch directory under /tmp.
vi.mock('../../config.js', async () => {
  const realConfig = await vi.importActual<typeof import('../../config.js')>('../../config.js');
  const scratchRoots = {
    DATA_DIR: '/tmp/nanoclaw-test-prf-reject/data',
    GROUPS_DIR: '/tmp/nanoclaw-test-prf-reject/groups',
  };
  return { ...realConfig, ...scratchRoots };
});
|
||||
|
||||
// Importing the module for its side effect: the approval-resolved registration.
|
||||
import '../pr-factory/index.js';
|
||||
import { closeDb, createAgentGroup, initTestDb, runMigrations } from '../../db/index.js';
|
||||
import { createPendingApproval, createSession } from '../../db/sessions.js';
|
||||
import { createPrThread } from '../../db/pr-threads.js';
|
||||
import { upsertUser } from '../permissions/db/users.js';
|
||||
import { grantRole } from '../permissions/db/user-roles.js';
|
||||
import { initSessionFolder } from '../../session-manager.js';
|
||||
import { handleApprovalsResponse } from './response-handler.js';
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-reject';
|
||||
|
||||
/** Current instant as an ISO-8601 string; used for the timestamp columns of seeded rows. */
function now(): string {
  const instant = new Date();
  return instant.toISOString();
}
|
||||
|
||||
/**
 * Inserts an active, idle session row for agent group 'ag-1' (no messaging
 * group, thread, or provider) and initialises its session folder.
 *
 * @param sessionId id to give the new session row
 */
function seedSession(sessionId: string): void {
  const agentGroupId = 'ag-1';

  createSession({
    id: sessionId,
    agent_group_id: agentGroupId,
    messaging_group_id: null,
    thread_id: null,
    agent_provider: null,
    status: 'active',
    container_status: 'idle',
    last_active: null,
    created_at: now(),
  });

  initSessionFolder(agentGroupId, sessionId);
}
|
||||
|
||||
/**
 * Inserts a pending 'pr_send_to_testing' approval for PR 42 of acme/widgets.
 * The approval id doubles as the request id.
 *
 * @param approvalId id for the approval row (also stored as request_id)
 * @param sessionId  session the approval belongs to
 */
function seedApproval(approvalId: string, sessionId: string): void {
  const payload = JSON.stringify({ filePath: '/tmp/none', fileName: 'none', prNumber: 42, repo: 'acme/widgets' });

  createPendingApproval({
    approval_id: approvalId,
    session_id: sessionId,
    request_id: approvalId,
    action: 'pr_send_to_testing',
    payload,
    created_at: now(),
    title: 'Send to Testing',
    options_json: '[]',
  });
}
|
||||
|
||||
/**
 * Drives the real approvals response handler with a 'reject' click from
 * user 'slack:admin-1' in Slack channel 'slack:C0WORK' (no thread).
 *
 * @param approvalId approval being rejected (sent as the questionId)
 * @returns whatever handleApprovalsResponse resolves to
 */
async function reject(approvalId: string): Promise<boolean> {
  const click: Parameters<typeof handleApprovalsResponse>[0] = {
    questionId: approvalId,
    value: 'reject',
    userId: 'slack:admin-1',
    channelType: 'slack',
    platformId: 'slack:C0WORK',
    threadId: null,
  };
  return handleApprovalsResponse(click);
}
|
||||
|
||||
// Per-test fixture: empty scratch directory, freshly migrated test database,
// one agent group, an authorised admin, and a fetch stub that always answers OK.
beforeEach(() => {
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DIR, { recursive: true });

  runMigrations(initTestDb());

  createAgentGroup({
    id: 'ag-1',
    name: 'Worker',
    folder: 'pr-factory-worker',
    agent_provider: null,
    created_at: now(),
  });

  // Authorize the clicking admin — without a user_roles row core's
  // isAuthorizedApprovalClick swallows the click and the hook never fires.
  const adminId = 'slack:admin-1';
  upsertUser({ id: adminId, kind: 'slack', display_name: 'Admin', created_at: now() });
  grantRole({ user_id: adminId, role: 'owner', agent_group_id: null, granted_by: null, granted_at: now() });

  // Every outbound HTTP call resolves to a 200 response with body {"ok":true}.
  const okFetch = vi.fn(async () => new Response(JSON.stringify({ ok: true }), { status: 200 }));
  vi.stubGlobal('fetch', okFetch);
});
|
||||
|
||||
// Undo the fixture: restore global fetch, close the database, drop the scratch directory.
afterEach(() => {
  vi.unstubAllGlobals();
  closeDb();
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
});
|
||||
|
||||
describe('pr-factory reject-side reaction cleanup', () => {
  /** Recorded fetch calls whose URL mentions Slack's reactions.remove method. */
  const reactionRemovals = () =>
    vi.mocked(globalThis.fetch).mock.calls.filter(([url]) => String(url).includes('reactions.remove'));

  it('rejecting an approval on a PR-thread session removes the awaiting-approval reaction', async () => {
    const sessionId = 'sess-pr';
    seedSession(sessionId);
    createPrThread({
      channel_id: 'slack:C0WORK',
      thread_ts: '1700000000.000100',
      channel_type: 'slack',
      repo_full_name: 'acme/widgets',
      pr_number: 42,
      session_id: sessionId,
      created_at: now(),
    });
    seedApproval('appr-pr-1', sessionId);

    const handled = await reject('appr-pr-1');
    expect(handled).toBe(true);

    // Exactly one removal, aimed at the PR thread's root message.
    const removals = reactionRemovals();
    expect(removals).toHaveLength(1);
    const body = JSON.parse(String(removals[0][1]?.body)) as { channel: string; timestamp: string; name: string };
    expect(body).toMatchObject({ channel: 'C0WORK', timestamp: '1700000000.000100', name: 'warning' });
  });

  it('rejecting an approval on a session without a pr_threads row makes no Slack call', async () => {
    const sessionId = 'sess-plain';
    seedSession(sessionId);
    seedApproval('appr-plain-1', sessionId);

    const handled = await reject('appr-plain-1');
    expect(handled).toBe(true);

    expect(reactionRemovals()).toHaveLength(0);
  });
});
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Per-PR activity log — append-only NDJSON files, one per PR.
|
||||
*
|
||||
* Files live at `data/pr-activity/<owner>/<repo>/<pr-number>.log`.
|
||||
* Each line is a JSON object with ts, event, pr, repo, and arbitrary details.
|
||||
*
|
||||
* Stream live:
|
||||
* tail -f data/pr-activity/<owner>/<repo>/42.log # single PR
|
||||
* tail -f data/pr-activity/<owner>/<repo>/*.log # all PRs
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
/** Root of all per-PR activity logs, resolved against the process working directory. */
const BASE_DIR = path.resolve('data', 'pr-activity');

/** Directories this process has already created, so mkdir is skipped on the hot path. */
const dirCache = new Set<string>();

/** Creates `dir` (and any missing parents) unless this process already did so. */
function ensureDir(dir: string): void {
  if (dirCache.has(dir)) return;
  fs.mkdirSync(dir, { recursive: true });
  dirCache.add(dir);
}

/**
 * Maps a repo name onto a directory below BASE_DIR.
 *
 * Empty, `.` and `..` segments are discarded so a malformed or hostile repo
 * string cannot make the log land outside BASE_DIR. A repo with no usable
 * segment (including the empty string) maps to the literal "unconfigured"
 * directory.
 */
function repoLogDir(repo: string): string {
  const segments = repo.split(/[\\/]/).filter((part) => part !== '' && part !== '.' && part !== '..');
  return path.join(BASE_DIR, ...(segments.length > 0 ? segments : ['unconfigured']));
}

/**
 * Appends `line` to `file` inside `dir`. If the directory was removed after
 * it had been cached (log cleanup, test teardown), the stale cache entry is
 * dropped and the write is retried once; any other error propagates.
 */
function appendLine(dir: string, file: string, line: string): void {
  try {
    ensureDir(dir);
    fs.appendFileSync(file, line);
  } catch (err) {
    if ((err as { code?: unknown } | null)?.code !== 'ENOENT') throw err;
    dirCache.delete(dir);
    ensureDir(dir);
    fs.appendFileSync(file, line);
  }
}

/**
 * Appends one NDJSON entry to the activity log of a pull request.
 *
 * @param prNumber pull request number; also the log file's base name
 * @param repo     `owner/name`; an empty string selects the "unconfigured" directory
 * @param event    event name stored in the entry
 * @param details  extra fields merged into the entry; they can add keys but
 *                 never replace `ts`, `event`, `pr` or `repo`
 */
export function prLog(prNumber: number, repo: string, event: string, details?: Record<string, unknown>): void {
  // Non-PR-scoped events on installs without PR_FACTORY_DEFAULT_REPO land
  // under a literal "unconfigured" directory rather than corrupting paths.
  const dir = repoLogDir(repo);

  const reserved = {
    ts: new Date().toISOString(),
    event,
    pr: prNumber,
    repo,
  };
  // Spread `reserved` twice: the first pass fixes the key order, the second
  // makes the reserved values win over same-named keys in `details`.
  const entry = { ...reserved, ...details, ...reserved };

  appendLine(dir, path.join(dir, `${prNumber}.log`), JSON.stringify(entry) + '\n');
}
|
||||
+209
@@ -0,0 +1,209 @@
|
||||
/**
|
||||
* pr-factory-core guard — bootstrap's consumption of the core entity-model
|
||||
* writers against the REAL composed schema (instance substrate + pr-factory
|
||||
* migration both applied, which pins the recipe ordering: slack-bots before
|
||||
* pr-factory-core).
|
||||
*
|
||||
* Asserts the full bootstrap surface: worker agent group (default-instance
|
||||
* messaging group, mention-sticky/per-thread wiring, seeded default
|
||||
* instructions); supervisor agent group + two instance-scoped messaging
|
||||
* groups with their distinct wirings; tester messaging group auto-created
|
||||
* when the operator's pr-tester agent group exists; idempotent re-run;
|
||||
* foreign-wiring drop; engage-mode drift correction.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Keep the real config, but redirect both data roots into this suite's
// scratch directory under /tmp.
vi.mock('../../config.js', async () => {
  const realConfig = await vi.importActual<typeof import('../../config.js')>('../../config.js');
  const scratchRoots = {
    DATA_DIR: '/tmp/nanoclaw-test-prf-bootstrap/data',
    GROUPS_DIR: '/tmp/nanoclaw-test-prf-bootstrap/groups',
  };
  return { ...realConfig, ...scratchRoots };
});
|
||||
|
||||
import { closeDb, createAgentGroup, initTestDb, runMigrations } from '../../db/index.js';
|
||||
import { getAgentGroupByFolder } from '../../db/agent-groups.js';
|
||||
import {
|
||||
createMessagingGroupAgent,
|
||||
getMessagingGroupAgents,
|
||||
getMessagingGroupByPlatform,
|
||||
updateMessagingGroupAgent,
|
||||
} from '../../db/messaging-groups.js';
|
||||
import { SUPERVISOR_INSTANCE } from '../../channels/slack-supervisor.js';
|
||||
import { TESTER_INSTANCE } from '../../channels/slack-tester.js';
|
||||
import { bootstrapPrFactory, TESTER_FOLDER, WORKER_FOLDER } from './bootstrap.js';
|
||||
import { SUPERVISOR_FOLDER } from './supervisor.js';
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-bootstrap';
|
||||
const WORKER_CHANNEL = 'C0WORK';
|
||||
const SUPERVISOR_CHANNEL = 'C0ADMIN';
|
||||
const WORKER_PLATFORM_ID = `slack:${WORKER_CHANNEL}`;
|
||||
|
||||
/** Current instant as an ISO-8601 string; used for the created_at of seeded rows. */
function now(): string {
  const instant = new Date();
  return instant.toISOString();
}
|
||||
|
||||
// Per-test fixture: empty scratch directory plus a freshly migrated test database.
beforeEach(() => {
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DIR, { recursive: true });
  runMigrations(initTestDb());
});
|
||||
|
||||
// Close the database before deleting the scratch directory.
afterEach(() => {
  closeDb();
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
});
|
||||
|
||||
describe('bootstrapPrFactory', () => {
  /** Bootstrap options without a supervisor channel. */
  const workerOnly = { workerChannelId: WORKER_CHANNEL };
  /** Bootstrap options that also enable the supervisor. */
  const withSupervisor = { workerChannelId: WORKER_CHANNEL, supervisorChannelId: SUPERVISOR_CHANNEL };

  /** Creates the pr-tester agent group directly, the way an operator would out of band. */
  function createTesterGroup(): void {
    createAgentGroup({
      id: 'ag-tester',
      name: 'PR Tester',
      folder: TESTER_FOLDER,
      agent_provider: null,
      created_at: now(),
    });
  }

  it('creates the worker agent group, default-instance messaging group, and mention-sticky/per-thread wiring', () => {
    const result = bootstrapPrFactory(workerOnly);

    const worker = getAgentGroupByFolder(WORKER_FOLDER);
    expect(worker).toBeDefined();
    expect(result.workerAgentGroupId).toBe(worker!.id);
    expect(result.workerPlatformId).toBe(WORKER_PLATFORM_ID);

    const mg = getMessagingGroupByPlatform('slack', WORKER_PLATFORM_ID, 'slack');
    expect(mg).toBeDefined();
    expect(result.workerMessagingGroupId).toBe(mg!.id);
    expect(mg!.instance).toBe('slack');

    const wirings = getMessagingGroupAgents(mg!.id);
    expect(wirings).toHaveLength(1);
    const [wiring] = wirings;
    expect(wiring.agent_group_id).toBe(worker!.id);
    expect(wiring.engage_mode).toBe('mention-sticky');
    expect(wiring.session_mode).toBe('per-thread');
    expect(wiring.ignored_message_policy).toBe('drop');
  });

  it('seeds the worker group with the default triage instructions', () => {
    bootstrapPrFactory(workerOnly);

    // Default review/triage/test-plan workflow lands in the group's
    // CLAUDE.local.md — the operator override point.
    const seeded = fs.readFileSync(path.join(TEST_DIR, 'groups', WORKER_FOLDER, 'CLAUDE.local.md'), 'utf8');
    for (const marker of ['# PR Factory Worker', 'PR triage workflow', 'send_to_testing']) {
      expect(seeded).toContain(marker);
    }
  });

  it('creates the supervisor group plus its two instance-scoped messaging groups with distinct modes', () => {
    bootstrapPrFactory(withSupervisor);

    const supervisor = getAgentGroupByFolder(SUPERVISOR_FOLDER);
    expect(supervisor).toBeDefined();

    // Admin channel: every message engages, one shared session.
    const adminMg = getMessagingGroupByPlatform('slack', `slack:${SUPERVISOR_CHANNEL}`, SUPERVISOR_INSTANCE);
    expect(adminMg).toBeDefined();
    const [adminWiring] = getMessagingGroupAgents(adminMg!.id);
    expect(adminWiring.agent_group_id).toBe(supervisor!.id);
    expect(adminWiring.engage_mode).toBe('pattern');
    expect(adminWiring.session_mode).toBe('shared');

    // PR channel: mention-only, per-thread, ignored messages accumulate.
    const prMg = getMessagingGroupByPlatform('slack', WORKER_PLATFORM_ID, SUPERVISOR_INSTANCE);
    expect(prMg).toBeDefined();
    const [prWiring] = getMessagingGroupAgents(prMg!.id);
    expect(prWiring.agent_group_id).toBe(supervisor!.id);
    expect(prWiring.engage_mode).toBe('mention');
    expect(prWiring.ignored_message_policy).toBe('accumulate');
    expect(prWiring.session_mode).toBe('per-thread');

    // The supervisor's PR-channel row never shadows the worker's
    // default-instance row.
    const workerMg = getMessagingGroupByPlatform('slack', WORKER_PLATFORM_ID, 'slack');
    expect(workerMg!.id).not.toBe(prMg!.id);
  });

  it('creates the tester messaging group + wiring when the operator-created pr-tester agent group exists', () => {
    const lookupTesterMg = () => getMessagingGroupByPlatform('slack', WORKER_PLATFORM_ID, TESTER_INSTANCE);

    // Without the tester agent group: no tester messaging group.
    bootstrapPrFactory(workerOnly);
    expect(lookupTesterMg()).toBeUndefined();

    // Operator creates the tester agent group out of band, then bootstrap re-runs.
    createTesterGroup();
    bootstrapPrFactory(workerOnly);

    const testerMg = lookupTesterMg();
    expect(testerMg).toBeDefined();
    const [wiring] = getMessagingGroupAgents(testerMg!.id);
    expect(wiring.agent_group_id).toBe('ag-tester');
    expect(wiring.engage_mode).toBe('mention');
    expect(wiring.session_mode).toBe('per-thread');
  });

  it('is idempotent — a second run creates no duplicate rows', () => {
    createTesterGroup();
    const first = bootstrapPrFactory(withSupervisor);
    const second = bootstrapPrFactory(withSupervisor);

    expect(second.workerAgentGroupId).toBe(first.workerAgentGroupId);
    expect(second.workerMessagingGroupId).toBe(first.workerMessagingGroupId);

    // Each bot instance still has exactly one row and one wiring on the PR channel.
    const instances = ['slack', SUPERVISOR_INSTANCE, TESTER_INSTANCE];
    instances.forEach((instance) => {
      const mg = getMessagingGroupByPlatform('slack', WORKER_PLATFORM_ID, instance);
      expect(mg).toBeDefined();
      expect(getMessagingGroupAgents(mg!.id)).toHaveLength(1);
    });
  });

  it('drops pre-seeded foreign wirings on the PR channel', () => {
    const first = bootstrapPrFactory(workerOnly);
    const workerMgId = first.workerMessagingGroupId;

    // A legacy agent group still wired to the PR channel.
    createAgentGroup({
      id: 'ag-legacy',
      name: 'Legacy',
      folder: 'legacy-worker',
      agent_provider: null,
      created_at: now(),
    });
    createMessagingGroupAgent({
      id: 'mga-legacy',
      messaging_group_id: workerMgId,
      agent_group_id: 'ag-legacy',
      engage_mode: 'pattern',
      engage_pattern: '.',
      sender_scope: 'all',
      ignored_message_policy: 'drop',
      session_mode: 'shared',
      priority: 0,
      created_at: now(),
    });
    expect(getMessagingGroupAgents(workerMgId)).toHaveLength(2);

    bootstrapPrFactory(workerOnly);

    const remaining = getMessagingGroupAgents(workerMgId);
    expect(remaining).toHaveLength(1);
    expect(remaining[0].agent_group_id).toBe(first.workerAgentGroupId);
  });

  it('self-corrects drifted wiring options instead of skipping them', () => {
    const first = bootstrapPrFactory(workerOnly);
    const workerMgId = first.workerMessagingGroupId;
    const [original] = getMessagingGroupAgents(workerMgId);

    // Simulate an older bootstrap having written different modes.
    updateMessagingGroupAgent(original.id, { engage_mode: 'pattern', session_mode: 'shared' });

    bootstrapPrFactory(workerOnly);

    // Same row, repaired in place.
    const [corrected] = getMessagingGroupAgents(workerMgId);
    expect(corrected.id).toBe(original.id);
    expect(corrected.engage_mode).toBe('mention-sticky');
    expect(corrected.session_mode).toBe('per-thread');
  });
});
|
||||
+240
@@ -0,0 +1,240 @@
|
||||
/**
|
||||
* Idempotent setup for the PR Factory:
|
||||
*
|
||||
* - PR Factory Worker agent group (one, fixed; default instructions seeded)
|
||||
* - PR Factory Supervisor agent group (optional, gated by config)
|
||||
* - messaging_groups for (PR channel × worker instance) and
|
||||
* (PR channel × supervisor instance)
|
||||
* - messaging_group for the supervisor's admin channel
|
||||
* - wirings between them
|
||||
*
|
||||
* Drops any pre-existing wiring on the PR channel that doesn't belong to the
|
||||
* new worker, so a legacy agent group stops receiving PR-channel traffic once
|
||||
* this module takes over.
|
||||
*
|
||||
* Called from index.ts at startup once the Slack adapters are ready.
|
||||
*/
|
||||
import { createAgentGroup, getAgentGroupByFolder } from '../../db/agent-groups.js';
|
||||
import {
|
||||
createMessagingGroup,
|
||||
createMessagingGroupAgent,
|
||||
deleteMessagingGroupAgent,
|
||||
getMessagingGroupAgents,
|
||||
getMessagingGroupByPlatform,
|
||||
updateMessagingGroupAgent,
|
||||
} from '../../db/messaging-groups.js';
|
||||
import { initGroupFilesystem } from '../../group-init.js';
|
||||
import { log } from '../../log.js';
|
||||
import { SUPERVISOR_INSTANCE } from '../../channels/slack-supervisor.js';
|
||||
import { TESTER_INSTANCE } from '../../channels/slack-tester.js';
|
||||
import type { AgentGroup, MessagingGroupAgent, UnknownSenderPolicy } from '../../types.js';
|
||||
import { SUPERVISOR_FOLDER, SUPERVISOR_INSTRUCTIONS } from './supervisor.js';
|
||||
import { WORKER_INSTRUCTIONS } from './worker-instructions.js';
|
||||
|
||||
export const WORKER_FOLDER = 'pr-factory-worker';
|
||||
|
||||
/**
|
||||
* The tester agent group is operator-created (its instructions describe the
|
||||
* operator's test environment, so they don't ship with the module). Bootstrap
|
||||
* only wires it up: when an agent group with this folder exists, the tester's
|
||||
* messaging group on the PR channel is created automatically.
|
||||
*/
|
||||
export const TESTER_FOLDER = 'pr-tester';
|
||||
|
||||
/**
 * Builds an id of the form `<prefix>-<epoch ms>-<6 base-36 chars>`.
 *
 * The random suffix is padded to a constant six characters: the base-36
 * rendering of `Math.random()` can have fewer than six fractional digits
 * (none at all for 0), which would otherwise yield a short or empty suffix.
 *
 * @param prefix short tag identifying the row kind (e.g. 'ag', 'mg', 'mga')
 */
function generateId(prefix: string): string {
  const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
  return `${prefix}-${Date.now()}-${suffix}`;
}
|
||||
|
||||
/**
 * Returns the agent group stored under `folder`, creating it first when none
 * exists. A newly created group gets a generated id and no agent provider,
 * and its filesystem is initialised (seeded with `instructions` when given).
 * An existing group is returned untouched.
 *
 * @param folder       folder name that identifies the agent group
 * @param name         display name used only when the group is created
 * @param instructions optional instructions handed to initGroupFilesystem
 */
function ensureAgentGroup(folder: string, name: string, instructions?: string): AgentGroup {
  const found = getAgentGroupByFolder(folder);
  if (found) return found;

  const created: AgentGroup = {
    id: generateId('ag'),
    name,
    folder,
    agent_provider: null,
    created_at: new Date().toISOString(),
  };
  createAgentGroup(created);

  const initOptions = instructions ? { instructions } : undefined;
  initGroupFilesystem(created, initOptions);

  log.info('PR factory: created agent group', { id: created.id, folder });
  return created;
}
|
||||
|
||||
/**
|
||||
* Exact (channel_type, platform_id, instance) ensure. Lookups pass the
|
||||
* instance explicitly so a named-instance row (supervisor/tester) is never
|
||||
* confused with the worker's default-instance row on the same channel —
|
||||
* `getMessagingGroupByPlatform` is exact-only when instance is set.
|
||||
*/
|
||||
/**
 * Returns the id of the messaging group for the exact
 * (channel_type, platform_id, instance) triple, creating the row if missing.
 *
 * An omitted instance resolves to the channel type itself. Lookups always
 * pass the instance explicitly so a named-instance row (supervisor/tester)
 * is never confused with the worker's default-instance row on the same
 * channel — `getMessagingGroupByPlatform` is exact-only when instance is set.
 *
 * @param channelType         channel type, e.g. 'slack'
 * @param platformId          platform id of the channel
 * @param instance            bot instance, or undefined for the default one
 * @param name                display name used only on creation
 * @param unknownSenderPolicy policy stored on a newly created row
 */
function ensureMessagingGroup(
  channelType: string,
  platformId: string,
  instance: string | undefined,
  name: string,
  unknownSenderPolicy: UnknownSenderPolicy,
): string {
  const resolvedInstance = instance ?? channelType;

  const found = getMessagingGroupByPlatform(channelType, platformId, resolvedInstance);
  if (found) {
    return found.id;
  }

  const id = generateId('mg');
  createMessagingGroup({
    id,
    channel_type: channelType,
    platform_id: platformId,
    instance: resolvedInstance,
    name,
    is_group: 1,
    unknown_sender_policy: unknownSenderPolicy,
    created_at: new Date().toISOString(),
  });
  log.info('PR factory: created messaging group', { id, platformId, instance: resolvedInstance });
  return id;
}
|
||||
|
||||
/** The wiring columns bootstrap owns and keeps in sync across runs. */
type WiringOptions = Pick<
  MessagingGroupAgent,
  'engage_mode' | 'engage_pattern' | 'sender_scope' | 'ignored_message_policy' | 'session_mode'
>;

/**
 * Makes sure `agentGroupId` is wired to `messagingGroupId` with exactly `opts`.
 *
 * With no wiring present, one is created at priority 0. With a wiring
 * present, only the fields that differ from `opts` are written back, so
 * bootstrap is self-correcting across upgrades (e.g. an earlier version that
 * wrote different engage_mode / session_mode values), not just self-skipping.
 */
function ensureWiring(messagingGroupId: string, agentGroupId: string, opts: WiringOptions): void {
  const current = getMessagingGroupAgents(messagingGroupId).find((m) => m.agent_group_id === agentGroupId);

  if (!current) {
    const id = generateId('mga');
    createMessagingGroupAgent({
      id,
      messaging_group_id: messagingGroupId,
      agent_group_id: agentGroupId,
      ...opts,
      priority: 0,
      created_at: new Date().toISOString(),
    });
    log.info('PR factory: created wiring', { id, messagingGroupId, agentGroupId });
    return;
  }

  // Collect every managed field whose stored value no longer matches.
  const patch: Partial<WiringOptions> = {};
  const fields: string[] = [];
  for (const [key, wanted] of Object.entries(opts)) {
    if (current[key as keyof WiringOptions] !== wanted) {
      // WiringOptions keys are a subset of MessagingGroupAgent, so the
      // untyped write stays within the row's columns.
      (patch as Record<string, unknown>)[key] = wanted;
      fields.push(key);
    }
  }

  if (fields.length > 0) {
    updateMessagingGroupAgent(current.id, patch);
    log.info('PR factory: updated wiring', { id: current.id, fields });
  }
}
|
||||
|
||||
/**
 * Deletes every wiring on `messagingGroupId` that points at an agent group
 * other than `keepAgentGroupId`.
 *
 * Without this, a legacy agent group keeps receiving PR-channel traffic and
 * double-engages on every message.
 */
function dropForeignWirings(messagingGroupId: string, keepAgentGroupId: string): void {
  const foreign = getMessagingGroupAgents(messagingGroupId).filter(
    (wiring) => wiring.agent_group_id !== keepAgentGroupId,
  );

  for (const { id, agent_group_id: agentGroupId } of foreign) {
    deleteMessagingGroupAgent(id);
    log.info('PR factory: dropped foreign wiring', { id, agentGroupId });
  }
}
|
||||
|
||||
/** Inputs to {@link bootstrapPrFactory}. */
export interface BootstrapOptions {
  /** Bare Slack channel id for PR threads, e.g. C0B0XTGUTS5. */
  workerChannelId: string;
  /** Bare Slack channel id for the supervisor's admin channel. Supervisor disabled if absent. */
  supervisorChannelId?: string;
}

/** Ids of the worker-side rows that {@link bootstrapPrFactory} ensured. */
export interface BootstrapResult {
  workerAgentGroupId: string;
  workerMessagingGroupId: string;
  workerPlatformId: string;
}

/**
 * Ensures the PR Factory's agent groups, messaging groups and wirings exist
 * and carry the expected options.
 *
 * The worker is always set up. The supervisor is set up only when
 * `supervisorChannelId` is given. The tester is wired only when an agent
 * group with folder TESTER_FOLDER already exists.
 *
 * @returns the worker's agent group id, messaging group id and platform id
 */
export function bootstrapPrFactory(opts: BootstrapOptions): BootstrapResult {
  const { workerChannelId, supervisorChannelId } = opts;
  const workerPlatformId = `slack:${workerChannelId}`;

  // Wiring shared by the supervisor and tester bots on the PR channel: engage
  // on mention only, one session per thread, accumulate ignored messages.
  const mentionPerThread: WiringOptions = {
    engage_mode: 'mention',
    engage_pattern: null,
    sender_scope: 'all',
    ignored_message_policy: 'accumulate',
    session_mode: 'per-thread',
  };

  // === Worker ===
  const worker = ensureAgentGroup(WORKER_FOLDER, 'PR Factory Worker', WORKER_INSTRUCTIONS);
  const workerMgId = ensureMessagingGroup('slack', workerPlatformId, undefined, 'PR Factory Worker', 'public');
  dropForeignWirings(workerMgId, worker.id);
  // mention-sticky + per-thread: worker only engages in threads it's been
  // explicitly subscribed to. The PR handler subscribes each new PR thread
  // on bootstrap so in-thread replies route automatically without anyone
  // needing to @-mention the worker. Top-level channel posts (e.g. someone
  // @-mentioning the supervisor to add them to the channel) don't engage
  // the worker because they're in an unsubscribed thread.
  ensureWiring(workerMgId, worker.id, {
    engage_mode: 'mention-sticky',
    engage_pattern: null,
    sender_scope: 'all',
    ignored_message_policy: 'drop',
    session_mode: 'per-thread',
  });

  // === Supervisor (optional) ===
  if (supervisorChannelId) {
    const supervisor = ensureAgentGroup(SUPERVISOR_FOLDER, 'PR Factory Supervisor', SUPERVISOR_INSTRUCTIONS);

    // Admin channel: the '.' pattern engages on every message, in one shared session.
    const adminMgId = ensureMessagingGroup(
      'slack',
      `slack:${supervisorChannelId}`,
      SUPERVISOR_INSTANCE,
      'PR Factory Supervisor (admin)',
      'public',
    );
    ensureWiring(adminMgId, supervisor.id, {
      engage_mode: 'pattern',
      engage_pattern: '.',
      sender_scope: 'all',
      ignored_message_policy: 'drop',
      session_mode: 'shared',
    });

    // PR channel, supervisor instance: a separate row from the worker's.
    const supervisorPrMgId = ensureMessagingGroup(
      'slack',
      workerPlatformId,
      SUPERVISOR_INSTANCE,
      'PR Factory Supervisor (PR threads)',
      'public',
    );
    ensureWiring(supervisorPrMgId, supervisor.id, mentionPerThread);
  }

  // === Tester (optional) ===
  // The pr-tester agent group is created by the operator (see TESTER_FOLDER).
  // When it exists, ensure the tester instance's messaging group on the PR
  // channel — the orchestrator resolves tester sessions against this row, and
  // index.ts refuses to start the test orchestrator without it.
  const tester = getAgentGroupByFolder(TESTER_FOLDER);
  if (tester) {
    const testerPrMgId = ensureMessagingGroup(
      'slack',
      workerPlatformId,
      TESTER_INSTANCE,
      'PR Factory Tester (PR threads)',
      'public',
    );
    ensureWiring(testerPrMgId, tester.id, mentionPerThread);
  }

  return {
    workerAgentGroupId: worker.id,
    workerMessagingGroupId: workerMgId,
    workerPlatformId,
  };
}
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Canvas seam — pr-factory-core's optional rendered-document surface.
|
||||
*
|
||||
* Core itself ships NO canvas implementation: `createCanvas` returns null
|
||||
* until a provider registers, and every caller (test plans, test results)
|
||||
 * falls back to plain text + .md file upload when it returns null. The `slack-canvas`
|
||||
* component registers the real Slack Canvas API client here at import time.
|
||||
*
|
||||
* Cross-component contract: keep `registerCanvasProvider` / `createCanvas` /
|
||||
* `CanvasResult` stable — the slack-canvas component imports them.
|
||||
*/
|
||||
import { log } from '../../log.js';
|
||||
|
||||
/** What a canvas provider reports back for a successfully created canvas. */
export interface CanvasResult {
  canvasId: string;
  permalink: string;
}

/**
 * Signature a canvas implementation registers: given a title, markdown body
 * and channel id, it resolves to the created canvas or null.
 */
export type CanvasProvider = (title: string, markdown: string, channelId: string) => Promise<CanvasResult | null>;

/** The single registered provider; null until registerCanvasProvider is called. */
let provider: CanvasProvider | null = null;

/** Installs `p` as the canvas provider, replacing any earlier registration. */
export function registerCanvasProvider(p: CanvasProvider): void {
  provider = p;
}

/**
 * Asks the registered provider to render `markdown` as a canvas for
 * `channelId`.
 *
 * @returns the provider's result, or null when no provider is registered or
 *          the provider throws/rejects (the failure is logged as a warning) —
 *          callers fall back to file upload.
 */
export async function createCanvas(title: string, markdown: string, channelId: string): Promise<CanvasResult | null> {
  const render = provider;
  if (!render) return null;

  let created: CanvasResult | null;
  try {
    created = await render(title, markdown, channelId);
    // eslint-disable-next-line no-catch-all/no-catch-all -- canvas is best-effort by contract; every caller has a file-upload fallback
  } catch (err) {
    log.warn('Canvas provider failed — falling back to file upload', { title, err });
    created = null;
  }
  return created;
}
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Install-specific defaults for the PR Factory, read once from .env.
|
||||
*
|
||||
* PR_FACTORY_DEFAULT_REPO — repo assumed when an MCP action omits `repo`
|
||||
* and used as the activity-log key for
|
||||
* non-PR-scoped events. No built-in default:
|
||||
* set it to your repo (e.g. acme/widgets) or
|
||||
* always pass `repo` explicitly in tool calls.
|
||||
* PR_FACTORY_REPO_MIRROR_DIR — local clone the handler fast-forwards before
|
||||
* each triage so the worker container has
|
||||
* current code to search. Optional — when the
|
||||
* directory doesn't exist the refresh is a
|
||||
* no-op. Default: data/repo-mirror.
|
||||
* PR_FACTORY_REVIEW_SKILL — optional name of an operator-supplied
|
||||
* container skill that owns the triage/review
|
||||
* workflow. When set, PR trigger prompts tell
|
||||
* the worker to invoke /<skill>; when unset,
|
||||
* the worker follows the default triage
|
||||
* workflow seeded into its group instructions.
|
||||
*/
|
||||
import path from 'path';
|
||||
|
||||
import { readEnvFile } from '../../env.js';
|
||||
|
||||
/** Settings this module reads, from process.env first and the .env file second. */
type PrFactoryKey = 'PR_FACTORY_DEFAULT_REPO' | 'PR_FACTORY_REPO_MIRROR_DIR' | 'PR_FACTORY_REVIEW_SKILL';

const env = readEnvFile(['PR_FACTORY_DEFAULT_REPO', 'PR_FACTORY_REPO_MIRROR_DIR', 'PR_FACTORY_REVIEW_SKILL']);

/**
 * Resolves one setting: the process environment wins over the .env file, and
 * an unset or empty value at both levels yields `fallback`.
 */
function setting(key: PrFactoryKey, fallback: string): string {
  return process.env[key] || env[key] || fallback;
}

/** Repo assumed when none is given; empty string when not configured. */
export const DEFAULT_REPO = setting('PR_FACTORY_DEFAULT_REPO', '');

/** Absolute path of the local repo mirror; defaults to data/repo-mirror. */
export const REPO_MIRROR_DIR = path.resolve(setting('PR_FACTORY_REPO_MIRROR_DIR', path.join('data', 'repo-mirror')));

/** Name of the operator-supplied review skill; empty string when not configured. */
export const REVIEW_SKILL = setting('PR_FACTORY_REVIEW_SKILL', '');
|
||||
|
||||
/**
|
||||
* The sentence that points the worker at its triage workflow. Operators who
|
||||
* ship their own tuned container skill set PR_FACTORY_REVIEW_SKILL; everyone
|
||||
* else gets the default group-instruction workflow seeded by bootstrap.
|
||||
*/
|
||||
/**
 * Returns the sentence that tells the worker how to triage a pull request:
 * a pointer to the /<REVIEW_SKILL> skill when REVIEW_SKILL is non-empty,
 * otherwise a pointer to the triage workflow in the group instructions.
 */
export function triageDirective(): string {
  if (REVIEW_SKILL) {
    return `Use the /${REVIEW_SKILL} skill to triage this pull request.`;
  }
  return 'Triage this pull request following the PR triage workflow in your group instructions.';
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Dismiss stale approval cards for a PR session.
|
||||
*
|
||||
* Dismisses ALL pending approval cards for the session — only one
|
||||
* active card per thread at a time. If the agent needs multiple GH
|
||||
* commands, they should be combined into one or queued sequentially
|
||||
* (approve card 1 → executes → agent posts card 2).
|
||||
*/
|
||||
import { getPendingApprovalsBySession, deletePendingApproval } from '../../db/sessions.js';
|
||||
import { getMessagingGroup } from '../../db/messaging-groups.js';
|
||||
import { getDeliveryAdapter } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
/**
 * Dismisses every pending approval of `session`.
 *
 * For each pending row, the posted card is edited in place to a
 * struck-through "Dismissed" text — only when a delivery adapter, the row's
 * platform message id and the session's messaging group are all available —
 * and then the row is deleted. A failed edit is logged and does not stop the
 * delete.
 *
 * @param session session whose pending approvals are dismissed
 * @returns number of approvals dismissed (0 when there were none)
 */
export async function dismissStaleApprovals(session: Session): Promise<number> {
  const pending = getPendingApprovalsBySession(session.id);
  const count = pending.length;
  if (count === 0) return 0;

  const adapter = getDeliveryAdapter();
  const mg = session.messaging_group_id ? getMessagingGroup(session.messaging_group_id) : null;

  for (const { approval_id: approvalId, platform_message_id: messageId, title } of pending) {
    // Edit the card in place to show it's been dismissed. The instance arg
    // routes the edit through the same bot identity that posted the card —
    // under exact-instance dispatch an omitted instance would edit through
    // the default (worker) bot and fail on supervisor/tester cards.
    if (adapter && messageId && mg) {
      const edit = JSON.stringify({
        operation: 'edit',
        messageId,
        text: `~${title || 'Approval'}~ — Dismissed`,
      });
      try {
        await adapter.deliver(mg.channel_type, mg.platform_id, session.thread_id, 'chat-sdk', edit, undefined, mg.instance);
        // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort card edit; the row delete below is the functional part
      } catch (err) {
        log.warn('Failed to edit dismissed approval card', { approvalId, err });
      }
    }

    deletePendingApproval(approvalId);
  }

  log.info('Dismissed stale approval cards', { count, sessionId: session.id });
  return count;
}
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* GH-action seam — pr-factory-core's optional credentialed-GitHub surface.
|
||||
*
|
||||
* The container's `credentialed_gh` MCP tool emits a `pr_gh` system action;
|
||||
* core registers the delivery action (so the agent always gets feedback) but
|
||||
* ships NO executor — running approved `gh` commands with operator
|
||||
* credentials is the `gh-action-approval` component, which registers its
|
||||
* handler here at import time.
|
||||
*
|
||||
* Cross-component contract: keep `GhActionHandler` / `setGhActionHandler`
|
||||
* stable — the gh-action-approval component registers against them.
|
||||
*/
|
||||
import { notifyAgent } from '../approvals/primitive.js';
|
||||
import { log } from '../../log.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
export type GhActionHandler = (content: Record<string, unknown>, session: Session) => Promise<void>;
|
||||
|
||||
let handler: GhActionHandler | null = null;
|
||||
|
||||
/**
 * Installs the `pr_gh` executor.
 *
 * @param h Handler to run for `pr_gh` actions, or `null` to unregister
 *   (e.g. to pin the fallback path in tests).
 */
export function setGhActionHandler(h: GhActionHandler | null): void {
  // Last registration wins; there is a single module-wide slot.
  handler = h;
}
|
||||
|
||||
/**
 * Delivery-action entry for `pr_gh`. Registered by core's index.ts.
 *
 * Forwards to the registered handler when there is one; otherwise logs a
 * warning and tells the agent that `credentialed_gh` is unavailable.
 *
 * @param content Payload of the `pr_gh` action, passed through unchanged.
 * @param session Session that requested the action.
 */
export async function dispatchGhAction(content: Record<string, unknown>, session: Session): Promise<void> {
  if (handler) {
    await handler(content, session);
    return;
  }

  log.warn('pr_gh requested but the gh-action-approval component is not installed', { sessionId: session.id });
  const unavailableNotice =
    'credentialed_gh is unavailable: the gh-action-approval component is not installed on this host. ' +
    'Report the intended command in the thread instead.';
  notifyAgent(session, unavailableNotice);
}
|
||||
+196
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* pr-factory-core guard — handler.ts's consumption of core seams:
|
||||
* resolveSession / writeSessionMessage / the sessions + pr_threads schema,
|
||||
* plus the cross-process PR_CONTEXT trigger contract the worker's
|
||||
* group instructions (or an operator review skill) parse.
|
||||
*
|
||||
* Real migrated central DB, real bootstrap output, real on-disk session DBs
|
||||
* under a mocked DATA_DIR. Only the external edges are faked: global fetch
|
||||
* (Slack opener/reactions + GitHub diff/stats; OneCLI admin API down → the
|
||||
* direct-fetch fallback path) and the container runtime (wake/kill).
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import fs from 'fs';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
vi.mock('../../container-runner.js', () => ({
|
||||
wakeContainer: vi.fn().mockResolvedValue(undefined),
|
||||
isContainerRunning: vi.fn().mockReturnValue(false),
|
||||
killContainer: vi.fn(),
|
||||
buildAgentGroupImage: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
vi.mock('../../config.js', async () => {
|
||||
const actual = await vi.importActual<typeof import('../../config.js')>('../../config.js');
|
||||
return {
|
||||
...actual,
|
||||
DATA_DIR: '/tmp/nanoclaw-test-prf-handler/data',
|
||||
GROUPS_DIR: '/tmp/nanoclaw-test-prf-handler/groups',
|
||||
};
|
||||
});
|
||||
|
||||
// Keep the test hermetic: no repo-mirror git calls, no NDJSON files in the
|
||||
// real data/ directory. triageDirective stays REAL — the trigger-text
|
||||
// assertions below pin the default (group-instructions) directive.
|
||||
vi.mock('./defaults.js', async () => {
|
||||
const actual = await vi.importActual<typeof import('./defaults.js')>('./defaults.js');
|
||||
return {
|
||||
...actual,
|
||||
DEFAULT_REPO: 'acme/widgets',
|
||||
REPO_MIRROR_DIR: '/tmp/nanoclaw-test-prf-handler/no-mirror',
|
||||
};
|
||||
});
|
||||
vi.mock('./activity-log.js', () => ({ prLog: vi.fn() }));
|
||||
|
||||
import { closeDb, initTestDb, runMigrations } from '../../db/index.js';
|
||||
import { getPrThreadByRepoPr } from '../../db/pr-threads.js';
|
||||
import { getSession } from '../../db/sessions.js';
|
||||
import { inboundDbPath } from '../../session-manager.js';
|
||||
import { killContainer, wakeContainer } from '../../container-runner.js';
|
||||
import { bootstrapPrFactory, type BootstrapResult } from './bootstrap.js';
|
||||
import { handlePullRequest, type HandlerConfig } from './handler.js';
|
||||
import type { PREvent } from './webhook.js';
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-handler';
|
||||
const WORKER_CHANNEL = 'C0WORK';
|
||||
|
||||
let bootstrap: BootstrapResult;
|
||||
let cfg: HandlerConfig;
|
||||
let slackTs: number;
|
||||
|
||||
/**
 * Builds a PR webhook event fixture: a non-draft, unmerged `opened` event
 * for acme/widgets#42, with any `overrides` applied on top.
 */
function prEvent(overrides: Partial<PREvent> = {}): PREvent {
  const base: PREvent = {
    action: 'opened',
    number: 42,
    title: 'Add widgets',
    body: 'Adds the widgets.',
    author: 'octocat',
    repoFullName: 'acme/widgets',
    headSha: 'abc123',
    diffUrl: 'https://github.com/acme/widgets/pull/42.diff',
    htmlUrl: 'https://github.com/acme/widgets/pull/42',
    merged: false,
    draft: false,
  };
  return { ...base, ...overrides };
}
|
||||
|
||||
/** Wraps `body` in a JSON `Response` with the given HTTP status (200 by default). */
function jsonRes(body: unknown, status = 200): Response {
  const headers = { 'Content-Type': 'application/json' };
  const payload = JSON.stringify(body);
  return new Response(payload, { status, headers });
}
|
||||
|
||||
/**
 * Reads every row of `messages_in` from a session's on-disk inbound DB,
 * in insertion (rowid) order.
 *
 * @param sessionId Id of a session that exists in the central DB.
 * @returns The `id`, `kind` and `content` columns of each inbound message.
 * @throws Error when no session row exists for `sessionId`.
 */
function readInbound(sessionId: string): Array<{ id: string; kind: string; content: string }> {
  const session = getSession(sessionId);
  if (!session) {
    // Fail with a readable message instead of a TypeError on `undefined`.
    throw new Error(`readInbound: no session row for ${sessionId}`);
  }

  const db = new Database(inboundDbPath(session.agent_group_id, sessionId), { readonly: true });
  try {
    return db.prepare('SELECT id, kind, content FROM messages_in ORDER BY rowid').all() as Array<{
      id: string;
      kind: string;
      content: string;
    }>;
  } finally {
    // Always release the handle, even when the query throws, so afterEach
    // can remove the test directory.
    db.close();
  }
}
|
||||
|
||||
// Per-test setup: empty scratch dir, freshly migrated central DB, a fake
// global fetch for every external HTTP edge, then a real bootstrap.
beforeEach(() => {
  if (fs.existsSync(TEST_DIR)) {
    fs.rmSync(TEST_DIR, { recursive: true });
  }
  fs.mkdirSync(TEST_DIR, { recursive: true });
  runMigrations(initTestDb());

  slackTs = 0;

  const fakeFetch = vi.fn(async (url: unknown, init?: { headers?: Record<string, string> }) => {
    const target = String(url);

    // OneCLI absent → direct fetch
    if (target.includes('/api/agents/default')) {
      return jsonRes({ error: 'down' }, 500);
    }

    // Each opener post gets a distinct, predictable thread ts.
    if (target.includes('slack.com/api/chat.postMessage')) {
      slackTs += 1;
      return jsonRes({ ok: true, ts: `1700000000.00010${slackTs}` });
    }

    // Any other Slack method (e.g. reactions) simply succeeds.
    if (target.includes('slack.com/api/')) {
      return jsonRes({ ok: true });
    }

    if (target.includes('api.github.com')) {
      if (target.includes('/files?')) {
        return jsonRes([{ filename: 'src/widgets.ts' }]);
      }
      const accept = init?.headers?.Accept || '';
      if (accept.includes('diff')) {
        return new Response('diff --git a/src/widgets.ts b/src/widgets.ts', { status: 200 });
      }
      return jsonRes({ commits: 1, changed_files: 1, additions: 5, deletions: 2 });
    }

    throw new Error(`unexpected fetch in test: ${target}`);
  });
  vi.stubGlobal('fetch', fakeFetch);

  bootstrap = bootstrapPrFactory({ workerChannelId: WORKER_CHANNEL });
  cfg = { workerBotToken: 'xoxb-test', workerChannelId: WORKER_CHANNEL, bootstrap };
});
|
||||
|
||||
// Per-test teardown: restore globals, reset mock call history, close the
// central DB, and remove the scratch directory.
afterEach(() => {
  vi.unstubAllGlobals();
  vi.clearAllMocks();
  closeDb();

  const scratchExists = fs.existsSync(TEST_DIR);
  if (scratchExists) {
    fs.rmSync(TEST_DIR, { recursive: true });
  }
});
|
||||
|
||||
describe('handlePullRequest', () => {
  const REPO = 'acme/widgets';
  const PR_NUMBER = 42;

  /** Asserts the session's inbound DB holds exactly one message and returns its text. */
  const soleInboundText = (sessionId: string): string => {
    const inbound = readInbound(sessionId);
    expect(inbound).toHaveLength(1);
    const parsed = JSON.parse(inbound[0].content) as { text: string };
    return parsed.text;
  };

  it('opened: records pr_threads, writes the PR_CONTEXT trigger to the session inbound DB, wakes the container', async () => {
    await handlePullRequest(prEvent(), cfg);

    const thread = getPrThreadByRepoPr(REPO, PR_NUMBER);
    expect(thread).toBeDefined();
    const { channel_id: channelId, thread_ts: threadTs, session_id: sessionId } = thread!;
    expect(channelId).toBe(bootstrap.workerPlatformId);
    expect(threadTs).toBe('1700000000.000101');

    const session = getSession(sessionId);
    expect(session?.agent_group_id).toBe(bootstrap.workerAgentGroupId);

    const text = soleInboundText(sessionId);
    // With no PR_FACTORY_REVIEW_SKILL configured, the directive defers to the
    // seeded group instructions.
    expect(text).toContain('PR triage workflow in your group instructions');
    expect(text).toContain('diff --git');
    expect(text).toContain(
      `[PR_CONTEXT: channel=${bootstrap.workerPlatformId} thread=${threadTs} repo=acme/widgets pr=42]`,
    );

    expect(vi.mocked(wakeContainer)).toHaveBeenCalledTimes(1);
  });

  it('opened: a redelivered webhook for an existing PR thread is a no-op', async () => {
    await handlePullRequest(prEvent(), cfg);
    const first = getPrThreadByRepoPr(REPO, PR_NUMBER)!;

    await handlePullRequest(prEvent(), cfg);
    const second = getPrThreadByRepoPr(REPO, PR_NUMBER)!;

    expect(second.session_id).toBe(first.session_id);
    expect(readInbound(first.session_id)).toHaveLength(1);
  });

  it('synchronize: kills the old container, re-creates the session in the same thread, repoints pr_threads', async () => {
    await handlePullRequest(prEvent(), cfg);
    const before = getPrThreadByRepoPr(REPO, PR_NUMBER)!;

    await handlePullRequest(prEvent({ action: 'synchronize' }), cfg);

    expect(vi.mocked(killContainer)).toHaveBeenCalledWith(before.session_id, expect.stringContaining('synchronize'));

    const after = getPrThreadByRepoPr(REPO, PR_NUMBER)!;
    expect(after.thread_ts).toBe(before.thread_ts);
    expect(after.session_id).not.toBe(before.session_id);
    expect(getSession(before.session_id)).toBeUndefined();

    const text = soleInboundText(after.session_id);
    expect(text).toContain('re-triage PR #42');
    expect(text).toContain(`thread=${after.thread_ts}`);
  });

  it('draft opened: creates the thread and pr_threads row but defers triage (no trigger message)', async () => {
    await handlePullRequest(prEvent({ draft: true }), cfg);

    const thread = getPrThreadByRepoPr(REPO, PR_NUMBER);
    expect(thread).toBeDefined();
    expect(readInbound(thread!.session_id)).toHaveLength(0);
    expect(vi.mocked(wakeContainer)).not.toHaveBeenCalled();
  });
});
|
||||
+482
@@ -0,0 +1,482 @@
|
||||
/**
|
||||
* Per-PR session bootstrap.
|
||||
*
|
||||
* On a `pull_request.opened` webhook event:
|
||||
* 1. Post a short opener to the configured Slack channel — the response
|
||||
* `ts` becomes the PR thread.
|
||||
* 2. Resolve a per-thread session under the PR Factory Worker agent group.
|
||||
* 3. Record (channel, thread) → (repo, pr#, session) in `pr_threads`.
|
||||
* 4. Fetch the diff, build the agent prompt with [PR_CONTEXT: …] tag,
|
||||
* write to the session's inbound DB, wake the worker container.
|
||||
*
|
||||
* On a `pull_request.synchronize` event (new commits pushed):
|
||||
* 1. Look up the existing pr_threads row for this repo/PR.
|
||||
* 2. Kill the running container and clear the session.
|
||||
* 3. Re-fetch the diff and write a new trigger into the same thread.
|
||||
*/
|
||||
import { execFile } from 'child_process';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { fetch as undiciFetch, ProxyAgent } from 'undici';
|
||||
|
||||
import { ONECLI_URL } from '../../config.js';
|
||||
import { getChannelAdapterExact } from '../../channels/channel-registry.js';
|
||||
import { getMessagingGroupByPlatform } from '../../db/messaging-groups.js';
|
||||
import { resolveSession, writeSessionMessage } from '../../session-manager.js';
|
||||
import { killContainer, wakeContainer } from '../../container-runner.js';
|
||||
import { deleteSession, getSession } from '../../db/sessions.js';
|
||||
import { createPrThread, getPrThreadByRepoPr, updatePrThreadSession, type PrThread } from '../../db/pr-threads.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
import { REPO_MIRROR_DIR, triageDirective } from './defaults.js';
|
||||
import { getTestOrchestrator } from './test-orchestration.js';
|
||||
import type { PREvent } from './webhook.js';
|
||||
import type { BootstrapResult } from './bootstrap.js';
|
||||
|
||||
const MAX_DIFF_LENGTH = 50_000;
|
||||
|
||||
/**
 * Brings the local repo mirror up to `origin/main` (fetch, then hard reset).
 *
 * Does nothing when the mirror directory is absent. Any git failure is
 * logged as a warning and swallowed, so callers never see it.
 */
async function refreshRepoMirror(): Promise<void> {
  if (!fs.existsSync(REPO_MIRROR_DIR)) {
    return;
  }

  // Runs one git command inside the mirror; rejects on a non-zero exit or timeout.
  const runGit = (args: string[], timeout: number): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      execFile('git', args, { cwd: REPO_MIRROR_DIR, timeout }, (err) => (err ? reject(err) : resolve()));
    });

  try {
    await runGit(['fetch', 'origin', 'main', '--quiet'], 15_000);
    await runGit(['reset', '--hard', 'origin/main', '--quiet'], 10_000);
    log.debug('Repo mirror refreshed');
    // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort by contract: a stale mirror must never block triage
  } catch (err) {
    log.warn('Repo mirror refresh failed (non-blocking)', { err });
  }
}
|
||||
|
||||
/**
 * Builds an id of the form `<prefix>-<epoch millis>-<random base-36 suffix>`.
 * The suffix is at most six characters.
 */
function generateId(prefix: string): string {
  const timestamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  return [prefix, timestamp, suffix].join('-');
}
|
||||
|
||||
// Route GitHub calls through OneCLI's gateway (an HTTP forward proxy on
// ONECLI_URL's host, port 10255) so the gateway injects a vault-stored PAT
// for `api.github.com`. Three pieces are load-bearing:
//
//   1. Use undici's own `fetch` + `ProxyAgent`. Node 22's built-in fetch
//      uses a different bundled undici and rejects an external dispatcher.
//   2. Embed the OneCLI agent's access token in the proxy URL as Basic
//      auth (`http://x:<token>@host:port`). Without it the gateway can't
//      identify the calling agent and falls back to "no agent" mode, which
//      injects nothing — symptom is GitHub returning the 60/h
//      unauthenticated rate limit instead of 5000/h.
//   3. Trust the gateway's self-signed CA (~/.onecli/gateway-ca.pem). The
//      gateway MITMs the TLS to splice in the Authorization header, so
//      vanilla CA bundles don't cover it. Scoped to the ProxyAgent, no
//      env-var change needed.
//
// Inert if any piece is unavailable (no token, no CA file): callers then
// fall back to a direct unauthenticated GitHub call.

/** Port the OneCLI gateway (forward proxy) is reached on. */
const ONECLI_GATEWAY_PORT = 10255;

// Memoised *promise* rather than the resolved value: handlePullRequest fires
// fetchPrStats and fetchPrAreas concurrently, and caching only the result let
// both of them hit the admin API and build a ProxyAgent each.
let onecliProxyAgentPromise: Promise<ProxyAgent | null> | undefined;

/**
 * Builds the gateway ProxyAgent once. Never rejects: any failure (admin API
 * down, no token, unreadable CA file, malformed ONECLI_URL) is logged and
 * resolves to `null`, which callers treat as "go direct".
 */
async function buildOnecliProxyAgent(): Promise<ProxyAgent | null> {
  try {
    const adminRes = await fetch(`${ONECLI_URL}/api/agents/default`);
    if (!adminRes.ok) throw new Error(`OneCLI admin API ${adminRes.status}`);
    const agent = (await adminRes.json()) as { accessToken?: string };
    if (!agent.accessToken) throw new Error('OneCLI default agent has no access token');

    const caPath = path.join(os.homedir(), '.onecli', 'gateway-ca.pem');
    const ca = fs.readFileSync(caPath);

    // Parse instead of regex-rewriting the port: the old `/:\d+$/` replace
    // silently did nothing when ONECLI_URL had no explicit port or carried a
    // trailing slash/path, pointing the proxy at the wrong endpoint.
    const gatewayBase = `${new URL(ONECLI_URL).hostname}:${ONECLI_GATEWAY_PORT}`;

    const proxyAgent = new ProxyAgent({
      uri: `http://x:${agent.accessToken}@${gatewayBase}`,
      requestTls: { ca },
    });
    log.info('OneCLI proxy agent ready', { gateway: gatewayBase });
    return proxyAgent;
    // eslint-disable-next-line no-catch-all/no-catch-all -- documented degradation: no gateway → direct unauthenticated GitHub calls
  } catch (err) {
    log.warn('OneCLI proxy agent unavailable — GitHub calls will go direct', { err });
    return null;
  }
}

/**
 * Returns the shared OneCLI gateway dispatcher, or `null` when the gateway
 * cannot be used. The outcome of the first attempt — success or failure — is
 * cached for the lifetime of the process.
 */
async function getOnecliProxyAgent(): Promise<ProxyAgent | null> {
  if (onecliProxyAgentPromise === undefined) {
    onecliProxyAgentPromise = buildOnecliProxyAgent();
  }
  return onecliProxyAgentPromise;
}
|
||||
|
||||
/** Size summary of a pull request, taken from the GitHub pull-request payload. */
interface PrStats {
  /** Value of the payload's `commits` field (0 when absent). */
  commits: number;
  /** Value of the payload's `changed_files` field (0 when absent). */
  changed_files: number;
  /** Value of the payload's `additions` field (0 when absent). */
  additions: number;
  /** Value of the payload's `deletions` field (0 when absent). */
  deletions: number;
}
|
||||
|
||||
/**
 * Fetches commit/file/line counts for a pull request from the GitHub API,
 * via the OneCLI gateway when it is available and directly otherwise.
 *
 * @returns The stats, or `null` on a non-2xx response or any thrown error.
 */
async function fetchPrStats(repoFullName: string, prNumber: number): Promise<PrStats | null> {
  const endpoint = `https://api.github.com/repos/${repoFullName}/pulls/${prNumber}`;
  const proxy = await getOnecliProxyAgent();
  const headers = { Accept: 'application/vnd.github.v3+json', 'User-Agent': 'NanoClaw' };

  // Missing (or otherwise falsy) counters collapse to 0.
  const asCount = (value: unknown): number => (value as number) || 0;

  try {
    const res = proxy
      ? await undiciFetch(endpoint, { headers, dispatcher: proxy })
      : await fetch(endpoint, { headers });
    if (!res.ok) {
      return null;
    }

    const payload = (await res.json()) as Record<string, unknown>;
    const stats: PrStats = {
      commits: asCount(payload.commits),
      changed_files: asCount(payload.changed_files),
      additions: asCount(payload.additions),
      deletions: asCount(payload.deletions),
    };
    return stats;
    // eslint-disable-next-line no-catch-all/no-catch-all -- stats line is cosmetic; the opener posts without it
  } catch (err) {
    log.warn('Failed to fetch PR stats', { err, repo: repoFullName, pr: prNumber });
    return null;
  }
}
|
||||
|
||||
/**
 * Summarises which part of the repo a pull request touches most.
 *
 * Groups the PR's changed files (first page, up to 100) by their first two
 * path segments, and returns the most-touched group in backticks followed by
 * ` +N more` when other groups exist.
 *
 * @returns The summary, or `''` on a non-2xx response, no files, or any error.
 */
async function fetchPrAreas(repoFullName: string, prNumber: number): Promise<string> {
  const endpoint = `https://api.github.com/repos/${repoFullName}/pulls/${prNumber}/files?per_page=100`;
  const proxy = await getOnecliProxyAgent();
  const headers = { Accept: 'application/vnd.github.v3+json', 'User-Agent': 'NanoClaw' };

  try {
    const res = proxy
      ? await undiciFetch(endpoint, { headers, dispatcher: proxy })
      : await fetch(endpoint, { headers });
    if (!res.ok) {
      return '';
    }

    const files = (await res.json()) as { filename: string }[];

    // Tally files per area: `dir/subdir` for nested paths, the bare name otherwise.
    const tally = new Map<string, number>();
    for (const { filename } of files) {
      const [top, second] = filename.split('/');
      const area = second !== undefined ? `${top}/${second}` : top;
      tally.set(area, (tally.get(area) || 0) + 1);
    }

    // Most-touched areas first.
    const ranked = Array.from(tally.entries()).sort(([, left], [, right]) => right - left);

    const MAX_SHOWN = 1;
    const label = ranked
      .slice(0, MAX_SHOWN)
      .map(([area]) => `\`${area}\``)
      .join(' · ');
    const hidden = ranked.length - MAX_SHOWN;
    return hidden > 0 ? label + ` +${hidden} more` : label;
    // eslint-disable-next-line no-catch-all/no-catch-all -- areas line is cosmetic; the opener posts without it
  } catch (err) {
    log.warn('Failed to fetch PR files', { err, repo: repoFullName, pr: prNumber });
    return '';
  }
}
|
||||
|
||||
/**
 * Fetches the unified diff of a pull request from the GitHub API, via the
 * OneCLI gateway when available and directly otherwise.
 *
 * Never throws: an HTTP error or a network-level failure yields a
 * parenthesised placeholder string instead, so triage still gets a trigger
 * message. By the time this runs the Slack thread and pr_threads row already
 * exist, so an exception here would leave the PR without a trigger, and a
 * redelivered webhook is skipped as "already bootstrapped".
 *
 * @param repoFullName `owner/repo` of the pull request.
 * @param prNumber Pull request number.
 * @returns The diff text, cut at MAX_DIFF_LENGTH characters with a trailing
 *   truncation note, or a `(Failed to fetch diff: …)` placeholder.
 */
async function fetchDiff(repoFullName: string, prNumber: number): Promise<string> {
  const url = `https://api.github.com/repos/${repoFullName}/pulls/${prNumber}`;
  const headers = { Accept: 'application/vnd.github.v3.diff', 'User-Agent': 'NanoClaw' };

  try {
    const dispatcher = await getOnecliProxyAgent();
    const res = dispatcher ? await undiciFetch(url, { headers, dispatcher }) : await fetch(url, { headers });
    if (!res.ok) {
      log.warn('Failed to fetch PR diff', { status: res.status, repo: repoFullName, pr: prNumber });
      return `(Failed to fetch diff: HTTP ${res.status})`;
    }

    const diff = await res.text();
    if (diff.length <= MAX_DIFF_LENGTH) {
      return diff;
    }
    return (
      diff.slice(0, MAX_DIFF_LENGTH) +
      `\n\n... (diff truncated at ${MAX_DIFF_LENGTH} chars — ask to review specific files for the rest)`
    );
    // eslint-disable-next-line no-catch-all/no-catch-all -- a network failure degrades to a placeholder, matching the HTTP-error path and the sibling fetchers
  } catch (err) {
    log.warn('Failed to fetch PR diff', { err, repo: repoFullName, pr: prNumber });
    const reason = err instanceof Error ? err.message : String(err);
    return `(Failed to fetch diff: ${reason})`;
  }
}
|
||||
|
||||
/**
 * Posts `text` to a Slack channel via `chat.postMessage` and returns the
 * message timestamp.
 *
 * @param botToken Bot token sent as the Bearer credential.
 * @param channelId Channel to post into.
 * @param text Message text (sent with `mrkdwn: true`).
 * @returns The `ts` of the posted message.
 * @throws Error on a non-2xx HTTP status, or when Slack answers `ok: false`
 *   or without a `ts`.
 */
async function postSlackThreadOpener(botToken: string, channelId: string, text: string): Promise<string> {
  const payload = JSON.stringify({ channel: channelId, text, mrkdwn: true });
  const response = await fetch('https://slack.com/api/chat.postMessage', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${botToken}`,
      'Content-Type': 'application/json; charset=utf-8',
    },
    body: payload,
  });

  if (!response.ok) {
    throw new Error(`Slack chat.postMessage HTTP ${response.status}`);
  }

  const { ok, ts, error } = (await response.json()) as { ok: boolean; ts?: string; error?: string };
  if (ok && ts) {
    return ts;
  }
  throw new Error(`Slack chat.postMessage failed: ${error || 'no ts'}`);
}
|
||||
|
||||
import { addReaction, removeReaction, EMOJI_OPEN, EMOJI_CLOSED, EMOJI_MERGED, EMOJI_DRAFT } from './reactions.js';
|
||||
|
||||
/** Everything `handlePullRequest` needs besides the webhook event itself. */
export interface HandlerConfig {
  /** Bot token of the Slack worker app; posts the thread opener and sets reactions. */
  workerBotToken: string;
  /** Bare Slack channel id that PR threads are opened in, e.g. C0B0XTGUTS5. */
  workerChannelId: string;
  /** Result of bootstrapPrFactory(): agent group, messaging group and platform id. */
  bootstrap: BootstrapResult;
}
|
||||
|
||||
/**
 * Escapes Slack's three control characters (`&`, `<`, `>`) in untrusted text.
 * Without this, a PR title such as `<!channel>` would be parsed as a mention
 * and a stray `>` would terminate the `<url|label>` link early.
 */
function escapeSlackText(text: string): string {
  return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Entry point for a GitHub `pull_request` webhook event.
 *
 * Dispatch by `pr.action`:
 *   - `closed`: swap the thread's status reaction to merged/closed and ask
 *     the test orchestrator (if installed) to destroy the PR's VM.
 *   - `converted_to_draft`: swap the status reaction to draft.
 *   - `ready_for_review`: with an existing thread, swap the reaction to open
 *     and re-triage in that thread; otherwise handled like `opened`.
 *   - `synchronize`: skipped for drafts; with an existing thread, re-triage
 *     in that thread; otherwise handled like `opened`.
 *   - anything else (the opened flow): post the Slack opener, create the
 *     per-thread session and pr_threads row, and — unless the PR is a draft —
 *     write the triage trigger and wake the worker container. A PR that
 *     already has a thread is skipped.
 *
 * @param pr Parsed webhook event.
 * @param cfg Slack credentials/channel plus the bootstrap result.
 * @throws Whatever the Slack opener post, reaction calls, session helpers or
 *   container wake throw; those are not caught here.
 */
export async function handlePullRequest(pr: PREvent, cfg: HandlerConfig): Promise<void> {
  // closed = PR was closed or merged (from GH UI or our tool)
  if (pr.action === 'closed') {
    const existing = getPrThreadByRepoPr(pr.repoFullName, pr.number);
    if (!existing) return;
    const bareChannel = cfg.workerChannelId;
    await removeReaction(cfg.workerBotToken, bareChannel, existing.thread_ts, EMOJI_OPEN);
    await removeReaction(cfg.workerBotToken, bareChannel, existing.thread_ts, EMOJI_DRAFT);
    await addReaction(cfg.workerBotToken, bareChannel, existing.thread_ts, pr.merged ? EMOJI_MERGED : EMOJI_CLOSED);
    prLog(pr.number, pr.repoFullName, 'pr_closed', { merged: pr.merged });
    log.info('PR status reaction updated', { pr: pr.number, merged: pr.merged });

    // Destroy the test VM if one exists for this PR (vm-test-orchestrator
    // component; no-op when not installed).
    const orch = getTestOrchestrator();
    if (orch) {
      try {
        await orch.destroyVm(pr.number);
        // eslint-disable-next-line no-catch-all/no-catch-all -- VM teardown is best-effort on close; the orchestrator reaps stale VMs itself
      } catch (err) {
        log.warn('Failed to destroy test VM on PR close', { pr: pr.number, err });
      }
    }
    return;
  }

  // converted_to_draft = PR marked as draft
  if (pr.action === 'converted_to_draft') {
    const existing = getPrThreadByRepoPr(pr.repoFullName, pr.number);
    if (!existing) return;
    await removeReaction(cfg.workerBotToken, cfg.workerChannelId, existing.thread_ts, EMOJI_OPEN);
    await addReaction(cfg.workerBotToken, cfg.workerChannelId, existing.thread_ts, EMOJI_DRAFT);
    prLog(pr.number, pr.repoFullName, 'converted_to_draft');
    log.info('PR marked as draft', { pr: pr.number });
    return;
  }

  // ready_for_review = draft PR marked as ready — treat like opened
  if (pr.action === 'ready_for_review') {
    const existing = getPrThreadByRepoPr(pr.repoFullName, pr.number);
    if (existing) {
      // Existing thread — swap emoji and re-triage in same thread
      await removeReaction(cfg.workerBotToken, cfg.workerChannelId, existing.thread_ts, EMOJI_DRAFT);
      await addReaction(cfg.workerBotToken, cfg.workerChannelId, existing.thread_ts, EMOJI_OPEN);
      await handleSynchronize(pr, existing, cfg);
      return;
    }
    // No existing thread — fall through to opened flow
  }

  // synchronize = new commits pushed to an existing PR
  if (pr.action === 'synchronize') {
    if (pr.draft) {
      prLog(pr.number, pr.repoFullName, 'synchronize_skipped_draft');
      log.info('PR synchronize on draft — skipping', { pr: pr.number });
      return;
    }
    const existing = getPrThreadByRepoPr(pr.repoFullName, pr.number);
    if (!existing) {
      log.info('PR synchronize but no existing thread, treating as opened', {
        repo: pr.repoFullName,
        pr: pr.number,
      });
      // Fall through to the opened flow below
    } else {
      await handleSynchronize(pr, existing, cfg);
      return;
    }
  }

  // Opened flow. A redelivered webhook for a PR that already has a thread
  // must not open a second one.
  const existing = getPrThreadByRepoPr(pr.repoFullName, pr.number);
  if (existing) {
    log.info('PR thread already bootstrapped, skipping', {
      repo: pr.repoFullName,
      pr: pr.number,
      threadTs: existing.thread_ts,
    });
    return;
  }

  const draftLabel = pr.draft ? ' (draft)' : '';
  const [stats, areas] = await Promise.all([
    fetchPrStats(pr.repoFullName, pr.number),
    fetchPrAreas(pr.repoFullName, pr.number),
  ]);
  const statsLine = stats
    ? `${stats.changed_files} file${stats.changed_files !== 1 ? 's' : ''} · ${stats.commits} commit${stats.commits !== 1 ? 's' : ''} · +${stats.additions} −${stats.deletions}`
    : '';

  // Title and author come straight from the webhook, i.e. from whoever opened
  // the PR — escape them before embedding in mrkdwn. Empty stats/areas lines
  // are dropped so the opener has no blank line.
  const safeTitle = escapeSlackText(pr.title);
  const safeAuthor = escapeSlackText(pr.author);
  const opener = [
    `*<${pr.htmlUrl}|PR #${pr.number}: ${safeTitle}>*${draftLabel}`,
    statsLine,
    areas,
    `Author: <https://github.com/${encodeURIComponent(pr.author)}|${safeAuthor}>`,
  ]
    .filter((line) => line !== '')
    .join('\n');

  prLog(pr.number, pr.repoFullName, 'thread_creating', { author: pr.author, title: pr.title, draft: pr.draft });
  const threadTs = await postSlackThreadOpener(cfg.workerBotToken, cfg.workerChannelId, opener);

  // Mark the thread with the appropriate status emoji
  await addReaction(cfg.workerBotToken, cfg.workerChannelId, threadTs, pr.draft ? EMOJI_DRAFT : EMOJI_OPEN);

  const now = new Date().toISOString();
  const sessionThreadId = `${cfg.bootstrap.workerPlatformId}:${threadTs}`;
  const { session } = resolveSession(
    cfg.bootstrap.workerAgentGroupId,
    cfg.bootstrap.workerMessagingGroupId,
    sessionThreadId,
    'per-thread',
  );

  createPrThread({
    channel_id: cfg.bootstrap.workerPlatformId,
    thread_ts: threadTs,
    channel_type: 'slack',
    repo_full_name: pr.repoFullName,
    pr_number: pr.number,
    session_id: session.id,
    created_at: now,
  });

  // Draft PRs: thread + pr_threads row created, but no triage. Triage
  // triggers when the author marks the PR as ready_for_review.
  if (pr.draft) {
    prLog(pr.number, pr.repoFullName, 'draft_thread_created', { threadTs });
    log.info('Draft PR — thread created, triage deferred', { pr: pr.number, repo: pr.repoFullName });
    return;
  }

  // Pre-subscribe the worker to the new PR thread.
  try {
    const workerAdapter = getChannelAdapterExact('slack');
    if (workerAdapter?.subscribe) {
      await workerAdapter.subscribe(cfg.bootstrap.workerPlatformId, sessionThreadId);
    }
    // eslint-disable-next-line no-catch-all/no-catch-all -- a missed pre-subscribe self-heals on the first @-mention; never block triage
  } catch (err) {
    log.warn('Failed to pre-subscribe worker to PR thread', { threadTs, err });
  }

  // Refresh repo mirror so the worker has up-to-date code for codebase searches
  await refreshRepoMirror();

  prLog(pr.number, pr.repoFullName, 'diff_fetching');
  const diff = await fetchDiff(pr.repoFullName, pr.number);
  prLog(pr.number, pr.repoFullName, 'diff_fetched', { length: diff.length });

  // The trailing [PR_CONTEXT: …] tag is a cross-process contract parsed on
  // the worker side — keep its exact shape.
  const content = [
    triageDirective(),
    '',
    `## Pull Request #${pr.number}: ${pr.title}`,
    `**Author:** ${pr.author}`,
    `**Repository:** ${pr.repoFullName}`,
    `**URL:** ${pr.htmlUrl}`,
    '',
    '### Description',
    pr.body || '(no description)',
    '',
    '### Diff',
    '```diff',
    diff,
    '```',
    '',
    `[PR_CONTEXT: channel=${cfg.bootstrap.workerPlatformId} thread=${threadTs} repo=${pr.repoFullName} pr=${pr.number}]`,
  ].join('\n');

  writeSessionMessage(cfg.bootstrap.workerAgentGroupId, session.id, {
    id: generateId('msg-pr'),
    kind: 'chat',
    timestamp: now,
    platformId: cfg.bootstrap.workerPlatformId,
    channelType: 'slack',
    threadId: sessionThreadId,
    content: JSON.stringify({ text: content, sender: 'GitHub', senderId: 'github-webhook' }),
  });

  prLog(pr.number, pr.repoFullName, 'session_bootstrapped', { sessionId: session.id, threadTs });
  log.info('PR session bootstrapped', {
    sessionId: session.id,
    pr: pr.number,
    repo: pr.repoFullName,
    threadTs,
  });

  // Re-read the session row before waking, and only wake if it still exists.
  const fresh = getSession(session.id);
  if (fresh) {
    prLog(pr.number, pr.repoFullName, 'container_waking');
    await wakeContainer(fresh);
  }
}
|
||||
|
||||
/**
 * Handle a synchronize event (new commits pushed to an existing PR).
 * Kills the old container, creates a fresh session in the same Slack thread,
 * and re-triggers triage with the updated diff.
 *
 * The messaging-group lookup happens BEFORE anything is torn down: if it
 * fails, the function returns with the old session untouched, so pr_threads
 * is not left pointing at a deleted session.
 *
 * @param pr Parsed webhook event carrying the new head of the PR.
 * @param existing The pr_threads row already recorded for this repo/PR.
 * @param cfg Slack credentials/channel plus the bootstrap result.
 */
async function handleSynchronize(pr: PREvent, existing: PrThread, cfg: HandlerConfig): Promise<void> {
  prLog(pr.number, pr.repoFullName, 'synchronize', { oldSessionId: existing.session_id });

  // The worker's row is the default Slack instance — pass it explicitly for
  // an exact lookup that can never resolve a sibling instance's row on the
  // same channel.
  const sessionThreadId = `${existing.channel_id}:${existing.thread_ts}`;
  const messagingGroup = getMessagingGroupByPlatform('slack', existing.channel_id, 'slack');
  if (!messagingGroup) {
    log.warn('PR synchronize: no messaging group for channel', { channelId: existing.channel_id });
    return;
  }

  // Kill the old container + session. This must precede resolveSession so
  // the thread gets a new session (the guard test asserts the id changes).
  const oldSession = getSession(existing.session_id);
  if (oldSession) {
    killContainer(oldSession.id, 'PR synchronize — new commits pushed');
    deleteSession(oldSession.id);
  }

  // Create a fresh session in the same thread.
  const { session } = resolveSession(
    cfg.bootstrap.workerAgentGroupId,
    messagingGroup.id,
    sessionThreadId,
    'per-thread',
  );

  // Update pr_threads to point to the new session
  updatePrThreadSession(existing.channel_id, existing.thread_ts, session.id);

  // Refresh repo mirror + fetch fresh diff
  await refreshRepoMirror();
  const diff = await fetchDiff(pr.repoFullName, pr.number);
  const now = new Date().toISOString();

  // The trailing [PR_CONTEXT: …] tag is a cross-process contract parsed on
  // the worker side — keep its exact shape.
  const content = [
    `New commits pushed — re-triage PR #${pr.number}. ${triageDirective()}`,
    '',
    `## Pull Request #${pr.number}: ${pr.title}`,
    `**Author:** ${pr.author}`,
    `**Repository:** ${pr.repoFullName}`,
    `**URL:** ${pr.htmlUrl}`,
    '',
    '### Diff (updated)',
    '```diff',
    diff,
    '```',
    '',
    `[PR_CONTEXT: channel=${existing.channel_id} thread=${existing.thread_ts} repo=${pr.repoFullName} pr=${pr.number}]`,
  ].join('\n');

  writeSessionMessage(cfg.bootstrap.workerAgentGroupId, session.id, {
    id: generateId('msg-sync'),
    kind: 'chat',
    timestamp: now,
    platformId: existing.channel_id,
    channelType: 'slack',
    threadId: sessionThreadId,
    content: JSON.stringify({ text: content, sender: 'GitHub', senderId: 'github-webhook' }),
  });

  prLog(pr.number, pr.repoFullName, 'synchronize_bootstrapped', { sessionId: session.id });
  log.info('PR synchronize: session re-bootstrapped in same thread', {
    pr: pr.number,
    repo: pr.repoFullName,
    sessionId: session.id,
    threadTs: existing.thread_ts,
  });

  // Re-read the session row before waking, and only wake if it still exists.
  const fresh = getSession(session.id);
  if (fresh) {
    prLog(pr.number, pr.repoFullName, 'container_waking');
    await wakeContainer(fresh);
  }
}
|
||||
+148
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
* PR Factory module — Slack edition.
|
||||
*
|
||||
* Listens for GitHub `pull_request.opened` events on the shared webhook
|
||||
* server, opens a Slack thread in the configured channel, spins up a
|
||||
* per-thread session under the PR Factory Worker agent group, and seeds
|
||||
* it with the PR diff + a triage instruction.
|
||||
*
|
||||
* If a supervisor admin channel is configured (and the second Slack app
|
||||
* is set up), also bootstraps the PR Factory Supervisor agent group with
|
||||
* wirings to its admin channel and to the worker's PR channel.
|
||||
*
|
||||
* Inert if `GITHUB_WEBHOOK_SECRET` is unset.
|
||||
*
|
||||
* Optional sibling components plug into seams owned here and degrade
|
||||
* gracefully when absent:
|
||||
* - gh-action-approval → setGhActionHandler (gh-action.ts)
|
||||
* - vm-test-orchestrator → registerTestOrchestrator (test-orchestration.ts)
|
||||
* - slack-canvas → registerCanvasProvider (canvas.ts)
|
||||
*
|
||||
* Env vars (read from .env):
|
||||
* GITHUB_WEBHOOK_SECRET — required to enable the module
|
||||
* PR_FACTORY_SLACK_CHANNEL_ID — bare Slack channel id for PR threads
|
||||
* SLACK_BOT_TOKEN — worker bot token (reused from /add-slack)
|
||||
* PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID — optional: enables supervisor wiring
|
||||
* SLACK_SUPERVISOR_BOT_TOKEN — required when supervisor enabled
|
||||
* SLACK_SUPERVISOR_SIGNING_SECRET — required when supervisor enabled
|
||||
*/
|
||||
import { readEnvFile } from '../../env.js';
|
||||
import { onShutdown } from '../../response-registry.js';
|
||||
import { onDeliveryAdapterReady, registerDeliveryAction } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import { registerApprovalResolvedHandler } from '../approvals/primitive.js';
|
||||
import { TESTER_INSTANCE } from '../../channels/slack-tester.js';
|
||||
import { getMessagingGroupByPlatform } from '../../db/messaging-groups.js';
|
||||
import { clearAwaitingApproval } from './reactions.js';
|
||||
import { registerGitHubWebhook } from './webhook.js';
|
||||
import { handlePullRequest } from './handler.js';
|
||||
import { bootstrapPrFactory, TESTER_FOLDER } from './bootstrap.js';
|
||||
import { getTestOrchestrator } from './test-orchestration.js';
|
||||
import { dispatchGhAction } from './gh-action.js';
|
||||
import { initOrchestrator, shutdownOrchestrator, handleTestResults } from './orchestrator.js';
|
||||
import { handleSendToTesting } from './testing-approval.js';
|
||||
import { handleProposeSkillEdit } from './skill-edit-approval.js';
|
||||
import { getAgentGroupByFolder } from '../../db/agent-groups.js';
|
||||
|
||||
// Settings are taken from the process environment first and from the .env
// file second; a value that is missing (or empty) in both resolves to ''.
const env = readEnvFile([
  'GITHUB_WEBHOOK_SECRET',
  'PR_FACTORY_SLACK_CHANNEL_ID',
  'PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID',
  'SLACK_BOT_TOKEN',
]);

/** Look up one setting: process.env wins, then the .env value, else ''. */
function envSetting(key: string): string {
  return process.env[key] || env[key] || '';
}

const GITHUB_WEBHOOK_SECRET = envSetting('GITHUB_WEBHOOK_SECRET');
const WORKER_CHANNEL_ID = envSetting('PR_FACTORY_SLACK_CHANNEL_ID');
const SUPERVISOR_CHANNEL_ID = envSetting('PR_FACTORY_SUPERVISOR_SLACK_CHANNEL_ID');
const SLACK_BOT_TOKEN = envSetting('SLACK_BOT_TOKEN');
|
||||
|
||||
// Reject-path cleanup for the awaiting-approval reaction on a PR thread.
// Approve paths clear the reaction inside each approval handler; a reject is
// resolved in core, so it is observed here through the approvals module's
// resolved hook. Sessions without a pr_threads row are a no-op. (The event
// also carries a userId namespaced `<channel>:<handle>`; it is not used here.)
registerApprovalResolvedHandler(async ({ session, outcome }) => {
  if (outcome === 'reject') {
    await clearAwaitingApproval(session);
  }
});
|
||||
|
||||
/**
 * Start the PR factory once its configuration is complete.
 *
 * A missing GITHUB_WEBHOOK_SECRET disables the module quietly (debug log). A
 * secret without the worker channel id or without the Slack bot token is
 * logged as a warning, and nothing is registered in either case.
 */
function startPrFactory(): void {
  if (!GITHUB_WEBHOOK_SECRET) {
    log.debug('PR factory: GITHUB_WEBHOOK_SECRET not set, module disabled');
    return;
  }
  if (!WORKER_CHANNEL_ID) {
    log.warn('PR factory: GITHUB_WEBHOOK_SECRET set but PR_FACTORY_SLACK_CHANNEL_ID missing');
    return;
  }
  if (!SLACK_BOT_TOKEN) {
    log.warn('PR factory: GITHUB_WEBHOOK_SECRET set but SLACK_BOT_TOKEN missing (run /add-slack first)');
    return;
  }

  // Register supervisor MCP action handlers up front — they don't depend on
  // adapter readiness and may need to fire as soon as a session is alive.
  // The container tools omit `repo` when the agent doesn't pass one — the
  // default is applied host-side, from PR_FACTORY_DEFAULT_REPO (the container
  // never sees that env var). pr_submit_test_results applies the same default
  // inside handleTestResults. pr_gh dispatches through the gh-action seam so
  // the agent gets feedback even when the gh-action-approval component isn't
  // installed.
  registerDeliveryAction('pr_send_to_testing', async (content, session) => {
    await handleSendToTesting(content, session);
  });
  registerDeliveryAction('pr_propose_skill_edit', async (content, session) => {
    await handleProposeSkillEdit(content, session);
  });
  registerDeliveryAction('pr_gh', async (content, session) => {
    await dispatchGhAction(content, session);
  });
  registerDeliveryAction('pr_submit_test_results', async (content, session) => {
    await handleTestResults(content, session);
  });

  // Wait for delivery adapters so the Slack adapter is connected before we
  // try to look up its messaging_groups row in bootstrap.
  onDeliveryAdapterReady(() => {
    log.info('PR factory: onDeliveryAdapterReady callback fired');

    const bootstrap = bootstrapPrFactory({
      workerChannelId: WORKER_CHANNEL_ID,
      supervisorChannelId: SUPERVISOR_CHANNEL_ID || undefined,
    });

    registerGitHubWebhook(GITHUB_WEBHOOK_SECRET, (pr) =>
      handlePullRequest(pr, {
        workerBotToken: SLACK_BOT_TOKEN,
        workerChannelId: WORKER_CHANNEL_ID,
        bootstrap,
      }),
    );

    // Initialize the coordination layer when BOTH the vm-test-orchestrator
    // component is installed (registered its module at import time) AND the
    // operator-created tester agent group + its PR-channel wiring exist.
    const workerPlatformId = `slack:${WORKER_CHANNEL_ID}`;
    const testOrchestrator = getTestOrchestrator();
    const testerAg = getAgentGroupByFolder(TESTER_FOLDER);
    const testerMg = testerAg ? getMessagingGroupByPlatform('slack', workerPlatformId, TESTER_INSTANCE) : undefined;

    let testOrchestratorEnabled = false;
    if (testOrchestrator && testerAg && testerMg) {
      testOrchestratorEnabled = true;
      initOrchestrator(testOrchestrator, testerAg.id, testerMg.id);
      log.info('Test orchestrator initialized', {
        testerAgentGroupId: testerAg.id,
        testerMessagingGroupId: testerMg.id,
      });
    } else {
      // Report which of the three prerequisites is missing.
      log.info('Test orchestrator disabled', {
        componentInstalled: !!testOrchestrator,
        testerAgentGroup: !!testerAg,
        testerMessagingGroup: !!testerMg,
      });
    }

    onShutdown(async () => {
      shutdownOrchestrator();
      await getTestOrchestrator()?.shutdown();
    });

    log.info('PR factory module started', {
      workerChannel: WORKER_CHANNEL_ID,
      supervisorEnabled: !!SUPERVISOR_CHANNEL_ID,
      testOrchestratorEnabled,
    });
  });
}

startPrFactory();
|
||||
+207
@@ -0,0 +1,207 @@
|
||||
/**
|
||||
* pr-factory-core guard — the orchestrator's consumption of the two-DB
|
||||
* session seam: writeOutboundDirect into the worker session's outbound.db
|
||||
* (depends on core's read-write outbound open), writeSessionMessage +
|
||||
* wakeContainer for the inbound trigger, and resolveSession against the
|
||||
* tester-instance messaging group.
|
||||
*
|
||||
* The VM control plane is a stub conforming to the TestOrchestratorModule
|
||||
* seam — its callbacks are captured from initOrchestrator's init() call and
|
||||
* driven directly. Real central DB, real on-disk session DBs. No canvas
|
||||
* provider is registered, so summaries take the plain-text fallback.
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import fs from 'fs';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Container lifecycle is replaced with mocks, so these tests only observe the
// calls made to it.
vi.mock('../../container-runner.js', () => {
  return {
    wakeContainer: vi.fn().mockResolvedValue(undefined),
    isContainerRunning: vi.fn().mockReturnValue(false),
    killContainer: vi.fn(),
    buildAgentGroupImage: vi.fn().mockResolvedValue(undefined),
  };
});

// Keep the real config, but point the data and groups directories at a
// scratch location under /tmp.
vi.mock('../../config.js', async () => {
  const realConfig = await vi.importActual<typeof import('../../config.js')>('../../config.js');
  return {
    ...realConfig,
    DATA_DIR: '/tmp/nanoclaw-test-prf-orch/data',
    GROUPS_DIR: '/tmp/nanoclaw-test-prf-orch/groups',
  };
});

// The activity log is replaced with a mock.
vi.mock('./activity-log.js', () => {
  return { prLog: vi.fn() };
});
|
||||
|
||||
import { closeDb, createAgentGroup, createMessagingGroup, initTestDb, runMigrations } from '../../db/index.js';
|
||||
import { createPrThread } from '../../db/pr-threads.js';
|
||||
import { inboundDbPath, outboundDbPath, resolveSession } from '../../session-manager.js';
|
||||
import { wakeContainer } from '../../container-runner.js';
|
||||
import { handleTestResults, initOrchestrator, shutdownOrchestrator } from './orchestrator.js';
|
||||
import type { OrchestratorCallbacks, TestOrchestratorModule } from './test-orchestration.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
// Scratch directory; the mocked DATA_DIR / GROUPS_DIR above live beneath it.
const TEST_DIR = '/tmp/nanoclaw-test-prf-orch';
const REPO = 'acme/widgets';
const CHANNEL_ID = 'slack:C0WORK';
const THREAD_TS = '1700000000.000100';

type MockFn = ReturnType<typeof vi.fn>;

// Shared fixtures, (re)assigned in beforeEach.
let callbacks: OrchestratorCallbacks; // captured from the stub's init() call
let testOrchStub: { init: MockFn; completeRun: MockFn; cancelRun: MockFn };
let workerSession: Session;
|
||||
|
||||
/** Current time as an ISO-8601 string, used for the fixtures' `created_at`. */
function now(): string {
  const stamp = new Date();
  return stamp.toISOString();
}
|
||||
|
||||
/**
 * Read every row's `kind` and `content` from one table of a session database,
 * in rowid order.
 *
 * The database is opened read-only and closed in `finally`, so a failing
 * query (for example a missing table) does not leak the handle.
 *
 * @param dbPath path of the SQLite file to open
 * @param table  table name; interpolated into the SQL, so pass trusted names only
 * @returns the rows as `{ kind, content }` objects
 */
function readRows(dbPath: string, table: string): Array<{ kind: string; content: string }> {
  const db = new Database(dbPath, { readonly: true });
  try {
    return db.prepare(`SELECT kind, content FROM ${table} ORDER BY rowid`).all() as Array<{
      kind: string;
      content: string;
    }>;
  } finally {
    db.close();
  }
}
|
||||
|
||||
beforeEach(() => {
  // Start from an empty scratch directory and a freshly migrated test DB.
  if (fs.existsSync(TEST_DIR)) {
    fs.rmSync(TEST_DIR, { recursive: true });
  }
  fs.mkdirSync(TEST_DIR, { recursive: true });
  runMigrations(initTestDb());

  // Two agent groups: the PR worker and the tester.
  const agentGroups = [
    ['ag-worker', 'Worker', 'pr-factory-worker'],
    ['ag-tester', 'Tester', 'pr-tester'],
  ] as const;
  for (const [id, name, folder] of agentGroups) {
    createAgentGroup({ id, name, folder, agent_provider: null, created_at: now() });
  }

  // Both messaging groups share the platform id; only the tester's carries
  // an `instance`.
  const sharedChannel = {
    channel_type: 'slack',
    platform_id: CHANNEL_ID,
    name: null,
    is_group: 1,
    unknown_sender_policy: 'public',
  } as const;
  createMessagingGroup({ id: 'mg-worker', ...sharedChannel, created_at: now() });
  createMessagingGroup({ id: 'mg-tester', ...sharedChannel, instance: 'slack-tester', created_at: now() });

  // Worker session for the PR thread, plus the pr_threads row pointing at it.
  const resolvedWorker = resolveSession('ag-worker', 'mg-worker', `${CHANNEL_ID}:${THREAD_TS}`, 'per-thread');
  workerSession = resolvedWorker.session;
  createPrThread({
    channel_id: CHANNEL_ID,
    thread_ts: THREAD_TS,
    channel_type: 'slack',
    repo_full_name: REPO,
    pr_number: 42,
    session_id: workerSession.id,
    created_at: now(),
  });

  // Stub of the test-orchestrator seam; init() hands us the orchestrator's
  // callbacks so the tests can drive them directly.
  const init = vi.fn((cbs: OrchestratorCallbacks) => {
    callbacks = cbs;
  });
  testOrchStub = {
    init,
    completeRun: vi.fn(),
    cancelRun: vi.fn().mockResolvedValue(undefined),
  };
  initOrchestrator(testOrchStub as unknown as TestOrchestratorModule, 'ag-tester', 'mg-tester');
  expect(testOrchStub.init).toHaveBeenCalledTimes(1);
});
|
||||
|
||||
afterEach(() => {
  // Shut the orchestrator down first: it clears any timeout armed by a test.
  shutdownOrchestrator();
  vi.clearAllMocks();
  closeDb();
  if (fs.existsSync(TEST_DIR)) {
    fs.rmSync(TEST_DIR, { recursive: true });
  }
});
|
||||
|
||||
describe('orchestrator', () => {
  const sessionThreadId = `${CHANNEL_ID}:${THREAD_TS}`;
  const prContext = `[PR_CONTEXT: channel=${CHANNEL_ID} thread=${THREAD_TS} repo=${REPO} pr=42]`;

  /** Pull the `text` field out of a stored row's JSON content. */
  const textOf = (row: { content: string }): string => (JSON.parse(row.content) as { text: string }).text;
  const workerInbound = () => readRows(inboundDbPath('ag-worker', workerSession.id), 'messages_in');
  const workerOutbound = () => readRows(outboundDbPath('ag-worker', workerSession.id), 'messages_out');
  /** Submit test results for PR 42 through the delivery-action handler. */
  const submitResults = (verdict: string, content: string) =>
    handleTestResults({ pr_number: 42, repo: REPO, verdict, content }, workerSession);

  it('onVmReady resolves a tester session in the PR thread, writes the plan trigger, and wakes it', async () => {
    await callbacks.onVmReady(42, REPO, 'test-vm-42.example.test', '## Test Plan');

    const { created, session: testerSession } = resolveSession(
      'ag-tester',
      'mg-tester',
      sessionThreadId,
      'per-thread',
    );
    expect(created).toBe(false); // already created by onVmReady

    const testerInbound = readRows(inboundDbPath('ag-tester', testerSession.id), 'messages_in');
    expect(testerInbound).toHaveLength(1);
    const trigger = textOf(testerInbound[0]);
    expect(trigger).toContain('test-vm-42.example.test');
    expect(trigger).toContain('## Test Plan');
    expect(trigger).toContain(prContext);
    expect(vi.mocked(wakeContainer)).toHaveBeenCalledTimes(1);

    // Clear the 30-minute timeout armed by onVmReady.
    await submitResults('PASS', 'all good');
  });

  it('PASS verdict: posts the summary to the worker outbound.db and wakes the worker to propose merge', async () => {
    await submitResults('PASS', 'all good');

    expect(testOrchStub.completeRun).toHaveBeenCalledWith(42);

    const posted = workerOutbound();
    expect(posted).toHaveLength(1);
    expect(posted[0].kind).toBe('chat');
    expect(textOf(posted[0])).toContain('PASS');

    const received = workerInbound();
    expect(received).toHaveLength(1);
    const prompt = textOf(received[0]);
    expect(prompt).toContain('Propose merge');
    expect(prompt).toContain(prContext);
    expect(vi.mocked(wakeContainer)).toHaveBeenCalledTimes(1);
  });

  it('FAIL verdict: wakes the worker to analyze instead of proposing merge', async () => {
    await submitResults('FAIL', 'test 3 failed');

    const received = workerInbound();
    expect(received).toHaveLength(1);
    const prompt = textOf(received[0]);
    expect(prompt).toContain('Analyze the results');
    expect(prompt).toContain('test 3 failed');
    expect(prompt).not.toContain('Propose merge');
    expect(vi.mocked(wakeContainer)).toHaveBeenCalledTimes(1);
  });

  it('a second onVmReady for the same PR clears the prior timeout — no orphaned timer fires', async () => {
    vi.useFakeTimers();
    try {
      // Two VM-ready events for the same PR. The orchestrator clears the first
      // timer before arming the second; otherwise the first would be orphaned
      // and still fire at +30min, posting a spurious "Test Timeout".
      await callbacks.onVmReady(42, REPO, 'vm-a.example.test', '## Plan A');
      await callbacks.onVmReady(42, REPO, 'vm-b.example.test', '## Plan B');

      // Results arrive and clear the single live (second) timer.
      await submitResults('PASS', 'all good');

      // Advance well past the 30-minute ceiling. A leaked first timer would
      // run the timeout handler here and append a timeout message.
      await vi.advanceTimersByTimeAsync(31 * 60 * 1000);

      const timeoutMessages = workerOutbound().filter((row) => textOf(row).includes('Test Timeout'));
      expect(timeoutMessages).toHaveLength(0);
    } finally {
      vi.useRealTimers();
    }
  });
});
|
||||
+332
@@ -0,0 +1,332 @@
|
||||
/**
|
||||
* Orchestrator — coordination layer between the (optional) test-orchestrator
|
||||
* component and NanoClaw.
|
||||
*
|
||||
* Pure NanoClaw coordination. Never SSHes into anything — the VM control
|
||||
* plane lives in the vm-test-orchestrator component, reached only through
|
||||
* the TestOrchestratorModule seam (test-orchestration.ts).
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Wires callbacks into the registered test orchestrator (onVmReady, onRunFailed)
|
||||
* - Wakes the tester agent when a VM is ready
|
||||
* - Handles test results arriving via the submit_test_results MCP tool
|
||||
* - Enforces a 30-minute timeout per test run
|
||||
* - Posts results, wakes the worker to propose merge on PASS / analyze on FAIL
|
||||
*/
|
||||
import { getPrThreadByRepoPr } from '../../db/pr-threads.js';
|
||||
import { getSession } from '../../db/sessions.js';
|
||||
import { resolveSession, writeOutboundDirect, writeSessionMessage } from '../../session-manager.js';
|
||||
import { wakeContainer } from '../../container-runner.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
import { createCanvas } from './canvas.js';
|
||||
import { DEFAULT_REPO } from './defaults.js';
|
||||
import type { TestOrchestratorModule } from './test-orchestration.js';
|
||||
import { postRetryCard } from './testing-approval.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
// ── Constants ──

/** How long a test run may go without results before it is timed out. */
const TIMEOUT_MS = 30 * 60_000; // 30 minutes

// ── State ──

/** Registered test orchestrator; null until initOrchestrator and again after shutdown. */
let testOrch: TestOrchestratorModule | null = null;
/** Tester agent group id captured by initOrchestrator. */
let testerAgGroupId = '';
/** Tester messaging group id captured by initOrchestrator. */
let testerMgId = '';
/** Armed timeout timers, keyed by PR number. */
const timeouts = new Map<number, ReturnType<typeof setTimeout>>();
|
||||
|
||||
/**
 * Build an id of the form `<prefix>-<epoch ms>-<random>`, where the random
 * part is up to six base-36 characters taken from Math.random().
 */
function generateId(prefix: string): string {
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  return [prefix, stamp, suffix].join('-');
}
|
||||
|
||||
// ── Tester agent wake ──
|
||||
|
||||
/**
 * Test-orchestrator callback: a VM is ready for a PR's test run.
 *
 * Resolves the tester session for the PR's thread, writes the test plan into
 * it as an inbound chat message, wakes the tester container, and arms the
 * timeout for the run. Logs a warning and does nothing when the PR has no
 * pr_threads row.
 *
 * @param prNumber    PR number the run belongs to
 * @param repo        repository full name used for the pr_threads lookup
 * @param vmHost      host of the test VM, quoted in the prompt
 * @param planContent test plan text, embedded verbatim in the prompt
 */
async function onVmReady(prNumber: number, repo: string, vmHost: string, planContent: string): Promise<void> {
  const thread = getPrThreadByRepoPr(repo, prNumber);
  if (!thread) {
    log.warn('onVmReady: no PR thread found', { prNumber, repo });
    return;
  }

  const sessionThreadId = `${thread.channel_id}:${thread.thread_ts}`;
  const testerSession = resolveSession(testerAgGroupId, testerMgId, sessionThreadId, 'per-thread').session;

  const promptLines = [
    `Execute the test plan below on the test VM at \`${vmHost}\`.`,
    '',
    `## Test Plan — PR #${prNumber}`,
    `**Repository:** ${repo}`,
    `**VM:** ${vmHost}`,
    '',
    planContent,
    '',
    `[PR_CONTEXT: channel=${thread.channel_id} thread=${thread.thread_ts} repo=${repo} pr=${prNumber}]`,
  ];

  const sentAt = new Date().toISOString();
  writeSessionMessage(testerAgGroupId, testerSession.id, {
    id: generateId('msg-test'),
    kind: 'chat',
    timestamp: sentAt,
    platformId: thread.channel_id,
    channelType: 'slack',
    threadId: sessionThreadId,
    content: JSON.stringify({
      text: promptLines.join('\n'),
      sender: 'Test Orchestrator',
      senderId: 'test-orchestrator',
    }),
  });

  // Re-read the session after writing the message and wake it if it exists.
  const current = getSession(testerSession.id);
  if (current) {
    await wakeContainer(current);
  }

  // Start the 30-min timeout. Any timer already armed for this PR is cleared
  // first — a re-run (e.g. a second VM-ready for the same PR) would otherwise
  // orphan the previous timer, which then fires against the wrong run and is
  // never cleared. Keyed by prNumber (a factory instance serves one repo).
  const staleTimer = timeouts.get(prNumber);
  if (staleTimer) clearTimeout(staleTimer);
  timeouts.set(
    prNumber,
    setTimeout(() => {
      handleTimeout(prNumber, repo).catch((err) => log.error('Timeout handler error', { prNumber, err }));
    }, TIMEOUT_MS),
  );

  prLog(prNumber, repo, 'vm_ready', { vmHost, sessionId: testerSession.id });
  log.info('Tester agent woken', { prNumber, vmHost, sessionId: testerSession.id });
}
|
||||
|
||||
/**
 * Test-orchestrator callback: VM setup for a PR's test run failed.
 *
 * Posts the failure reason to the PR thread through the worker session's
 * outbound queue, waits one second, then posts a retry card. Logs a warning
 * and does nothing when the PR thread or its worker session cannot be found.
 *
 * @param prNumber    PR number the run belongs to
 * @param repo        repository full name used for the pr_threads lookup
 * @param reason      failure description, shown in the thread
 * @param planContent test plan text, handed on to the retry card
 */
async function onRunFailed(prNumber: number, repo: string, reason: string, planContent: string): Promise<void> {
  const thread = getPrThreadByRepoPr(repo, prNumber);
  if (!thread) {
    log.warn('onRunFailed: no PR thread found', { prNumber, repo });
    return;
  }

  const workerSession = getSession(thread.session_id);
  if (!workerSession) {
    log.warn('onRunFailed: worker session not found', { prNumber, sessionId: thread.session_id });
    return;
  }

  writeOutboundDirect(workerSession.agent_group_id, workerSession.id, {
    id: generateId('test-fail'),
    kind: 'chat',
    platformId: thread.channel_id,
    channelType: thread.channel_type,
    threadId: `${thread.channel_id}:${thread.thread_ts}`,
    content: JSON.stringify({ text: `\n━━━ ❌ Test Setup Failed ━━━━━━━━\n\nPR #${prNumber}: ${reason}` }),
  });

  // Post retry card after a short delay so the error message is delivered first
  await new Promise((resolve) => setTimeout(resolve, 1000));
  await postRetryCard(workerSession, prNumber, repo, planContent);

  prLog(prNumber, repo, 'vm_setup_failed', { reason });
  log.info('VM setup failure posted to PR thread', { prNumber, reason });
}
|
||||
|
||||
// ── Timeout ──
|
||||
|
||||
/**
 * Runs when a test run's timeout fires: drops the timer entry, asks the test
 * orchestrator (if one is registered) to cancel the run, and posts a timeout
 * notice to the PR thread through the worker session's outbound queue.
 * The notice is skipped when the PR thread or its worker session is missing.
 */
async function handleTimeout(prNumber: number, repo: string): Promise<void> {
  timeouts.delete(prNumber);
  prLog(prNumber, repo, 'test_timeout', { timeoutMs: TIMEOUT_MS });
  log.warn('Test run timed out', { prNumber, timeoutMs: TIMEOUT_MS });

  if (testOrch) {
    await testOrch.cancelRun(prNumber);
  }

  const thread = getPrThreadByRepoPr(repo, prNumber);
  const workerSession = thread ? getSession(thread.session_id) : undefined;
  if (!thread || !workerSession) return;

  const minutes = TIMEOUT_MS / 60_000;
  writeOutboundDirect(workerSession.agent_group_id, workerSession.id, {
    id: generateId('timeout'),
    kind: 'chat',
    platformId: thread.channel_id,
    channelType: thread.channel_type,
    threadId: `${thread.channel_id}:${thread.thread_ts}`,
    content: JSON.stringify({
      text: `\n━━━ ⏰ Test Timeout ━━━━━━━━━━━━━━\n\nPR #${prNumber}: no results after ${minutes} minutes. Test VM destroyed.`,
    }),
  });
}
|
||||
|
||||
// ── Result handling (delivery action for pr_submit_test_results) ──
|
||||
|
||||
/**
 * Normalize the `pr_number` tool argument to a positive integer.
 *
 * Accepts a number or a numeric string. Anything else (missing, NaN,
 * fractional, zero, negative) yields 0, which the caller treats as missing.
 * Normalizing matters because `timeouts` is keyed by number: a string "42"
 * would never match the timer armed under 42.
 */
function normalizePrNumber(value: unknown): number {
  const candidate = typeof value === 'string' ? Number(value.trim()) : value;
  return typeof candidate === 'number' && Number.isInteger(candidate) && candidate > 0 ? candidate : 0;
}

/**
 * Write an orchestrator-authored prompt into the worker session's inbound
 * queue, then wake the worker's container (the session is re-read first and
 * the wake is skipped if it no longer exists).
 */
async function wakeWorkerWithPrompt(
  workerSession: NonNullable<ReturnType<typeof getSession>>,
  pr: NonNullable<ReturnType<typeof getPrThreadByRepoPr>>,
  sessionThreadId: string,
  idPrefix: string,
  text: string,
): Promise<void> {
  const now = new Date().toISOString();
  writeSessionMessage(workerSession.agent_group_id, workerSession.id, {
    id: generateId(idPrefix),
    kind: 'chat',
    timestamp: now,
    platformId: pr.channel_id,
    channelType: pr.channel_type,
    threadId: sessionThreadId,
    content: JSON.stringify({
      text,
      sender: 'Test Orchestrator',
      senderId: 'test-orchestrator',
    }),
  });

  const freshWorker = getSession(workerSession.id);
  if (freshWorker) {
    await wakeContainer(freshWorker);
  }
}

/**
 * Delivery action for `pr_submit_test_results`.
 *
 * Clears the run's timeout, marks the run complete in the test orchestrator,
 * posts a verdict summary to the PR thread (linking a canvas when one could
 * be created), and wakes the worker: to propose merge on PASS, to analyze
 * the results on any other verdict.
 *
 * @param content  tool payload: `pr_number` (number or numeric string),
 *                 `verdict`, `content` (the results text), and optional
 *                 `repo` (falls back to DEFAULT_REPO)
 * @param _session session that invoked the action; unused — the worker
 *                 session is looked up through the PR thread instead
 */
export async function handleTestResults(content: Record<string, unknown>, _session: Session): Promise<void> {
  const prNumber = normalizePrNumber(content.pr_number);
  const repo = (content.repo as string) || DEFAULT_REPO;
  const verdict = content.verdict as string;
  const resultContent = content.content as string;

  prLog(prNumber, repo, 'test_results_received', { verdict });
  if (!prNumber || !verdict || !resultContent) {
    // Log the raw pr_number so a malformed value is visible in the warning.
    log.warn('handleTestResults: missing required fields', {
      prNumber: content.pr_number,
      verdict: !!verdict,
      content: !!resultContent,
    });
    return;
  }

  // Cancel timeout
  const timer = timeouts.get(prNumber);
  if (timer) {
    clearTimeout(timer);
    timeouts.delete(prNumber);
  }

  // Mark run complete in the test orchestrator (VM stays alive for investigation)
  if (testOrch) {
    testOrch.completeRun(prNumber);
  }

  // Look up worker session via PR thread
  const pr = getPrThreadByRepoPr(repo, prNumber);
  if (!pr) {
    log.warn('handleTestResults: no PR thread found', { prNumber, repo });
    return;
  }

  const workerSession = getSession(pr.session_id);
  if (!workerSession) {
    log.warn('handleTestResults: worker session not found', { prNumber, sessionId: pr.session_id });
    return;
  }

  const sessionThreadId = `${pr.channel_id}:${pr.thread_ts}`;

  // Render the results as a canvas when the slack-canvas component is
  // installed; otherwise the plain summary carries the verdict.
  const bareChannel = pr.channel_id.replace(/^slack:/, '');
  const canvas = await createCanvas(`Test Results — PR #${prNumber}`, resultContent, bareChannel);

  const summaryLine = `Verdict: **${verdict}**`;
  const plainSummary = `\n━━━ ✅ Test Results ━━━━━━━━━━━━━━\n\n${summaryLine}`;
  const summary = canvas ? `${plainSummary}\n\n[View test results](${canvas.permalink})` : plainSummary;

  writeOutboundDirect(workerSession.agent_group_id, workerSession.id, {
    id: generateId('test-result'),
    kind: 'chat',
    platformId: pr.channel_id,
    channelType: pr.channel_type,
    threadId: sessionThreadId,
    content: JSON.stringify({ text: summary }),
  });

  // Verdict-based action — wake the worker for both PASS and FAIL.
  // Never auto-propose merge from the orchestrator: the test results
  // chat message (writeOutboundDirect above) triggers a Slack event that
  // wakes the worker via mention-sticky, so both the orchestrator and
  // the worker would propose merge, producing duplicate approval cards.
  const prContext = `[PR_CONTEXT: channel=${pr.channel_id} thread=${pr.thread_ts} repo=${pr.repo_full_name} pr=${prNumber}]`;

  if (verdict === 'PASS') {
    const passPrompt = [
      `Test results are back for PR #${prNumber}: verdict **${verdict}**. All tests passed.`,
      '',
      'Propose merge via `credentialed_gh`.',
      '',
      prContext,
    ].join('\n');

    await wakeWorkerWithPrompt(workerSession, pr, sessionThreadId, 'test-pass', passPrompt);
    prLog(prNumber, repo, 'worker_woken_for_merge');
    log.info('Worker woken to propose merge after tests passed', { prNumber, verdict });
  } else {
    // FAIL or PARTIAL — wake worker to analyze
    const failurePrompt = [
      `Test results are back for PR #${prNumber}: verdict **${verdict}**.`,
      '',
      'Analyze the results and determine if failures are PR-related or pre-existing/environmental.',
      'Post a ONE-LINE conclusion, then take the appropriate action (merge anyway, request fixes, or close). No preamble.',
      '',
      '### Test Results',
      '```',
      resultContent,
      '```',
      '',
      prContext,
    ].join('\n');

    await wakeWorkerWithPrompt(workerSession, pr, sessionThreadId, 'test-analysis', failurePrompt);
    prLog(prNumber, repo, 'worker_woken_for_analysis', { verdict });
    log.info('Worker woken to analyze test results', { prNumber, verdict });
  }

  log.info('Test results processed', { prNumber, verdict });
}
|
||||
|
||||
// ── Init / shutdown ──
|
||||
|
||||
/**
 * Register the test orchestrator and hand it this module's callbacks.
 *
 * Stores the orchestrator reference and the tester agent/messaging group ids
 * in module state, then calls the orchestrator's `init` with `onVmReady` and
 * `onRunFailed`.
 *
 * @param testOrchestratorRef    the test orchestrator to wire up
 * @param testerAgentGroupId     agent group id used to resolve tester sessions
 * @param testerMessagingGroupId messaging group id used to resolve tester sessions
 */
export function initOrchestrator(
  testOrchestratorRef: TestOrchestratorModule,
  testerAgentGroupId: string,
  testerMessagingGroupId: string,
): void {
  testerAgGroupId = testerAgentGroupId;
  testerMgId = testerMessagingGroupId;
  testOrch = testOrchestratorRef;

  const orchestratorCallbacks = { onVmReady, onRunFailed };
  testOrchestratorRef.init(orchestratorCallbacks);

  log.info('Orchestrator initialized', { testerAgentGroupId, testerMessagingGroupId });
}
|
||||
|
||||
/**
 * Cancel every armed timeout, forget them, and drop the test orchestrator
 * reference. The tester group ids are left as they were.
 */
export function shutdownOrchestrator(): void {
  timeouts.forEach((timer) => clearTimeout(timer));
  timeouts.clear();
  testOrch = null;
  log.info('Orchestrator shut down');
}
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* Slack reaction helpers for PR thread status indicators.
|
||||
*
|
||||
* 🟢 open — added on thread creation
|
||||
* 🔴 closed — swapped in on close
|
||||
* 🟣 merged — swapped in on merge
|
||||
* 👀 awaiting approval — added when an approval card is posted, removed when acted on
|
||||
*/
|
||||
import { readEnvFile } from '../../env.js';
|
||||
import { log } from '../../log.js';
|
||||
import { getPrThreadBySession } from '../../db/pr-threads.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
// Reaction names (without colons), as sent in the `name` field of the Slack
// reactions API requests below.

/** PR is open. */
export const EMOJI_OPEN = 'large_green_circle';
/** PR was closed. */
export const EMOJI_CLOSED = 'red_circle';
/** PR was merged. */
export const EMOJI_MERGED = 'large_purple_circle';
/** PR is a draft. */
export const EMOJI_DRAFT = 'white_circle';
/**
 * An approval card is awaiting action.
 * NOTE(review): the comments in this file describe this indicator as 👀, but
 * the name sent to Slack is `warning` — confirm which one is intended.
 */
export const EMOJI_AWAITING = 'warning';
|
||||
|
||||
/** Resolved bot token; null until the first getBotToken() call. */
let cachedBotToken: string | null = null;

/**
 * Return the Slack bot token used for reaction calls.
 *
 * Resolution is the process environment first, then the .env file, else ''.
 * A non-empty token is cached for the life of the process; an empty result is
 * looked up again on the next call.
 */
export function getBotToken(): string {
  if (!cachedBotToken) {
    const env = readEnvFile(['SLACK_BOT_TOKEN']);
    cachedBotToken = process.env.SLACK_BOT_TOKEN || env.SLACK_BOT_TOKEN || '';
  }
  return cachedBotToken;
}
|
||||
|
||||
export async function addReaction(
|
||||
botToken: string,
|
||||
channelId: string,
|
||||
timestamp: string,
|
||||
emoji: string,
|
||||
): Promise<void> {
|
||||
try {
|
||||
const res = await fetch('https://slack.com/api/reactions.add', {
|
||||
method: 'POST',
|
||||
headers: { Authorization: `Bearer ${botToken}`, 'Content-Type': 'application/json; charset=utf-8' },
|
||||
body: JSON.stringify({ channel: channelId, timestamp, name: emoji }),
|
||||
});
|
||||
const body = (await res.json()) as { ok: boolean; error?: string };
|
||||
if (!body.ok && body.error !== 'already_reacted') {
|
||||
log.warn('Slack reactions.add failed', { emoji, error: body.error });
|
||||
}
|
||||
} catch (err) {
|
||||
log.warn('Slack reactions.add error', { emoji, err });
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remove an emoji reaction from a Slack message through the `reactions.remove`
 * Web API.
 *
 * Best-effort: this never throws. A `no_reaction` response is treated as
 * success; any other non-ok response, and any thrown error (network failure,
 * non-JSON body), is logged as a warning.
 *
 * @param botToken  Token sent as the `Authorization: Bearer` header.
 * @param channelId Channel id, sent as `channel`.
 * @param timestamp Timestamp of the message the reaction is on.
 * @param emoji     Reaction name, sent as `name`.
 */
export async function removeReaction(
  botToken: string,
  channelId: string,
  timestamp: string,
  emoji: string,
): Promise<void> {
  const headers = { Authorization: `Bearer ${botToken}`, 'Content-Type': 'application/json; charset=utf-8' };
  const payload = JSON.stringify({ channel: channelId, timestamp, name: emoji });

  try {
    const response = await fetch('https://slack.com/api/reactions.remove', { method: 'POST', headers, body: payload });
    const result = (await response.json()) as { ok: boolean; error?: string };
    // The reaction already being absent is the desired end state, not a failure.
    if (result.ok || result.error === 'no_reaction') return;
    log.warn('Slack reactions.remove failed', { emoji, error: result.error });
  } catch (err) {
    log.warn('Slack reactions.remove error', { emoji, err });
  }
}
|
||||
|
||||
/**
 * Add the awaiting-approval reaction (EMOJI_AWAITING) to the message at the
 * PR thread's `thread_ts`, signalling that the thread needs attention.
 * Does nothing when the session has no PR thread row.
 */
export async function markAwaitingApproval(session: Session): Promise<void> {
  const thread = getPrThreadBySession(session.id);
  if (thread) {
    // Stored channel ids may carry a `slack:` prefix; the Slack API call uses the bare id.
    const channel = thread.channel_id.replace(/^slack:/, '');
    await addReaction(getBotToken(), channel, thread.thread_ts, EMOJI_AWAITING);
  }
}
|
||||
|
||||
/**
 * Remove the awaiting-approval reaction (EMOJI_AWAITING) from the message at
 * the PR thread's `thread_ts` once the approval has been handled.
 * Does nothing when the session has no PR thread row.
 */
export async function clearAwaitingApproval(session: Session): Promise<void> {
  const thread = getPrThreadBySession(session.id);
  if (thread) {
    // Stored channel ids may carry a `slack:` prefix; the Slack API call uses the bare id.
    const channel = thread.channel_id.replace(/^slack:/, '');
    await removeReaction(getBotToken(), channel, thread.thread_ts, EMOJI_AWAITING);
  }
}
|
||||
+229
@@ -0,0 +1,229 @@
|
||||
/**
|
||||
* pr-factory-core guard — the modules-barrel line (`import
|
||||
* './pr-factory/index.js'` in src/modules/index.ts), the four pr_* delivery
|
||||
* actions, the three core-owned pr_* approval handlers, the gh-action seam's
|
||||
* not-installed fallback, and the GITHUB_WEBHOOK_SECRET env gate.
|
||||
*
|
||||
* Imports the REAL modules barrel (unmocked module graph — also exercises the
|
||||
* undici dependency through handler.ts) and asserts both registries through
|
||||
* their read sides (getDeliveryAction / getApprovalHandler). Deleting the
|
||||
* barrel line, any registerDeliveryAction call, or any registerApprovalHandler
|
||||
* call goes red.
|
||||
*
|
||||
* Env-gated import-time registration: process.env is primed BEFORE the import
|
||||
* and the legs are isolated with vi.resetModules() + a chdir to an .env-less
|
||||
* temp dir, so a developer's real .env can never flip the inert leg.
|
||||
*
|
||||
* Also pins the host side of the repo-default contract: the container tools
|
||||
* omit `repo` when the agent doesn't pass one, and the HOST action handlers
|
||||
* apply PR_FACTORY_DEFAULT_REPO (pairs with
|
||||
* container/agent-runner/src/mcp-tools/pr-factory-tools.test.ts).
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import fs from 'fs';
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
vi.mock('../../container-runner.js', () => ({
|
||||
wakeContainer: vi.fn().mockResolvedValue(undefined),
|
||||
isContainerRunning: vi.fn().mockReturnValue(false),
|
||||
killContainer: vi.fn(),
|
||||
buildAgentGroupImage: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
vi.mock('../../config.js', async () => {
|
||||
const actual = await vi.importActual<typeof import('../../config.js')>('../../config.js');
|
||||
return {
|
||||
...actual,
|
||||
DATA_DIR: '/tmp/nanoclaw-test-prf-registration/data',
|
||||
GROUPS_DIR: '/tmp/nanoclaw-test-prf-registration/groups',
|
||||
};
|
||||
});
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-registration';
|
||||
const ORIGINAL_CWD = process.cwd();
|
||||
|
||||
const DELIVERY_ACTIONS = [
|
||||
'pr_send_to_testing',
|
||||
'pr_propose_skill_edit',
|
||||
'pr_gh',
|
||||
'pr_submit_test_results',
|
||||
];
|
||||
|
||||
// pr_gh's approval handler belongs to the gh-action-approval component —
|
||||
// core only owns the delivery-action seam for it.
|
||||
const APPROVAL_ACTIONS = ['pr_send_to_testing', 'pr_retry_test', 'pr_propose_skill_edit'];
|
||||
|
||||
const ENV_KEYS = ['GITHUB_WEBHOOK_SECRET', 'PR_FACTORY_SLACK_CHANNEL_ID', 'SLACK_BOT_TOKEN', 'PR_FACTORY_DEFAULT_REPO'];
|
||||
|
||||
// Suite setup: start from an empty scratch directory and make it the cwd.
beforeAll(() => {
  const hasStaleDir = fs.existsSync(TEST_DIR);
  if (hasStaleDir) {
    fs.rmSync(TEST_DIR, { recursive: true });
  }
  fs.mkdirSync(TEST_DIR, { recursive: true });

  // readEnvFile resolves .env relative to cwd, so running from a directory that
  // is guaranteed to contain none keeps a developer's real .env out of the test.
  process.chdir(TEST_DIR);
});
|
||||
|
||||
// Suite teardown: restore the cwd, drop the scratch directory, and unset every
// env var the test cases may have primed.
afterAll(() => {
  process.chdir(ORIGINAL_CWD);
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  ENV_KEYS.forEach((name) => {
    delete process.env[name];
  });
});
|
||||
|
||||
describe('pr-factory module registration via the real modules barrel', () => {
|
||||
it('without GITHUB_WEBHOOK_SECRET the module is inert: approval handlers register, delivery actions do not', async () => {
|
||||
vi.resetModules();
|
||||
for (const key of ENV_KEYS) delete process.env[key];
|
||||
|
||||
await import('../index.js');
|
||||
const { getDeliveryAction } = await import('../../delivery.js');
|
||||
const { getApprovalHandler } = await import('../approvals/primitive.js');
|
||||
|
||||
for (const action of DELIVERY_ACTIONS) {
|
||||
expect(getDeliveryAction(action), `${action} must NOT be registered in inert mode`).toBeUndefined();
|
||||
}
|
||||
// The approval handlers live at module top level (outside the env gate):
|
||||
// they are bound whenever the module file loads, env or not.
|
||||
for (const action of APPROVAL_ACTIONS) {
|
||||
expect(getApprovalHandler(action), `approval handler ${action} missing`).toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('with the env trio primed before import, all four pr_* delivery actions register', async () => {
|
||||
vi.resetModules();
|
||||
process.env.GITHUB_WEBHOOK_SECRET = 'test-secret';
|
||||
process.env.PR_FACTORY_SLACK_CHANNEL_ID = 'C0TEST';
|
||||
process.env.SLACK_BOT_TOKEN = 'xoxb-test';
|
||||
|
||||
await import('../index.js');
|
||||
const { getDeliveryAction } = await import('../../delivery.js');
|
||||
const { getApprovalHandler } = await import('../approvals/primitive.js');
|
||||
|
||||
for (const action of DELIVERY_ACTIONS) {
|
||||
expect(getDeliveryAction(action), `delivery action ${action} missing`).toBeDefined();
|
||||
}
|
||||
for (const action of APPROVAL_ACTIONS) {
|
||||
expect(getApprovalHandler(action), `approval handler ${action} missing`).toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('applies PR_FACTORY_DEFAULT_REPO host-side when an action payload omits repo', async () => {
|
||||
vi.resetModules();
|
||||
process.env.GITHUB_WEBHOOK_SECRET = 'test-secret';
|
||||
process.env.PR_FACTORY_SLACK_CHANNEL_ID = 'C0TEST';
|
||||
process.env.SLACK_BOT_TOKEN = 'xoxb-test';
|
||||
// Primed BEFORE the barrel import — defaults.ts reads it at module load.
|
||||
process.env.PR_FACTORY_DEFAULT_REPO = 'acme/defaulted';
|
||||
|
||||
const { initTestDb, runMigrations, closeDb } = await import('../../db/index.js');
|
||||
const db = initTestDb();
|
||||
runMigrations(db);
|
||||
try {
|
||||
const now = new Date().toISOString();
|
||||
const { createAgentGroup } = await import('../../db/agent-groups.js');
|
||||
const { createSession } = await import('../../db/sessions.js');
|
||||
const { createPrThread } = await import('../../db/pr-threads.js');
|
||||
const { initSessionFolder, outboundDbPath } = await import('../../session-manager.js');
|
||||
createAgentGroup({
|
||||
id: 'ag-prf',
|
||||
name: 'Worker',
|
||||
folder: 'pr-factory-worker',
|
||||
agent_provider: null,
|
||||
created_at: now,
|
||||
});
|
||||
createSession({
|
||||
id: 'sess-prf',
|
||||
agent_group_id: 'ag-prf',
|
||||
messaging_group_id: null,
|
||||
thread_id: null,
|
||||
agent_provider: null,
|
||||
status: 'active',
|
||||
container_status: 'stopped',
|
||||
last_active: null,
|
||||
created_at: now,
|
||||
});
|
||||
initSessionFolder('ag-prf', 'sess-prf');
|
||||
// The pr_threads row is keyed by the DEFAULT repo — only a handler that
|
||||
// fills the omitted `repo` with PR_FACTORY_DEFAULT_REPO can find it.
|
||||
createPrThread({
|
||||
channel_id: 'C0TEST',
|
||||
thread_ts: '111.222',
|
||||
channel_type: 'slack',
|
||||
repo_full_name: 'acme/defaulted',
|
||||
pr_number: 42,
|
||||
session_id: 'sess-prf',
|
||||
created_at: now,
|
||||
});
|
||||
|
||||
await import('../index.js');
|
||||
const { getDeliveryAction } = await import('../../delivery.js');
|
||||
const handler = getDeliveryAction('pr_submit_test_results');
|
||||
expect(handler).toBeDefined();
|
||||
|
||||
const session = { id: 'sess-prf', agent_group_id: 'ag-prf' };
|
||||
// No `repo` in the payload — the handler must default it to acme/defaulted
|
||||
// to find the pr_threads row keyed by that repo and post the verdict.
|
||||
await handler!({ pr_number: 42, verdict: 'PASS', content: '## results' }, session as never, undefined as never);
|
||||
|
||||
const outDb = new Database(outboundDbPath('ag-prf', 'sess-prf'), { readonly: true });
|
||||
const rows = outDb.prepare('SELECT content FROM messages_out').all() as Array<{ content: string }>;
|
||||
outDb.close();
|
||||
expect(rows.length, 'verdict summary should reach the worker session resolved via the default repo').toBe(1);
|
||||
expect((JSON.parse(rows[0].content) as { text: string }).text).toContain('PASS');
|
||||
} finally {
|
||||
closeDb();
|
||||
}
|
||||
});
|
||||
|
||||
it('pr_gh without the gh-action-approval component notifies the agent instead of dropping silently', async () => {
|
||||
vi.resetModules();
|
||||
process.env.GITHUB_WEBHOOK_SECRET = 'test-secret';
|
||||
process.env.PR_FACTORY_SLACK_CHANNEL_ID = 'C0TEST';
|
||||
process.env.SLACK_BOT_TOKEN = 'xoxb-test';
|
||||
|
||||
const { initTestDb, runMigrations, closeDb } = await import('../../db/index.js');
|
||||
const db = initTestDb();
|
||||
runMigrations(db);
|
||||
try {
|
||||
const now = new Date().toISOString();
|
||||
const { createAgentGroup } = await import('../../db/agent-groups.js');
|
||||
const { createSession, getSession } = await import('../../db/sessions.js');
|
||||
const { initSessionFolder, inboundDbPath } = await import('../../session-manager.js');
|
||||
createAgentGroup({ id: 'ag-gh', name: 'Worker', folder: 'gh-worker', agent_provider: null, created_at: now });
|
||||
createSession({
|
||||
id: 'sess-gh',
|
||||
agent_group_id: 'ag-gh',
|
||||
messaging_group_id: null,
|
||||
thread_id: null,
|
||||
agent_provider: null,
|
||||
status: 'active',
|
||||
container_status: 'stopped',
|
||||
last_active: null,
|
||||
created_at: now,
|
||||
});
|
||||
initSessionFolder('ag-gh', 'sess-gh');
|
||||
|
||||
await import('../index.js');
|
||||
// In a composed tree the gh-action-approval component may be installed
|
||||
// (its barrel import registers an executor on the seam) — clear the
|
||||
// seam so this case pins core's not-installed fallback either way.
|
||||
const { setGhActionHandler } = await import('../pr-factory/gh-action.js');
|
||||
setGhActionHandler(null);
|
||||
const { getDeliveryAction } = await import('../../delivery.js');
|
||||
const handler = getDeliveryAction('pr_gh');
|
||||
expect(handler).toBeDefined();
|
||||
|
||||
const session = getSession('sess-gh')!;
|
||||
await handler!({ commands: ['gh pr view 42'], description: 'view' }, session, undefined as never);
|
||||
|
||||
// The seam's fallback notifies the agent via its real inbound DB.
|
||||
const inDb = new Database(inboundDbPath('ag-gh', 'sess-gh'), { readonly: true });
|
||||
const rows = inDb.prepare('SELECT content FROM messages_in').all() as Array<{ content: string }>;
|
||||
inDb.close();
|
||||
expect(rows).toHaveLength(1);
|
||||
expect((JSON.parse(rows[0].content) as { text: string }).text).toContain(
|
||||
'gh-action-approval component is not installed',
|
||||
);
|
||||
} finally {
|
||||
closeDb();
|
||||
}
|
||||
});
|
||||
});
|
||||
+185
@@ -0,0 +1,185 @@
|
||||
/**
|
||||
* PR Factory skill edit approval — technical gate for supervisor skill edits.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Supervisor reads skill at /app/skills/ (RO mount)
|
||||
* 2. Supervisor calls `propose_skill_edit` MCP tool with new content
|
||||
* 3. Host computes diff, posts it in the supervisor's thread as a .diff file
|
||||
* 4. Host posts an approval card in the same thread (via supervisor bot)
|
||||
* 5. Human clicks Accept → host writes the new content to disk
|
||||
* 6. Human clicks Reject → change dropped, agent notified
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { execFileSync } from 'child_process';
|
||||
|
||||
import { normalizeOptions, type RawOption } from '../../channels/ask-question.js';
|
||||
import { getMessagingGroup } from '../../db/messaging-groups.js';
|
||||
import { createPendingApproval, updatePendingApprovalPlatformMessageId } from '../../db/sessions.js';
|
||||
import { getDeliveryAdapter } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
import { DEFAULT_REPO } from './defaults.js';
|
||||
import { dismissStaleApprovals } from './dismiss-approvals.js';
|
||||
import { registerApprovalHandler, notifyAgent } from '../approvals/primitive.js';
|
||||
import type { ApprovalHandlerContext } from '../approvals/primitive.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
const SKILLS_DIR = path.resolve(process.cwd(), 'container/skills');
|
||||
|
||||
const APPROVAL_OPTIONS: RawOption[] = [
|
||||
{ label: 'Apply Edit', selectedLabel: '✅ Applied', value: 'approve' },
|
||||
{ label: 'Reject', selectedLabel: '❌ Rejected', value: 'reject' },
|
||||
];
|
||||
|
||||
/**
 * Produce a unified diff between two versions of a skill file by shelling out
 * to the system `diff -u`.
 *
 * Both versions are written into a private, uniquely named scratch directory
 * created with `fs.mkdtempSync`, so concurrent calls cannot clobber each
 * other's files and the paths are not predictable. The directory is removed
 * again before returning.
 *
 * @param oldContent Current file content.
 * @param newContent Proposed file content.
 * @param filePath   Path used only for the `a/…` / `b/…` labels in the diff header.
 * @returns The unified diff; `'(no changes)'` when the contents are identical;
 *          `'(diff unavailable)'` when the diff could not be produced.
 */
function computeDiff(oldContent: string, newContent: string, filePath: string): string {
  let workDir: string | null = null;
  try {
    workDir = fs.mkdtempSync(path.join('/tmp', 'skill-diff-'));
    const tmpOld = path.join(workDir, 'old');
    const tmpNew = path.join(workDir, 'new');
    fs.writeFileSync(tmpOld, oldContent);
    fs.writeFileSync(tmpNew, newContent);
    const diff = execFileSync('diff', ['-u', '--label', `a/${filePath}`, '--label', `b/${filePath}`, tmpOld, tmpNew], {
      encoding: 'utf8',
      timeout: 5000,
    });
    return diff || '(no changes)';
  } catch (err: unknown) {
    // diff exits 1 when files differ — that's the normal case; the diff text
    // is then carried on the thrown error's stdout.
    if (err && typeof err === 'object' && 'stdout' in err) {
      const stdout = (err as { stdout: string }).stdout;
      if (stdout) return stdout;
    }
    return '(diff unavailable)';
  } finally {
    if (workDir) {
      try {
        fs.rmSync(workDir, { recursive: true, force: true });
        // eslint-disable-next-line no-catch-all/no-catch-all -- tmp cleanup; nothing to do on failure
      } catch {
        // already gone
      }
    }
  }
}
|
||||
|
||||
/**
 * Delivery-action handler for the supervisor's skill-edit proposal.
 *
 * Steps:
 *  1. Validate the agent-supplied payload (`skill_name`, `file_name`,
 *     `content` must all be non-empty strings) and confirm the target stays
 *     inside SKILLS_DIR.
 *  2. Post the diff (or the full content for a new file) as a `.diff`
 *     attachment in the session's thread.
 *  3. Dismiss stale approval cards, record a pending approval, and post the
 *     approval card in the same thread.
 *
 * Nothing is written to the skill file here — that happens in the approval
 * handler registered for `pr_propose_skill_edit`.
 *
 * @param content Raw action payload from the agent; treated as untrusted.
 * @param session Session the action originated from.
 */
export async function handleProposeSkillEdit(content: Record<string, unknown>, session: Session): Promise<void> {
  const skillName = content.skill_name;
  const fileName = content.file_name;
  const newContent = content.content;

  // The payload comes from the agent: check runtime types instead of casting,
  // so a number/object/array cannot reach path.join and throw a TypeError.
  if (
    typeof skillName !== 'string' ||
    typeof fileName !== 'string' ||
    typeof newContent !== 'string' ||
    !skillName ||
    !fileName ||
    !newContent
  ) {
    notifyAgent(session, 'propose_skill_edit requires skill_name, file_name, and content.');
    return;
  }

  // Validate path safety — no traversal
  const relPath = path.join(skillName, fileName);
  const fullPath = path.resolve(SKILLS_DIR, relPath);
  if (!fullPath.startsWith(SKILLS_DIR + path.sep)) {
    log.warn('propose_skill_edit: path traversal attempt', { skillName, fileName, fullPath });
    notifyAgent(session, `Invalid skill path: ${relPath}`);
    return;
  }

  // A session may have no messaging group (null id); treat that the same as
  // "group not found" rather than asserting the id is present.
  const mg = session.messaging_group_id ? getMessagingGroup(session.messaging_group_id) : undefined;
  if (!mg) {
    log.warn('propose_skill_edit: messaging group not found', { sessionId: session.id });
    return;
  }

  const adapter = getDeliveryAdapter();
  if (!adapter) {
    log.warn('propose_skill_edit: no delivery adapter');
    return;
  }

  // Compute diff against current file (or show full content for new files)
  const isNew = !fs.existsSync(fullPath);
  const oldContent = isNew ? '' : fs.readFileSync(fullPath, 'utf8');
  const diff = isNew ? `(new file)\n\n${newContent}` : computeDiff(oldContent, newContent, relPath);

  // Post diff as a .diff file in the supervisor's thread. The instance arg
  // routes through the bot identity that owns this messaging group (the
  // supervisor instance) — under exact-instance dispatch an omitted instance
  // would post through the default worker bot.
  const threadId = session.thread_id;
  const diffFile = [{ filename: `${skillName}-${fileName}.diff`, data: Buffer.from(diff) }];
  await adapter.deliver(
    mg.channel_type,
    mg.platform_id,
    threadId,
    'chat',
    JSON.stringify({ text: `Proposed edit to \`${relPath}\`` }),
    diffFile,
    mg.instance,
  );

  // Dismiss any existing approval cards in this thread before posting a new one
  await dismissStaleApprovals(session);

  // Post approval card in the same thread via the supervisor bot
  const approvalId = `appr-skill-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const normalizedOptions = normalizeOptions(APPROVAL_OPTIONS);

  createPendingApproval({
    approval_id: approvalId,
    session_id: session.id,
    request_id: approvalId,
    action: 'pr_propose_skill_edit',
    payload: JSON.stringify({ skillName, fileName, content: newContent }),
    created_at: new Date().toISOString(),
    title: 'Skill Edit',
    options_json: JSON.stringify(normalizedOptions),
  });

  const platformMsgId = await adapter.deliver(
    mg.channel_type,
    mg.platform_id,
    threadId,
    'chat-sdk',
    JSON.stringify({
      type: 'ask_question',
      questionId: approvalId,
      title: 'Skill Edit',
      question: `Apply edit to \`${relPath}\`?`,
      options: APPROVAL_OPTIONS,
    }),
    undefined,
    mg.instance,
  );
  // Remember the card's platform message id on the pending approval when the
  // adapter returned one.
  if (platformMsgId) updatePendingApprovalPlatformMessageId(approvalId, platformMsgId);

  // Skill edits are supervisor-level, not per-PR — log to PR 0 as a system event
  prLog(0, DEFAULT_REPO, 'skill_edit_proposed', { skillName, fileName });
  log.info('Skill edit approval card posted', { approvalId, skillName, fileName, sessionId: session.id });
}
|
||||
|
||||
/**
 * Approval handler for `pr_propose_skill_edit`: writes the proposed content
 * stored in the approval payload to the skill file under SKILLS_DIR.
 *
 * The target path is validated again here, independently of the check done
 * when the proposal was created. Missing parent directories are created so
 * that brand-new skills can be added.
 */
async function onSkillEditApproved(ctx: ApprovalHandlerContext): Promise<void> {
  const skillName = ctx.payload.skillName as string;
  const fileName = ctx.payload.fileName as string;
  const content = ctx.payload.content as string;

  const relPath = path.join(skillName, fileName);
  const target = path.resolve(SKILLS_DIR, relPath);

  // Re-validate path safety: the resolved target must live under SKILLS_DIR.
  const insideSkillsDir = target.startsWith(SKILLS_DIR + path.sep);
  if (!insideSkillsDir) {
    ctx.notify(`Invalid skill path: ${relPath}`);
    return;
  }

  // Ensure directory exists (for new skills), then write the file.
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(target, content);

  prLog(0, DEFAULT_REPO, 'skill_edit_applied', { skillName, fileName });
  log.info('Skill edit applied', { skillName, fileName });
  ctx.notify(`Skill edit applied to \`${relPath}\`.`);
}
|
||||
|
||||
registerApprovalHandler('pr_propose_skill_edit', onSkillEditApproved);
|
||||
+66
@@ -0,0 +1,66 @@
|
||||
/**
|
||||
* PR Factory Supervisor — instruction text seeded into the supervisor
|
||||
* agent group on first bootstrap.
|
||||
*
|
||||
* The actual agent-group / messaging-group / wiring creation lives in
|
||||
* bootstrap.ts. This file only owns the operator-facing playbook so it
|
||||
* can evolve without touching wiring code. Operators tune the live copy
|
||||
* at groups/pr-factory-supervisor/CLAUDE.local.md — bootstrap only seeds
|
||||
* it on first creation and never overwrites.
|
||||
*/
|
||||
|
||||
export const SUPERVISOR_FOLDER = 'pr-factory-supervisor';
|
||||
|
||||
export const SUPERVISOR_INSTRUCTIONS = `# PR Factory Supervisor
|
||||
|
||||
You improve the PR Factory Worker based on human feedback. You speak as a separate Slack bot from the worker so humans can address you distinctly with @Supervisor.
|
||||
|
||||
## Where you live
|
||||
|
||||
- **Your admin channel** (this Slack channel for routine messages to you) — humans bring you questions, batch reviews, and explicit asks.
|
||||
- **PR threads** — humans tag @Supervisor in a worker's PR thread when something the worker did needs fixing. You see the thread context (accumulated) and can act there.
|
||||
|
||||
## Identifying the PR
|
||||
|
||||
The PR number is visible in every worker message (e.g. "PR #2318" in the triage report). Use it directly — no need to parse context tags.
|
||||
|
||||
## MCP tools you own
|
||||
|
||||
- \`mcp__nanoclaw__propose_skill_edit({ skill_name, file_name, content })\` — propose a skill file edit. Read the current file from \`/app/skills/\` first, then pass the full new content. The host posts the diff for human approval — the file is only written if approved. **Always use this tool to edit skills — never write to the filesystem directly.**
|
||||
|
||||
Skill edits apply to the **next** PR each affected worker session triages — running sessions keep their old read-only skill view until they next spawn. Tell the human the edit lands going forward; there is no force-rerun of an in-flight session.
|
||||
|
||||
## Two workflows
|
||||
|
||||
### A — Quick fix in a PR thread
|
||||
|
||||
1. Read the thread (already accumulated). Identify what went wrong.
|
||||
2. Propose the change. Use \`propose_skill_edit\` — the host posts the diff and the human approves or rejects.
|
||||
3. On approval, tell the human what changed and that it applies to the next PR the worker triages.
|
||||
|
||||
### B — Batch review in admin channel
|
||||
|
||||
1. **Collect**: when @mentioned in a PR thread, ack briefly ("noted, saved"), then append to \`/workspace/group/feedback.md\`:
|
||||
\`\`\`
|
||||
## PR #N (channel=slack:CXXXX thread=...)
|
||||
**Feedback:** <what the human said>
|
||||
**Suggested fix:** <your read>
|
||||
\`\`\`
|
||||
2. **Review**: when the human asks you in admin channel, walk them through the collected feedback, propose skill diffs (don't apply yet), iterate.
|
||||
3. **Implement**: on approval — use \`propose_skill_edit\` for each file. The edits apply to subsequent PRs going forward.
|
||||
|
||||
## Where things are
|
||||
|
||||
| What | Where |
|
||||
|------|-------|
|
||||
| Container skills (read-only) | \`/app/skills/\` |
|
||||
| Your feedback log | \`/workspace/group/feedback.md\` |
|
||||
|
||||
## Principles
|
||||
|
||||
- **Smallest fix first** — one-line edit beats a rewrite.
|
||||
- **Patterns over one-offs** — fix the skill, not the individual PR.
|
||||
- **Evidence first** — quote the worker's actual output before proposing a fix.
|
||||
- **Human approves** — propose, don't apply.
|
||||
- **Edits apply going forward** — a skill edit changes how the worker triages the next PR; it does not re-run PRs already in flight.
|
||||
`;
|
||||
+49
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Test-orchestrator seam — pr-factory-core's optional VM-testing surface.
|
||||
*
|
||||
* Core owns the coordination side (orchestrator.ts: result handling,
|
||||
* timeouts, worker wakes) but ships NO VM control plane. The
|
||||
* `vm-test-orchestrator` component registers its module here at import time;
|
||||
* until then `getTestOrchestrator()` returns null and the send-to-testing
|
||||
* flow tells the human/agent that no orchestrator is installed.
|
||||
*
|
||||
* Cross-component contract: keep `TestRun`, `OrchestratorCallbacks`,
|
||||
* `TestOrchestratorModule`, and `registerTestOrchestrator` stable — the
|
||||
* vm-test-orchestrator component implements and registers against them.
|
||||
*/
|
||||
|
||||
/** A test run handed to the orchestrator through `submitTest`. */
export interface TestRun {
  /** Pull-request number; the module's other methods address runs and VMs by this number. */
  prNumber: number;
  /** Repository of the PR — presumably in "owner/name" form; confirm against callers. */
  repo: string;
  /** Test plan content — presumably the approved plan's full text; confirm against callers. */
  planContent: string;
}
|
||||
|
||||
/** Coordination callbacks that core hands to the orchestrator module via `init`. */
export interface OrchestratorCallbacks {
  /** Presumably invoked once a VM is available for the run at `vmHost` — confirm against the implementing component. */
  onVmReady: (prNumber: number, repo: string, vmHost: string, planContent: string) => Promise<void>;
  /** Presumably invoked when a run cannot proceed; `reason` carries the failure description — confirm against the implementing component. */
  onRunFailed: (prNumber: number, repo: string, reason: string, planContent: string) => Promise<void>;
}
|
||||
|
||||
/**
 * Surface an orchestrator implementation must provide to be registered with
 * `registerTestOrchestrator`. All per-run operations are addressed by PR number.
 */
export interface TestOrchestratorModule {
  /** Wire the coordination callbacks. Called once by core's initOrchestrator. */
  init(cbs: OrchestratorCallbacks): void;
  /** Queue an approved test plan for execution. */
  submitTest(run: TestRun): void;
  /** Mark a run complete (VM stays alive for investigation). */
  completeRun(prNumber: number): void;
  /** Cancel a run and tear down its VM (timeout path). */
  cancelRun(prNumber: number): Promise<void>;
  /** Destroy the VM for a PR (close/merge path). */
  destroyVm(prNumber: number): Promise<void>;
  /** Tear down all VMs and stop the queue. */
  shutdown(): Promise<void>;
}
|
||||
|
||||
let orchestrator: TestOrchestratorModule | null = null;
|
||||
|
||||
/**
 * Install `mod` as the active test orchestrator. There is a single slot, so a
 * later registration replaces an earlier one.
 */
export function registerTestOrchestrator(mod: TestOrchestratorModule): void {
  orchestrator = mod;
}
|
||||
|
||||
/** Return the registered test orchestrator, or null when none has been registered. */
export function getTestOrchestrator(): TestOrchestratorModule | null {
  return orchestrator;
}
|
||||
+320
@@ -0,0 +1,320 @@
|
||||
/**
|
||||
* PR Factory testing approval — staged gate between review and orchestrator.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Worker agent writes test plan as .md.pending to /workspace/agent/test-plans/
|
||||
* 2. Worker calls `send_to_testing` MCP tool → system action lands here
|
||||
* 3. Host reads the .pending file, posts it in the PR thread, adds approval card
|
||||
* 4. Human clicks Accept → host submits plan to the registered test orchestrator
|
||||
* 5. Human clicks Reject → plan deleted, agent notified
|
||||
*
|
||||
* The VM control plane is the vm-test-orchestrator component — reached only
|
||||
* through the test-orchestration seam, so this flow degrades to an
|
||||
* informative notify when no orchestrator is installed.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { normalizeOptions, type RawOption } from '../../channels/ask-question.js';
|
||||
import { GROUPS_DIR } from '../../config.js';
|
||||
import { getMessagingGroup } from '../../db/messaging-groups.js';
|
||||
import { getPrThreadBySession } from '../../db/pr-threads.js';
|
||||
import { createPendingApproval, updatePendingApprovalPlatformMessageId } from '../../db/sessions.js';
|
||||
import { getDeliveryAdapter } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
import { dismissStaleApprovals } from './dismiss-approvals.js';
|
||||
import { registerApprovalHandler, notifyAgent } from '../approvals/primitive.js';
|
||||
import type { ApprovalHandlerContext } from '../approvals/primitive.js';
|
||||
import type { Session } from '../../types.js';
|
||||
|
||||
import { WORKER_FOLDER } from './bootstrap.js';
|
||||
import { createCanvas } from './canvas.js';
|
||||
import { DEFAULT_REPO } from './defaults.js';
|
||||
import { markAwaitingApproval, clearAwaitingApproval } from './reactions.js';
|
||||
import { getTestOrchestrator } from './test-orchestration.js';
|
||||
|
||||
const TEST_PLAN_DIR = path.resolve(GROUPS_DIR, WORKER_FOLDER, 'test-plans');
|
||||
|
||||
/**
 * Build the two buttons of the "Send to Testing" approval card.
 *
 * @param summaryLine Short plan summary echoed in the selected-state labels;
 *                    falls back to 'test plan' when empty.
 */
function testingOptions(summaryLine: string): RawOption[] {
  const suffix = summaryLine ? summaryLine : 'test plan';
  const approve: RawOption = { label: 'Send to Testing', selectedLabel: `✅ Sent — ${suffix}`, value: 'approve' };
  const reject: RawOption = { label: 'Reject', selectedLabel: `❌ Rejected — ${suffix}`, value: 'reject' };
  return [approve, reject];
}
|
||||
|
||||
/**
 * Build the two buttons of the retry approval card.
 *
 * @param summaryLine Short plan summary echoed in the "Retrying" label; falls
 *                    back to 'test plan' when empty. The dismiss label carries
 *                    no summary.
 */
function retryOptions(summaryLine: string): RawOption[] {
  const suffix = summaryLine ? summaryLine : 'test plan';
  const retry: RawOption = { label: 'Retry Test', selectedLabel: `✅ Retrying — ${suffix}`, value: 'approve' };
  const dismiss: RawOption = { label: 'Dismiss', selectedLabel: `❌ Dismissed`, value: 'reject' };
  return [retry, dismiss];
}
|
||||
|
||||
/**
 * Derive a one-line summary of a test plan for the approval card.
 *
 * Two optional pieces are extracted from the markdown:
 *  - the number of numbered table rows (lines starting with `| <digits> |`),
 *    rendered as "N tests" (or "1 test" for a single row);
 *  - the text following a `**Depth:**` marker, rendered as "Depth: <value>".
 *
 * @param planContent Markdown content of the test plan.
 * @returns The present pieces joined with ", ", or '' when neither is found.
 */
function extractSummaryLine(planContent: string): string {
  const depthMatch = planContent.match(/\*\*Depth:\*\*\s*(.+)/);
  const depth = depthMatch ? depthMatch[1].trim() : '';
  const tableRows = (planContent.match(/^\|\s*\d+\s*\|/gm) || []).length;

  const parts: string[] = [];
  // Pluralise on the count so a single-row plan reads "1 test", not "1 tests".
  if (tableRows) parts.push(`${tableRows} ${tableRows === 1 ? 'test' : 'tests'}`);
  if (depth) parts.push(`Depth: ${depth}`);
  return parts.join(', ');
}
|
||||
|
||||
/**
 * Locate and read the pending test plan for the PR thread bound to a session.
 *
 * A plan file lives in TEST_PLAN_DIR, ends in `.md.pending`, and is named
 * `pr-<number>-thread-…`; the first directory entry matching the session's PR
 * number is used.
 *
 * @param sessionId Session whose PR thread identifies the plan.
 * @returns The plan's path, file name, and content, or null when the session
 *          has no PR thread, the directory or file cannot be read, or no
 *          matching file exists.
 */
function findPlanFile(sessionId: string): { filePath: string; fileName: string; content: string } | null {
  const thread = getPrThreadBySession(sessionId);
  if (!thread) return null;

  const wantedPrefix = `pr-${thread.pr_number}-thread-`;
  const isPlanForThread = (name: string): boolean =>
    name.startsWith('pr-') && name.endsWith('.md.pending') && name.startsWith(wantedPrefix);

  try {
    const fileName = fs.readdirSync(TEST_PLAN_DIR).find(isPlanForThread);
    if (!fileName) return null;

    const filePath = path.join(TEST_PLAN_DIR, fileName);
    return { filePath, fileName, content: fs.readFileSync(filePath, 'utf8') };
    // eslint-disable-next-line no-catch-all/no-catch-all -- a missing test-plans dir or a vanished plan file means "no plan yet"; the caller notifies the agent
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Handle the agent's "send to testing" request for a session.
 *
 * Looks up the session's pending plan file, PR thread, messaging group and
 * delivery adapter. A missing plan is reported back to the agent; any other
 * missing prerequisite is logged and the call returns without posting.
 *
 * When everything is available it:
 *  1. posts the plan summary to the thread — as a canvas link when
 *     `createCanvas` returns a canvas, otherwise with the plan attached as a
 *     `.md` file;
 *  2. dismisses stale approval cards and records a new pending approval with
 *     action `pr_send_to_testing`;
 *  3. waits one second, posts the `ask_question` approval card, and stores
 *     the platform message id when the adapter returns one;
 *  4. marks the session as awaiting approval and logs the event.
 *
 * @param _content - Request content; not read by this handler.
 * @param session - Session whose plan should be sent for approval.
 */
export async function handleSendToTesting(_content: Record<string, unknown>, session: Session): Promise<void> {
  const sessionId = session.id;

  const plan = findPlanFile(sessionId);
  if (!plan) {
    log.warn('pr_send_to_testing: no plan file found', { sessionId });
    notifyAgent(
      session,
      'No test plan file found. Write the plan to /workspace/agent/test-plans/ as .md.pending first.',
    );
    return;
  }

  const pr = getPrThreadBySession(sessionId);
  if (!pr) {
    log.warn('pr_send_to_testing: no pr_threads entry', { sessionId });
    return;
  }

  const mg = getMessagingGroup(session.messaging_group_id!);
  if (!mg) {
    log.warn('pr_send_to_testing: messaging group not found', { sessionId });
    return;
  }

  const threadId = session.thread_id;
  const adapter = getDeliveryAdapter();
  if (!adapter) {
    log.warn('pr_send_to_testing: no delivery adapter');
    return;
  }

  const summaryLine = extractSummaryLine(plan.content);

  // Render the plan as a canvas when the slack-canvas component is installed.
  const channelWithoutPrefix = mg.platform_id.replace(/^slack:/, '');
  const canvas = await createCanvas(`Test Plan — PR #${pr.pr_number}`, plan.content, channelWithoutPrefix);

  // Banner + optional summary line are shared by both presentations.
  const banner = `\n━━━ 🧪 Test Plan ━━━━━━━━━━━━━━━━\n` + (summaryLine ? `\n${summaryLine}` : '');
  const summary: string = canvas ? banner + `\n\n[View test plan](${canvas.permalink})` : banner;
  // No canvas — upload the plan as an .md file instead
  const planFiles: { filename: string; data: Buffer }[] | undefined = canvas
    ? undefined
    : [{ filename: `test-plan-pr-${pr.pr_number}.md`, data: Buffer.from(plan.content) }];

  // Post plan (canvas link or file) first, then the approval card
  const planMessage = JSON.stringify({ text: summary });
  await adapter.deliver(mg.channel_type, mg.platform_id, threadId, 'chat', planMessage, planFiles, mg.instance);

  // Dismiss any existing approval cards in this thread before posting a new one
  await dismissStaleApprovals(session);

  // Post approval card after file is delivered
  const approvalId = `appr-test-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const options = testingOptions(summaryLine);
  const normalizedOptions = normalizeOptions(options);
  const approvalPayload = JSON.stringify({
    filePath: plan.filePath,
    fileName: plan.fileName,
    prNumber: pr.pr_number,
    repo: pr.repo_full_name,
  });

  createPendingApproval({
    approval_id: approvalId,
    session_id: sessionId,
    request_id: approvalId,
    action: 'pr_send_to_testing',
    payload: approvalPayload,
    created_at: new Date().toISOString(),
    title: 'Send to Testing',
    options_json: JSON.stringify(normalizedOptions),
  });

  // Small delay to ensure Slack finishes processing the file upload
  await new Promise((resolve) => {
    setTimeout(resolve, 1000);
  });

  const card = JSON.stringify({
    type: 'ask_question',
    questionId: approvalId,
    title: 'Send to Testing',
    question: summaryLine || 'Send to testing?',
    options,
  });
  const platformMsgId = await adapter.deliver(
    mg.channel_type,
    mg.platform_id,
    threadId,
    'chat-sdk',
    card,
    undefined,
    mg.instance,
  );
  if (platformMsgId) {
    updatePendingApprovalPlatformMessageId(approvalId, platformMsgId);
  }

  await markAwaitingApproval(session);
  prLog(pr.pr_number, pr.repo_full_name, 'test_plan_posted', { approvalId });
  log.info('Testing approval card posted', { approvalId, prNumber: pr.pr_number, sessionId });
}
|
||||
|
||||
/**
 * Approval handler — fires when a human accepts the "Send to Testing" card.
 *
 * Clears the session's awaiting-approval state, then reads the plan file named
 * in the approval payload and submits it to the test orchestrator. The agent is
 * notified when the plan file has vanished, when no orchestrator is installed,
 * and on both success and failure of the submission.
 *
 * Removing the pending plan file is best-effort: once `submitTest` has
 * returned, a failure to delete the file is logged as a warning and does NOT
 * turn the approval into a reported submission failure.
 *
 * @param ctx - Approval context; `ctx.payload` carries `filePath`, `fileName`,
 *   `prNumber` and `repo` as stored when the approval was created.
 */
async function onTestingApproved(ctx: ApprovalHandlerContext): Promise<void> {
  await clearAwaitingApproval(ctx.session);
  const { payload } = ctx;
  const filePath = payload.filePath as string;
  const fileName = payload.fileName as string;

  if (!fs.existsSync(filePath)) {
    ctx.notify(`Test plan file ${fileName} no longer exists.`);
    return;
  }

  const orch = getTestOrchestrator();
  if (!orch) {
    ctx.notify('No test orchestrator installed — cannot submit test plan (vm-test-orchestrator component missing).');
    return;
  }

  const prNumber = payload.prNumber as number;
  const repo = (payload.repo as string) || DEFAULT_REPO;
  try {
    const planContent = fs.readFileSync(filePath, 'utf8');
    orch.submitTest({ prNumber, repo, planContent });

    // The plan is already queued here; a cleanup failure must not be reported
    // to the agent as "submission failed".
    try {
      fs.unlinkSync(filePath);
      // eslint-disable-next-line no-catch-all/no-catch-all -- cleanup is best-effort once the plan has been submitted
    } catch (cleanupErr) {
      const cleanupMsg = cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
      log.warn('Plan submitted but pending plan file could not be removed', { fileName, err: cleanupMsg });
    }

    prLog(prNumber, repo, 'testing_approved', { fileName });
    log.info('Testing approved — plan submitted to test queue', { prNumber, fileName });
    ctx.notify(`Test plan approved — submitted to test queue.`);
    // eslint-disable-next-line no-catch-all/no-catch-all -- the human already approved; surface the failure to the agent instead of crashing the response handler
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.error('Failed to submit approved plan to test queue', { fileName, err: msg });
    ctx.notify(`Test plan approved but submission failed: ${msg}.`);
  }
}
|
||||
|
||||
// Route responses to 'pr_send_to_testing' approvals (created in handleSendToTesting) to onTestingApproved.
registerApprovalHandler('pr_send_to_testing', onTestingApproved);
|
||||
|
||||
// ── Retry after technical failure ──
|
||||
|
||||
/**
 * Post a "Retry Test" approval card to the session's thread.
 *
 * Returns after logging a warning when the session's messaging group or the
 * delivery adapter is unavailable. Otherwise it dismisses stale approval
 * cards, records a pending approval with action `pr_retry_test` (the payload
 * carries the PR number, repo and full plan text), delivers the
 * `ask_question` card, stores the platform message id when one is returned,
 * and marks the session as awaiting approval.
 *
 * @param session - Session whose thread receives the card.
 * @param prNumber - Pull request number the retry applies to.
 * @param repo - Repository identifier recorded in the payload and activity log.
 * @param planContent - Full test plan text to re-submit on approval.
 */
export async function postRetryCard(
  session: Session,
  prNumber: number,
  repo: string,
  planContent: string,
): Promise<void> {
  const sessionId = session.id;

  const group = getMessagingGroup(session.messaging_group_id!);
  if (!group) {
    log.warn('postRetryCard: messaging group not found', { sessionId });
    return;
  }

  const delivery = getDeliveryAdapter();
  if (!delivery) {
    log.warn('postRetryCard: no delivery adapter');
    return;
  }

  // Dismiss any existing approval cards in this thread before posting a new one
  await dismissStaleApprovals(session);

  const headline = extractSummaryLine(planContent);
  const approvalId = `appr-retry-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const options = retryOptions(headline);
  const storedOptions = normalizeOptions(options);

  createPendingApproval({
    approval_id: approvalId,
    session_id: sessionId,
    request_id: approvalId,
    action: 'pr_retry_test',
    payload: JSON.stringify({ prNumber, repo, planContent }),
    created_at: new Date().toISOString(),
    title: 'Retry Test',
    options_json: JSON.stringify(storedOptions),
  });

  const card = JSON.stringify({
    type: 'ask_question',
    questionId: approvalId,
    title: 'Retry Test',
    question: headline ? `Retry: ${headline}` : 'Retry test?',
    options,
  });
  const platformMsgId = await delivery.deliver(
    group.channel_type,
    group.platform_id,
    session.thread_id,
    'chat-sdk',
    card,
    undefined,
    group.instance,
  );
  if (platformMsgId) {
    updatePendingApprovalPlatformMessageId(approvalId, platformMsgId);
  }

  await markAwaitingApproval(session);
  prLog(prNumber, repo, 'retry_card_posted', { approvalId });
  log.info('Retry test card posted', { approvalId, prNumber, sessionId });
}
|
||||
|
||||
/**
 * Approval handler for the "Retry Test" card.
 *
 * Clears the session's awaiting-approval state and re-submits the plan stored
 * in the approval payload to the test orchestrator. The agent is notified when
 * no orchestrator is installed or when the re-submission throws; a successful
 * re-submission is only logged.
 *
 * @param ctx - Approval context; `ctx.payload` carries `prNumber`, `repo` and
 *   `planContent`.
 */
async function onRetryTestApproved(ctx: ApprovalHandlerContext): Promise<void> {
  await clearAwaitingApproval(ctx.session);

  const prNumber = ctx.payload.prNumber as number;
  const repo = ctx.payload.repo as string;
  const planContent = ctx.payload.planContent as string;

  const orchestrator = getTestOrchestrator();
  if (orchestrator) {
    try {
      orchestrator.submitTest({ prNumber, repo, planContent });
      prLog(prNumber, repo, 'retry_approved');
      log.info('Test retry approved — plan re-submitted to test queue', { prNumber });
      // eslint-disable-next-line no-catch-all/no-catch-all -- the human already approved; surface the failure to the agent instead of crashing the response handler
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      log.error('Failed to re-submit test plan on retry', { prNumber, err: reason });
      ctx.notify(`Retry failed: ${reason}.`);
    }
    return;
  }

  ctx.notify('No test orchestrator installed — cannot retry test (vm-test-orchestrator component missing).');
}
|
||||
|
||||
// Route responses to 'pr_retry_test' approvals (created in postRetryCard) to onRetryTestApproved.
registerApprovalHandler('pr_retry_test', onRetryTestApproved);
|
||||
+144
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* pr-factory-core guard — the GitHub receiver's consumption of core's raw
|
||||
* webhook registry (registerWebhookHandler) plus its HMAC-SHA256 signature
|
||||
* verification and pull_request event filtering.
|
||||
*
|
||||
* Drives the REAL shared webhook server over HTTP on an ephemeral
|
||||
* WEBHOOK_PORT: registerGitHubWebhook mounts /webhook/github through
|
||||
* registerWebhookHandler, so deleting the registration call — or core's raw
|
||||
* dispatch branch drifting under it — turns every leg red.
|
||||
*/
|
||||
import crypto from 'crypto';
|
||||
import { afterAll, describe, expect, it } from 'vitest';
|
||||
|
||||
import { registerWebhookHandler, stopWebhookServer } from '../../webhook-server.js';
|
||||
import { registerGitHubWebhook, type PREvent } from './webhook.js';
|
||||
|
||||
// Port chosen at random from 21000–40999; post() exports it as WEBHOOK_PORT before the first request.
const PORT = 21000 + Math.floor(Math.random() * 20000);
|
||||
// Shared secret handed to registerGitHubWebhook and used by sign() for valid signatures.
const SECRET = 'test-webhook-secret';
|
||||
|
||||
// Every PREvent the registered onPullRequest callback has been handed; tests reset it with `received.length = 0`.
const received: PREvent[] = [];
|
||||
// Set by post() after the one-time handler registration so later calls skip it.
let registered = false;
|
||||
|
||||
/**
 * Build an `x-hub-signature-256` header value for a request body.
 *
 * @param body - Raw request body to sign.
 * @param secret - HMAC key.
 * @returns `sha256=` followed by the hex HMAC-SHA256 of `body`.
 */
function sign(body: string, secret: string): string {
  const hmac = crypto.createHmac('sha256', secret);
  hmac.update(body);
  return `sha256=${hmac.digest('hex')}`;
}
|
||||
|
||||
/**
 * Serialize a minimal GitHub `pull_request` webhook payload for PR #42 of
 * `acme/widgets`.
 *
 * @param action - Value for the payload's top-level `action` field.
 * @param overrides - Properties merged over the default `pull_request` object.
 * @returns The payload as a JSON string.
 */
function prPayload(action: string, overrides: Record<string, unknown> = {}): string {
  const defaults = {
    number: 42,
    title: 'Add widgets',
    body: 'Body text',
    user: { login: 'octocat' },
    head: { sha: 'abc123' },
    diff_url: 'https://github.com/acme/widgets/pull/42.diff',
    html_url: 'https://github.com/acme/widgets/pull/42',
    merged: false,
    draft: false,
  };
  const pullRequest = { ...defaults, ...overrides };
  const repository = { full_name: 'acme/widgets' };
  return JSON.stringify({ action, pull_request: pullRequest, repository });
}
|
||||
|
||||
/**
 * POST `body` to `/webhook/<path>` on the shared webhook server.
 *
 * The first call sets WEBHOOK_PORT and registers two handlers: the GitHub
 * webhook (its callback appends to `received`) and a `boom` handler that
 * always throws. A failed fetch is retried every 50 ms; the error from the
 * 41st failed attempt is rethrown.
 *
 * @param path - Route segment after `/webhook/`.
 * @param body - Raw request body.
 * @param headers - Request headers.
 * @returns The fetch response.
 */
async function post(path: string, body: string, headers: Record<string, string>): Promise<globalThis.Response> {
  if (!registered) {
    process.env.WEBHOOK_PORT = String(PORT);
    registerGitHubWebhook(SECRET, async (pr) => {
      received.push(pr);
    });
    registerWebhookHandler('boom', () => {
      throw new Error('handler exploded');
    });
    registered = true;
  }

  const url = `http://127.0.0.1:${PORT}/webhook/${path}`;
  let failures = 0;
  while (true) {
    try {
      return await fetch(url, { method: 'POST', body, headers });
    } catch (err) {
      if (failures >= 40) throw err;
      failures += 1;
      await new Promise((resolve) => {
        setTimeout(resolve, 50);
      });
    }
  }
}
|
||||
|
||||
/**
 * Wait 20 ms. The receiver responds 200 before dispatching, so tests pause
 * here to give the asynchronous callback a chance to run.
 */
async function settle(): Promise<void> {
  await new Promise((done) => {
    setTimeout(done, 20);
  });
}
|
||||
|
||||
// Once every test has run: stop the shared webhook server, then drop the port override.
afterAll(async function teardown(): Promise<void> {
  await stopWebhookServer();
  delete process.env.WEBHOOK_PORT;
});
|
||||
|
||||
// End-to-end checks against the real shared webhook server. Every test that
// inspects `received` clears it first, so a test does not depend on what an
// earlier test (or an earlier retry of itself) left behind.
describe('GitHub webhook on the shared raw-handler route', () => {
  it('accepts a signed pull_request.opened and hands the parsed PREvent to the callback', async () => {
    received.length = 0;
    const body = prPayload('opened');
    const res = await post('github', body, {
      'x-hub-signature-256': sign(body, SECRET),
      'x-github-event': 'pull_request',
    });
    await settle();

    expect(res.status).toBe(200);
    expect(received).toHaveLength(1);
    expect(received[0]).toMatchObject({
      action: 'opened',
      number: 42,
      author: 'octocat',
      repoFullName: 'acme/widgets',
      headSha: 'abc123',
      draft: false,
      merged: false,
    });
  });

  it('rejects a bad signature with 401 and never calls the callback', async () => {
    received.length = 0;
    const body = prPayload('opened');
    const res = await post('github', body, {
      'x-hub-signature-256': sign(body, 'wrong-secret'),
      'x-github-event': 'pull_request',
    });
    await settle();

    expect(res.status).toBe(401);
    expect(received).toHaveLength(0);
  });

  it('silently drops non-pull_request events and unhandled actions', async () => {
    received.length = 0;

    const pushBody = JSON.stringify({ ref: 'refs/heads/main' });
    const pushRes = await post('github', pushBody, {
      'x-hub-signature-256': sign(pushBody, SECRET),
      'x-github-event': 'push',
    });
    expect(pushRes.status).toBe(200);

    const labeled = prPayload('labeled');
    const labeledRes = await post('github', labeled, {
      'x-hub-signature-256': sign(labeled, SECRET),
      'x-github-event': 'pull_request',
    });
    expect(labeledRes.status).toBe(200);

    await settle();
    expect(received).toHaveLength(0);
  });

  it('returns 405 for non-POST requests', async () => {
    // touch the server first so the route exists
    await post('boom', '{}', {}).catch(() => undefined);
    const res = await fetch(`http://127.0.0.1:${PORT}/webhook/github`, { method: 'GET' });
    expect(res.status).toBe(405);
  });

  it('a throwing raw handler yields 500 from the shared dispatch branch', async () => {
    const res = await post('boom', '{}', {});
    expect(res.status).toBe(500);
  });
});
|
||||
+113
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* GitHub webhook receiver. Mounts on the shared webhook server at
|
||||
* /webhook/github. Verifies HMAC-SHA256 signature, filters for
|
||||
* pull_request actions (opened, synchronize, closed, ready_for_review,
* converted_to_draft), and hands the parsed
|
||||
* PR off to the caller.
|
||||
*/
|
||||
import crypto from 'crypto';
|
||||
import http from 'http';
|
||||
|
||||
import { registerWebhookHandler } from '../../webhook-server.js';
|
||||
import { log } from '../../log.js';
|
||||
import { prLog } from './activity-log.js';
|
||||
|
||||
/** Flattened view of a GitHub `pull_request` webhook delivery, as handed to the caller's callback. */
export interface PREvent {
  /** Webhook `action`. */
  action: string;
  /** `pull_request.number`. */
  number: number;
  /** `pull_request.title`. */
  title: string;
  /** `pull_request.body`; an empty string when the payload value is falsy. */
  body: string;
  /** `pull_request.user.login`. */
  author: string;
  /** `repository.full_name`. */
  repoFullName: string;
  /** `pull_request.head.sha`. */
  headSha: string;
  /** `pull_request.diff_url`. */
  diffUrl: string;
  /** `pull_request.html_url`. */
  htmlUrl: string;
  /** `pull_request.merged`, coerced to a boolean. */
  merged: boolean;
  /** `pull_request.draft`, coerced to a boolean. */
  draft: boolean;
}
|
||||
|
||||
/**
 * Check a received signature header against the HMAC-SHA256 of the payload.
 *
 * @param payload - Raw request body that was signed.
 * @param signature - Received header value, expected as `sha256=<hex digest>`.
 * @param secret - HMAC key.
 * @returns `true` only when `signature` equals the expected value; `false`
 *   when it differs or when the comparison throws.
 */
function verifySignature(payload: string, signature: string, secret: string): boolean {
  const digest = crypto.createHmac('sha256', secret).update(payload).digest('hex');
  const expectedBytes = Buffer.from(`sha256=${digest}`);
  try {
    const receivedBytes = Buffer.from(signature);
    return crypto.timingSafeEqual(receivedBytes, expectedBytes);
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Buffer an incoming request body and decode it as a string.
 *
 * The promise settles exactly once:
 *  - resolves with the concatenated chunks when the request ends;
 *  - rejects when the request emits `error`, instead of never settling;
 *  - rejects as soon as more than `maxBytes` have arrived, so a sender that
 *    has not been authenticated yet cannot make the process buffer an
 *    unbounded body.
 *
 * @param req - Request stream to read.
 * @param maxBytes - Upper bound on the body size in bytes. The default of
 *   25 MiB sits just above the 25 MB payload cap GitHub documents for webhooks.
 * @returns The body, decoded with `Buffer#toString()`.
 */
function readBody(req: http.IncomingMessage, maxBytes: number = 25 * 1024 * 1024): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    let receivedBytes = 0;

    const onData = (chunk: Buffer): void => {
      receivedBytes += chunk.length;
      if (receivedBytes > maxBytes) {
        // Stop buffering and release what was collected; later events are no-ops
        // because the promise is already settled.
        req.removeListener('data', onData);
        chunks.length = 0;
        reject(new Error(`request body exceeds ${maxBytes} bytes`));
        return;
      }
      chunks.push(chunk);
    };

    req.on('data', onData);
    req.on('end', () => resolve(Buffer.concat(chunks).toString()));
    req.on('error', reject);
  });
}
|
||||
|
||||
/** Narrow an untrusted JSON value to a plain key/value record, or `undefined` when it is not an object. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : undefined;
}

/**
 * Mount the GitHub webhook receiver under the `github` route of the shared
 * webhook server.
 *
 * Per request the handler:
 *  - answers 405 to anything but POST;
 *  - answers 400 when the body cannot be read;
 *  - answers 401 when `x-hub-signature-256` is missing or does not verify
 *    against `secret`;
 *  - otherwise answers 200 immediately, then processes the delivery.
 *
 * After the 200, only `pull_request` events whose action is one of `opened`,
 * `synchronize`, `closed`, `ready_for_review` or `converted_to_draft` are
 * forwarded. A payload that is not valid JSON, or that lacks the
 * `pull_request`, `repository`, `pull_request.head` or `pull_request.user`
 * objects, is logged and dropped instead of throwing after the response was
 * already sent.
 *
 * @param secret - Shared secret used to verify the HMAC-SHA256 signature.
 * @param onPullRequest - Callback invoked with the parsed event; a rejection
 *   is logged and not propagated.
 */
export function registerGitHubWebhook(secret: string, onPullRequest: (pr: PREvent) => Promise<void>): void {
  registerWebhookHandler('github', async (req, res) => {
    if (req.method !== 'POST') {
      res.writeHead(405);
      res.end('Method not allowed');
      return;
    }

    let body: string;
    try {
      body = await readBody(req);
    } catch (err) {
      log.warn('GitHub webhook: failed to read request body', { err });
      res.writeHead(400);
      res.end('Bad request');
      return;
    }

    const signature = req.headers['x-hub-signature-256'];
    if (typeof signature !== 'string' || !signature || !verifySignature(body, signature, secret)) {
      log.warn('GitHub webhook: invalid signature');
      res.writeHead(401);
      res.end('Invalid signature');
      return;
    }

    // Acknowledge before processing; everything below happens after the response.
    res.writeHead(200);
    res.end('OK');

    const event = req.headers['x-github-event'];
    if (event !== 'pull_request') return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(body);
    } catch (err) {
      log.error('GitHub webhook: failed to parse payload', { err });
      return;
    }

    const payload = asRecord(parsed);
    const action = payload?.action;
    const validActions = ['opened', 'synchronize', 'closed', 'ready_for_review', 'converted_to_draft'];
    if (typeof action !== 'string' || !validActions.includes(action)) return;

    // The signature proves the sender, not the shape — check the objects we dereference.
    const pr = asRecord(payload?.pull_request);
    const repo = asRecord(payload?.repository);
    const head = asRecord(pr?.head);
    const user = asRecord(pr?.user);
    if (!pr || !repo || !head || !user) {
      log.warn('GitHub webhook: pull_request payload is missing expected objects', { action });
      return;
    }

    const prEvent: PREvent = {
      action,
      number: pr.number as number,
      title: pr.title as string,
      body: (pr.body as string) || '',
      author: user.login as string,
      repoFullName: repo.full_name as string,
      headSha: head.sha as string,
      diffUrl: pr.diff_url as string,
      htmlUrl: pr.html_url as string,
      merged: !!(pr.merged as boolean),
      draft: !!(pr.draft as boolean),
    };

    log.info('GitHub webhook: PR event', { action: prEvent.action, pr: prEvent.number, repo: prEvent.repoFullName });
    prLog(prEvent.number, prEvent.repoFullName, 'webhook_received', {
      action: prEvent.action,
      author: prEvent.author,
      title: prEvent.title,
      draft: prEvent.draft,
    });
    onPullRequest(prEvent).catch((err) => {
      log.error('Failed to handle PR event', {
        err,
        prNumber: prEvent.number,
        repo: prEvent.repoFullName,
        category: 'pr-webhook',
      });
    });
  });
}
|
||||
+123
@@ -0,0 +1,123 @@
|
||||
/**
|
||||
* PR Factory Worker — default triage / review / test-plan instructions
|
||||
* seeded into the worker agent group on first bootstrap.
|
||||
*
|
||||
* This is the OPERATOR OVERRIDE POINT for the review workflow. Bootstrap
|
||||
* seeds the text below into groups/pr-factory-worker/CLAUDE.local.md once,
|
||||
* on group creation, and never overwrites it — edit that file to tune the
|
||||
* workflow for your repo (trusted contributors, merge policy, review
|
||||
* depth). Operators who maintain their own container skill instead set
|
||||
* PR_FACTORY_REVIEW_SKILL (see defaults.ts): the PR trigger prompts then
|
||||
* invoke /<skill> and this default text is ignored by the agent.
|
||||
*/
|
||||
|
||||
/** Default Markdown instructions for the PR Factory worker agent: triage, in-depth review, test plan, and post-test follow-up. */
export const WORKER_INSTRUCTIONS = `# PR Factory Worker
|
||||
|
||||
You triage, review, and test-plan incoming GitHub pull requests. Each PR gets its own chat thread; the trigger message carries the PR metadata, description, and diff.
|
||||
|
||||
## Hard constraints
|
||||
|
||||
- **Never act on GitHub directly.** Use the \`credentialed_gh\` MCP tool for any write action (merge, close, comment, label, approve) — it requires human approval before executing. For read-only lookups (viewing PRs, listing checks, fetching user info), use \`gh\` directly in your shell.
|
||||
- All review output goes to the PR's chat thread via \`mcp__nanoclaw__send_message\`.
|
||||
- Every trigger message ends with a \`[PR_CONTEXT: channel=... thread=... repo=... pr=...]\` tag. Use it for repo/PR identifiers and file naming; never invent these values.
|
||||
|
||||
## PR triage workflow
|
||||
|
||||
Work through the three stages internally; output nothing until the report format at the end.
|
||||
|
||||
### Stage 1 — High-level read
|
||||
|
||||
Read the title, description, and diff at a high level. What does the PR do, how many files and which areas, is the scope coherent or does it mix unrelated concerns? If a local checkout is mounted (look under \`/workspace/extra/\`), use it to understand surrounding code and call sites — don't rely solely on the diff.
|
||||
|
||||
### Stage 2 — Author assessment
|
||||
|
||||
Check the "Trusted contributors" section at the end of this file. Authors listed there are exempt from alignment checks — classify as **Trusted contributor** and go straight to the report with decision = REVIEW.
|
||||
|
||||
For everyone else, look up their profile and history:
|
||||
|
||||
\`\`\`bash
|
||||
gh api users/{author} --jq '{login, created_at, public_repos, followers, bio}'
|
||||
gh api "repos/{owner}/{repo}/commits?author={author}&per_page=5" --jq 'length'
|
||||
\`\`\`
|
||||
|
||||
Classify — this shifts the decision threshold, not just the report label: **Known contributor** (prior merged commits here) and **Senior developer** (extensive public presence) get elevated credibility; **Established** / **New contributor** get the normal threshold; **New account** (young account, minimal activity) gets extra scrutiny on borderline calls. Suspicion is context, not a decision — a suspicious account with a strong PR still gets REVIEW.
|
||||
|
||||
### Stage 3 — Categorize and decide
|
||||
|
||||
Classify what the diff actually does (not what the author claims): feature, fix, simplification, documentation, test-only, dependency bump, skill/extension addition.
|
||||
|
||||
Then weigh functionality against size: a large change for something marginal → CLOSE; a small change for a genuine broadly-useful improvement → acceptable. Bundling unrelated changes → CLOSE.
|
||||
|
||||
- **CLOSE**: spam or junk; empty/broken; mixed unrelated changes; size unjustified by the gain.
|
||||
- **MERGE**: trivial, obviously-correct, low-risk (typo fixes, broken links) needing no further review.
|
||||
- **REVIEW**: everything that looks reasonable but needs careful examination — the default for real changes.
|
||||
|
||||
### Triage report
|
||||
|
||||
Post via \`mcp__nanoclaw__send_message\`. The thread opener already shows PR number, title, author, link — don't repeat them:
|
||||
|
||||
\`\`\`
|
||||
━━━ 📋 Triage ━━━━━━━━━━━━━━━━━━━
|
||||
|
||||
{author} ({classification}) · {PR type} · {N files}
|
||||
|
||||
{CLOSE / MERGE / REVIEW} — {one-line reason}
|
||||
\`\`\`
|
||||
|
||||
Write standard Markdown; the chat adapter converts to platform formatting.
|
||||
|
||||
After posting: **CLOSE** → call \`credentialed_gh\` with a \`gh pr close\` command and stop. **MERGE** → call \`credentialed_gh\` with a \`gh pr merge\` command (follow the repo's merge policy in "Repo policy" below) and stop. **REVIEW** → proceed to the in-depth review.
|
||||
|
||||
## In-depth review (REVIEW decisions)
|
||||
|
||||
Review the diff line by line for: correctness (does it do what it claims; edge cases; error paths), scope (no unrelated edits), consistency with the surrounding codebase's patterns, tests (does the change carry tests that would go red if it regressed), security (input handling, credentials, injection, path traversal), and docs (README/docs updated when behavior changes).
|
||||
|
||||
Post a compact review to the thread: verdict first (approve / request changes), then findings as a short bulleted list, most severe first, each with file:line. Save the full review as markdown to \`/workspace/agent/\` if it's long. For "request changes", post the specific asks as a \`gh pr comment\` via \`credentialed_gh\`.
|
||||
|
||||
If the change looks mergeable, write a test plan next.
|
||||
|
||||
## Test plan
|
||||
|
||||
Produce a high-level, human-readable plan — what needs testing and why, not step-by-step commands; the testing agent works out execution. Scale depth to the change: security/architecture → thorough; features and core fixes → moderate; simple fixes → light; docs/CI → minimal.
|
||||
|
||||
Format:
|
||||
|
||||
\`\`\`markdown
|
||||
# Test Plan: PR #<number> — <short title>
|
||||
|
||||
**PR:** <link>
|
||||
**What changed:** <1-2 lines>
|
||||
**Depth:** <Thorough / Moderate / Light / Minimal>
|
||||
|
||||
| # | What's being tested | Priority | Type | Requires |
|
||||
|---|---------------------|----------|------|----------|
|
||||
| 1 | One-line description | Must pass / Should pass / Nice to have | E2E / Security / Integration / Regression | capability tags or — |
|
||||
\`\`\`
|
||||
|
||||
For each area: what, why it matters, priority. Flag special requirements (platform differences, DB migrations, credentials, concurrency). Skip exact commands and expected log lines.
|
||||
|
||||
Save the plan to the worker group's test-plans directory — the file name pattern is load-bearing:
|
||||
|
||||
\`\`\`bash
|
||||
mkdir -p /workspace/agent/test-plans
|
||||
# pr-{prNum}-thread-{threadTsSafe}.md.pending — both values from the PR_CONTEXT tag;
|
||||
# threadTsSafe is the thread value with '.' replaced by '-'.
|
||||
cat > /workspace/agent/test-plans/pr-{prNum}-thread-{threadTsSafe}.md.pending << 'PLAN'
|
||||
(full plan)
|
||||
PLAN
|
||||
\`\`\`
|
||||
|
||||
Then call \`mcp__nanoclaw__send_to_testing()\`. The host posts the plan with an approval card; if a human approves, it goes to the test orchestrator. Do not post the plan text yourself, and do not output anything after the tool call.
|
||||
|
||||
## After test results
|
||||
|
||||
Test results arrive in the thread with a verdict. **PASS** → propose merge via \`credentialed_gh\` (one command, following the repo's merge policy). **FAIL/PARTIAL** → analyze whether failures are PR-related or pre-existing/environmental, post a one-line conclusion, then act (merge anyway, request fixes, or close). No preamble.
|
||||
|
||||
## Repo policy (operator-edited)
|
||||
|
||||
- **Merge strategy:** default (\`gh pr merge --merge\`). Edit this line if the repo requires squash or rebase.
|
||||
|
||||
## Trusted contributors (operator-edited)
|
||||
|
||||
List GitHub logins exempt from alignment checks, one per line. None are configured by default.
|
||||
`;
|
||||
@@ -0,0 +1,103 @@
|
||||
# Remove slack-bots (PR Factory component)
|
||||
|
||||
Deletes the five skill-owned modules and the four guard tests, removes the two barrel lines, reverts the 3-line patch in `src/channels/slack.ts`, the router suppression hunk, and the migrations-barrel insert. Each step is idempotent: if the file already has the stock form, leave it as is and continue.
|
||||
|
||||
> **Remove dependents first.** The `pr-factory-core` component imports `SUPERVISOR_INSTANCE` from `src/channels/slack-supervisor.ts` and `TESTER_INSTANCE` from `src/channels/slack-tester.ts`. Removing this component while `pr-factory-core` is installed breaks the build. Remove `pr-factory-core` first.
|
||||
>
|
||||
> **Data notes.**
|
||||
> - `messaging_groups` rows with `instance` `'slack-supervisor'` or `'slack-tester'` reference adapters that no longer run. Outbound delivery for their sessions is exact-key, so it gets the normal offline-adapter handling (warn + retry path) — it never falls back through the worker bot. Inbound from the removed apps stops arriving once the apps are disabled. Delete or re-wire those rows (and their wirings/sessions) when removing for good — use the `pnpm run ncl` group/wiring verbs, not raw SQL.
|
||||
> - If the legacy-upgrade migration already ran on this DB, its `schema_version` row (`module-slack-bots-bot-id-to-instance`) and the converted data stay — the conversion is forward-only and matches core's own 016 schema, so nothing needs reversing.
|
||||
|
||||
## 1. Delete the skill-owned files and tests
|
||||
|
||||
```bash
|
||||
rm -f src/channels/slack-supervisor.ts
|
||||
rm -f src/channels/slack-tester.ts
|
||||
rm -f src/channels/slack-bot-ids.ts
|
||||
rm -f src/channels/sibling-mention.ts
|
||||
rm -f src/db/migrations/module-slack-bots-bot-id-to-instance.ts
|
||||
rm -f src/channels/multibot-registration.test.ts
|
||||
rm -f src/channels/slack-ignore-senders.test.ts
|
||||
rm -f src/router-sibling-mention.test.ts
|
||||
rm -f src/db/slack-bots-migration.test.ts
|
||||
```
|
||||
|
||||
## 2. Delete the barrel imports (`src/channels/index.ts`)
|
||||
|
||||
Delete (not comment out) both lines:
|
||||
|
||||
```typescript
|
||||
import './slack-supervisor.js';
|
||||
import './slack-tester.js';
|
||||
```
|
||||
|
||||
## 3. Revert the worker adapter patch (`src/channels/slack.ts`)
|
||||
|
||||
Delete the import line:
|
||||
|
||||
```typescript
|
||||
import { slackBotUserIds, registerSlackBotUserId, withSiblingEchoGuard } from './slack-bot-ids.js';
|
||||
```
|
||||
|
||||
Delete the `void registerSlackBotUserId(env.SLACK_BOT_TOKEN, 'worker');` line, and change the factory's final return back to:
|
||||
|
||||
```typescript
|
||||
return bridge;
|
||||
```
|
||||
|
||||
Leave every other line (e.g. the `resolveChannelName` block) untouched — it belongs to the stock `/add-slack` file.
|
||||
|
||||
## 4. Revert the router hunk (`src/router.ts`)
|
||||
|
||||
Delete the import line:
|
||||
|
||||
```typescript
|
||||
import { hasSiblingMention } from './channels/sibling-mention.js';
|
||||
```
|
||||
|
||||
In `evaluateEngage`'s `'mention-sticky'` case, delete the comment and the call:
|
||||
|
||||
```typescript
|
||||
// Suppress if the message mentions a sibling bot on the same channel.
|
||||
if (hasSiblingMention(mg, text)) return false;
|
||||
```
|
||||
|
||||
## 5. Revert the migrations barrel (`src/db/migrations/index.ts`)
|
||||
|
||||
Delete the import line:
|
||||
|
||||
```typescript
|
||||
import { moduleSlackBotsBotIdToInstance } from './module-slack-bots-bot-id-to-instance.js';
|
||||
```
|
||||
|
||||
and the `moduleSlackBotsBotIdToInstance,` entry (plus its ordering comment block, if present) from the `migrations` array.
|
||||
|
||||
## 6. Remove the environment lines
|
||||
|
||||
Delete these four lines from `.env`:
|
||||
|
||||
```bash
|
||||
SLACK_SUPERVISOR_BOT_TOKEN=...
|
||||
SLACK_SUPERVISOR_SIGNING_SECRET=...
|
||||
SLACK_TESTER_BOT_TOKEN=...
|
||||
SLACK_TESTER_SIGNING_SECRET=...
|
||||
```
|
||||
|
||||
## 7. Slack side
|
||||
|
||||
Disable or uninstall the "PR Supervisor" and "PR Tester" Slack apps in the workspace (events posted to `/webhook/slack-supervisor` and `/webhook/slack-tester` now get a 404).
|
||||
|
||||
## 8. Validate
|
||||
|
||||
> **Skip this step during full-recipe removal.** Removing the whole PR Factory? This runs last in the reverse order, but the recipe-level validation is the binding one — run this block only when removing `slack-bots` in isolation.
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit
|
||||
(cd container/agent-runner && bun test)
|
||||
```
|
||||
|
||||
All four commands must finish with every suite green.
|
||||
|
||||
Note: if this component's folder (`.claude/skills/recipes/pr-factory/skills/slack-bots/`) stays committed after removal, the skill-sync drift guard (`src/skill-sync.test.ts`) goes red on the now-missing in-tree files — delete the folder (the clean path) or remove/amend its `files.txt`.
|
||||
@@ -0,0 +1,202 @@
|
||||
---
|
||||
name: slack-bots
|
||||
description: PR Factory component — add the supervisor and tester Slack bot adapters as named channel instances ('slack-supervisor', 'slack-tester') alongside the stock /add-slack worker bot, with shared sibling-bot echo suppression so the three bots never echo-loop each other, sibling-mention suppression for sticky threads, and an upgrade migration for legacy bot_id-shaped DBs.
|
||||
---
|
||||
|
||||
# slack-bots (PR Factory component)
|
||||
|
||||
Runs two extra Slack bot identities alongside the stock `/add-slack` worker bot, on core's native channel-instance substrate:
|
||||
|
||||
- **`slack-supervisor`** (`src/channels/slack-supervisor.ts`) — instance `'slack-supervisor'` (exported as `SUPERVISOR_INSTANCE`), webhook `/webhook/slack-supervisor`, env `SLACK_SUPERVISOR_BOT_TOKEN` / `SLACK_SUPERVISOR_SIGNING_SECRET`.
|
||||
- **`slack-tester`** (`src/channels/slack-tester.ts`) — instance `'slack-tester'` (exported as `TESTER_INSTANCE`), webhook `/webhook/slack-tester`, env `SLACK_TESTER_BOT_TOKEN` / `SLACK_TESTER_SIGNING_SECRET`.
|
||||
- **`src/channels/slack-bot-ids.ts`** — a shared module-level `Set` of Slack bot user IDs, `registerSlackBotUserId(token, label)` (resolves a bot's user id via Slack `auth.test`), and `withSiblingEchoGuard(bridge, ids)`. Every Slack adapter (worker, supervisor, tester) pushes its own id into the Set at factory time and returns its bridge wrapped in the guard — each bot silently drops inbound messages authored by its siblings, across **all four** Chat SDK dispatch paths (subscribed, new-mention, DM, plain), preventing cross-bot echo loops in shared channels. Trade-off (documented in the file header): a sibling message's attachments are downloaded before the drop.
|
||||
- **`src/channels/sibling-mention.ts`** — `hasSiblingMention(mg, text)`: suppresses a mention-sticky follow-up that starts with `@` when a sibling named-instance mention-mode bot shares the channel (the `@` is addressed to the sibling; without this, the supervisor's sticky thread would also fire on every `@pr-tester ...` message).
|
||||
- **`src/db/migrations/module-slack-bots-bot-id-to-instance.ts`** — legacy-upgrade migration: converts a `bot_id`-shaped multi-bot DB (the legacy pre-instance substrate) to migration 016's instance schema, including the Chat SDK state-namespace rewrite. Pure no-op on fresh installs.
|
||||
|
||||
Each adapter self-registers on import and is inert when its bot token is unset.
|
||||
|
||||
**Cross-component export contract.** `SUPERVISOR_INSTANCE` and `TESTER_INSTANCE` are the single owners of the `'slack-supervisor'` / `'slack-tester'` instance strings. The `pr-factory-core` component imports both; the webhook routes (`/webhook/<instance>`) and Chat SDK state namespaces derive from them. Keep the exports stable.
|
||||
|
||||
Integration surface: two appended barrel lines in `src/channels/index.ts`, one 3-line patch into the skill-installed `src/channels/slack.ts`, one one-line suppression hunk (+ import) in `src/router.ts`, one barrel insert in `src/db/migrations/index.ts`, and four `.env` reads. Everything else is added files.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Probe each before applying; stop on a failed probe and do what it names first.
|
||||
|
||||
1. **`/add-slack` is applied** — the worker bot exists and this skill patches its factory:
|
||||
|
||||
```bash
|
||||
test -f src/channels/slack.ts && grep -q '"@chat-adapter/slack"' package.json && echo OK
|
||||
```
|
||||
|
||||
If it fails: run `/add-slack`, then return here.
|
||||
|
||||
2. **Core ships the channel-instance substrate** (nanoclaw ≥ 2.1.11) — the adapters pass `instance` to `createChatSdkBridge`, the registry keys by instance, and `messaging_groups` carries the `instance` column:
|
||||
|
||||
```bash
|
||||
grep -q 'instance?: string' src/channels/adapter.ts && test -f src/db/migrations/016-messaging-group-instance.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: **stop — update core to a version that ships the native channel-instance substrate (nanoclaw ≥ 2.1.11) first.** This skill makes no core edits to substitute for it.
|
||||
|
||||
3. **Two additional Slack apps** in the same workspace as the worker bot (created in the Credentials section below), with their bot tokens and signing secrets at hand.
|
||||
|
||||
Each step below is idempotent: if the file already contains the patched form, leave it as is and continue.
|
||||
|
||||
## Apply
|
||||
|
||||
All copy sources are under this component's folder; run every command from the repo root:
|
||||
|
||||
```bash
|
||||
SKILL=.claude/skills/recipes/pr-factory/skills/slack-bots
|
||||
```
|
||||
|
||||
### 1. Copy the skill-owned modules
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/channels/slack-bot-ids.ts src/channels/slack-bot-ids.ts
|
||||
cp $SKILL/files/src/channels/slack-supervisor.ts src/channels/slack-supervisor.ts
|
||||
cp $SKILL/files/src/channels/slack-tester.ts src/channels/slack-tester.ts
|
||||
cp $SKILL/files/src/channels/sibling-mention.ts src/channels/sibling-mention.ts
|
||||
cp $SKILL/files/src/db/migrations/module-slack-bots-bot-id-to-instance.ts src/db/migrations/module-slack-bots-bot-id-to-instance.ts
|
||||
```
|
||||
|
||||
### 2. Append the self-registration imports
|
||||
|
||||
Append to `src/channels/index.ts` (skip any line already present):
|
||||
|
||||
```typescript
|
||||
import './slack-supervisor.js';
|
||||
import './slack-tester.js';
|
||||
```
|
||||
|
||||
### 3. Patch the worker adapter (`src/channels/slack.ts`)
|
||||
|
||||
Make three one-line edits to the stock `/add-slack` file so that the worker bot joins the shared sibling-suppression Set and gets the echo guard.
|
||||
|
||||
> **Do not re-run `/add-slack`'s copy step after this.** `/add-slack` rewrites `src/channels/slack.ts` from scratch, silently dropping these three lines — the worker then echo-loops with its siblings. `src/channels/slack-ignore-senders.test.ts` catches it (the factory's `auth.test` registration no longer lands in the shared Set), but re-apply this patch immediately if you ever re-run `/add-slack`.
|
||||
|
||||
**3a.** Append to the import block (skip if already present):
|
||||
|
||||
```typescript
|
||||
import { slackBotUserIds, registerSlackBotUserId, withSiblingEchoGuard } from './slack-bot-ids.js';
|
||||
```
|
||||
|
||||
**3b.** In the factory, insert immediately after the `createSlackAdapter(...)` statement:
|
||||
|
||||
```typescript
|
||||
void registerSlackBotUserId(env.SLACK_BOT_TOKEN, 'worker');
|
||||
```
|
||||
|
||||
**3c.** Change the factory's final `return bridge;` to:
|
||||
|
||||
```typescript
|
||||
return withSiblingEchoGuard(bridge, slackBotUserIds);
|
||||
```
|
||||
|
||||
If the stock file's shape has drifted cosmetically (formatting, extra config fields), apply the same three semantic edits and leave every other line (e.g. the `resolveChannelName` block) untouched.
|
||||
|
||||
### 4. Router suppression hunk (`src/router.ts`)
|
||||
|
||||
**4a.** Append to the import block (skip if already present):
|
||||
|
||||
```typescript
|
||||
import { hasSiblingMention } from './channels/sibling-mention.js';
|
||||
```
|
||||
|
||||
**4b.** In `evaluateEngage`, the `'mention-sticky'` case reads:
|
||||
|
||||
```typescript
|
||||
if (mg.is_group === 0) return false; // DMs never use mention-sticky sensibly
|
||||
const existing = findSessionForAgent(agent.agent_group_id, mg.id, threadId);
|
||||
```
|
||||
|
||||
Insert the suppression call (with its comment) between them:
|
||||
|
||||
```typescript
|
||||
if (mg.is_group === 0) return false; // DMs never use mention-sticky sensibly
|
||||
// Suppress if the message mentions a sibling bot on the same channel.
|
||||
if (hasSiblingMention(mg, text)) return false;
|
||||
const existing = findSessionForAgent(agent.agent_group_id, mg.id, threadId);
|
||||
```
|
||||
|
||||
### 5. Register the legacy-upgrade migration (`src/db/migrations/index.ts`)
|
||||
|
||||
**5a.** Append to the import block (skip if already present):
|
||||
|
||||
```typescript
|
||||
import { moduleSlackBotsBotIdToInstance } from './module-slack-bots-bot-id-to-instance.js';
|
||||
```
|
||||
|
||||
**5b.** In the `migrations` array, insert `moduleSlackBotsBotIdToInstance,` **immediately before `migration016,`**. The ordering is load-bearing: on a DB shaped by the legacy `bot_id` substrate, 016's recreate would silently drop `bot_id` and then collide on `UNIQUE(channel_type, platform_id, instance)` — a boot crash-loop. On fresh DBs the migration is a guarded no-op.
|
||||
|
||||
### 6. Copy the guard tests
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/channels/multibot-registration.test.ts src/channels/multibot-registration.test.ts
|
||||
cp $SKILL/files/src/channels/slack-ignore-senders.test.ts src/channels/slack-ignore-senders.test.ts
|
||||
cp $SKILL/files/src/router-sibling-mention.test.ts src/router-sibling-mention.test.ts
|
||||
cp $SKILL/files/src/db/slack-bots-migration.test.ts src/db/slack-bots-migration.test.ts
|
||||
```
|
||||
|
||||
| Test | Guards |
|
||||
|------|--------|
|
||||
| `src/channels/multibot-registration.test.ts` | Both barrel imports (real-barrel registration assertion), the unmocked `@chat-adapter/slack` dependency, and the `SUPERVISOR_INSTANCE`/`TESTER_INSTANCE` export contract |
|
||||
| `src/channels/slack-ignore-senders.test.ts` | The slack.ts 3-line delta, behaviorally: the factory's `auth.test` registration lands in the shared Set, the guard drops sibling-authored messages (incl. the DM dispatch path) via live Set identity through the real Chat SDK dispatch, and all three adapters resolve by exact instance key with `channelType === 'slack'` (no cross-instance hijack) |
|
||||
| `src/router-sibling-mention.test.ts` | The router's one-line `hasSiblingMention` call at the mention-sticky seam + the helper's instance-keyed sibling query, via the real `routeInbound` on a real migrated DB |
|
||||
| `src/db/slack-bots-migration.test.ts` | The migration's barrel presence AND its before-016 ordering, the bot_id→instance mapping, the chat_sdk namespace rewrite (leading-prefix-only), lock clearing, idempotence, and the fresh-DB no-op |
|
||||
|
||||
## Credentials
|
||||
|
||||
### Create the two Slack apps
|
||||
|
||||
Repeat the **Credentials** section of the `/add-slack` skill twice — once for a "PR Supervisor" app and once for a "PR Tester" app, in the **same workspace** as the worker bot. Same bot token scopes, Messages Tab, and Interactivity settings. The only difference is the webhook URL each app posts to:
|
||||
|
||||
- Supervisor app — Event Subscriptions and Interactivity **Request URL**: `https://your-domain/webhook/slack-supervisor`
|
||||
- Tester app — Event Subscriptions and Interactivity **Request URL**: `https://your-domain/webhook/slack-tester`
|
||||
|
||||
(The worker app stays on `/webhook/slack`. The shared webhook server serves all three paths on the same port.)
|
||||
|
||||
### Configure environment
|
||||
|
||||
Add to `.env`:
|
||||
|
||||
```bash
|
||||
SLACK_SUPERVISOR_BOT_TOKEN=xoxb-supervisor-bot-token
|
||||
SLACK_SUPERVISOR_SIGNING_SECRET=supervisor-signing-secret
|
||||
SLACK_TESTER_BOT_TOKEN=xoxb-tester-bot-token
|
||||
SLACK_TESTER_SIGNING_SECRET=tester-signing-secret
|
||||
```
|
||||
|
||||
## Using it
|
||||
|
||||
Wire each bot's `messaging_groups` row with the matching `instance` (`slack-supervisor` / `slack-tester`; the worker uses the default instance, which is the literal value `slack`). The `pr-factory-core` component's bootstrap creates these wirings itself. A bot with no wired row engages nothing: an inbound message auto-creates an unwired per-instance messaging group with `unknown_sender_policy = 'request_approval'`.
|
||||
|
||||
Restart the host after applying so the new adapters connect.
|
||||
|
||||
## Upgrading a legacy bot_id install
|
||||
|
||||
If the install previously ran the legacy pre-instance multi-bot substrate (`messaging_groups.bot_id`, namespace-prefixed `chat_sdk_*` keys), the copied migration converts everything at next boot: `bot_id NULL → instance = channel_type`, `'pr-supervisor' → 'slack-supervisor'`, `'pr-tester' → 'slack-tester'`, Chat SDK keys re-namespaced (worker unprefixed, named instances renamed), `chat_sdk_locks` cleared (TTL-bound; expect at most one re-@mention per subscribed thread). Webhook URLs are byte-identical to the legacy install's, so the Slack app consoles need zero changes. **Do not boot a tree without this skill applied on such a DB** — bare 016 crash-loops on it (see the migration header).
|
||||
|
||||
## Known smell (declared)
|
||||
|
||||
`src/channels/sibling-mention.ts` performs a raw SQL read against the core central DB (`messaging_groups` joined to `messaging_group_agents`) — skill-guidelines anti-pattern #4. The logic lives in the skill-owned file so the core touch is a one-line call, but the clean fix is a core helper in `src/db/messaging-groups.ts` (e.g. `countSiblingMentionBots(channelType, platformId, instance)`), a natural follow-on to the channel-instance substrate. Until then the query lives here, guarded by `src/router-sibling-mention.test.ts`.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
pnpm exec tsc -p container/agent-runner/tsconfig.json --noEmit
|
||||
(cd container/agent-runner && bun test)
|
||||
```
|
||||
|
||||
All suites green. Any failure means a step didn't apply cleanly.
|
||||
|
||||
## Channel Info
|
||||
|
||||
- **type**: `slack` (both adapters reuse the Slack channel type; they register under their instance names)
|
||||
- **registry keys**: `slack-supervisor`, `slack-tester` (the worker stays on the default key `slack`)
|
||||
- **platform-id-format**: same as `/add-slack` (`slack:{channelId}`); disambiguation happens via `messaging_groups.instance`, not the platform id
|
||||
- **supports-threads**: yes
|
||||
- **typical-use**: the PR Factory supervisor/tester/worker trio — separate bot identities for triage oversight and test orchestration in the same channels the worker posts to
|
||||
@@ -0,0 +1,13 @@
|
||||
# slack-bots — files this component owns outright: the four channel modules,
|
||||
# the legacy-upgrade migration, and the four guard tests. The barrel lines, the
|
||||
# slack.ts 3-line patch, the router hunk, and the migrations-barrel insert are
|
||||
# applied as edits per SKILL.md, not as file copies.
|
||||
src/channels/slack-bot-ids.ts
|
||||
src/channels/slack-supervisor.ts
|
||||
src/channels/slack-tester.ts
|
||||
src/channels/sibling-mention.ts
|
||||
src/db/migrations/module-slack-bots-bot-id-to-instance.ts
|
||||
src/channels/multibot-registration.test.ts
|
||||
src/channels/slack-ignore-senders.test.ts
|
||||
src/router-sibling-mention.test.ts
|
||||
src/db/slack-bots-migration.test.ts
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* pr-factory slack-bots guard — barrel registration of the supervisor and
|
||||
* tester Slack adapters.
|
||||
*
|
||||
* The skill's registration reach-in is two appended lines in the
|
||||
* `src/channels/index.ts` barrel (`import './slack-supervisor.js';` and
|
||||
* `import './slack-tester.js';`). Importing the barrel runs each module's
|
||||
* top-level `registerChannelAdapter(...)` call; without its import line the
|
||||
* adapter is silently absent at boot.
|
||||
*
|
||||
* Behavior, not structural: this imports the REAL barrel and asserts the
|
||||
* registry actually contains both adapters. It goes red on a deleted barrel
|
||||
* line, a barrel that fails to evaluate, or a missing `@chat-adapter/slack`
|
||||
* package (the unmocked import throws) — so it also covers the dependency
|
||||
* integration point. Do not mock the adapter package here.
|
||||
*
|
||||
* Importing the barrel is safe and needs no env priming: registration is a
|
||||
* pure top-level `registerChannelAdapter` call. All env-gating
|
||||
* (SLACK_SUPERVISOR_BOT_TOKEN / SLACK_TESTER_BOT_TOKEN) lives inside each
|
||||
* factory, which only runs at host startup via initChannelAdapters(). The
|
||||
* factory-level instance behavior (exact-key resolution, echo guard) is
|
||||
* guarded in slack-ignore-senders.test.ts, where the adapter package's
|
||||
* network edge is stubbed.
|
||||
*
|
||||
* The `'slack'` assertion pins this skill's prerequisite: the stock
|
||||
* /add-slack channel must be installed (this skill patches its factory).
|
||||
*/
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { getRegisteredChannelNames } from './channel-registry.js';
|
||||
import './index.js'; // the real barrel — triggers every channel's self-registration
|
||||
|
||||
// Capture the registered channel names right after the barrel import above has
// evaluated. The assertions must be attributable to the barrel's own import
// lines, so this file never statically imports slack-supervisor.js or
// slack-tester.js: doing so would run their top-level registerChannelAdapter
// calls and hide a deleted barrel line. The export-contract case further down
// loads them with dynamic import(), which runs only after this capture.
const registeredAtBarrelLoad = getRegisteredChannelNames();

describe('pr-factory slack bots registration', () => {
  it('registers slack-supervisor via the channel barrel', () => {
    expect(registeredAtBarrelLoad).toContain('slack-supervisor');
  });

  it('registers slack-tester via the channel barrel', () => {
    expect(registeredAtBarrelLoad).toContain('slack-tester');
  });

  it('stock slack channel is present (prerequisite: /add-slack)', () => {
    expect(registeredAtBarrelLoad).toContain('slack');
  });

  it('instance-name exports stay stable (cross-skill contract with pr-factory-core)', async () => {
    // These two strings are stamped into messaging_groups.instance by
    // pr-factory-core's bootstrap, and the /webhook/<instance> routes and Chat
    // SDK state namespaces are derived from them. Renaming either breaks live
    // installs, so the literal values are pinned here.
    const supervisorModule = await import('./slack-supervisor.js');
    const testerModule = await import('./slack-tester.js');

    expect(supervisorModule.SUPERVISOR_INSTANCE).toBe('slack-supervisor');
    expect(testerModule.TESTER_INSTANCE).toBe('slack-tester');
  });
});
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Sibling-mention suppression for multi-instance channels (owned by the
|
||||
* pr-factory `slack-bots` component skill).
|
||||
*
|
||||
* When several adapter instances share one platform channel (distinct
|
||||
* `instance` rows on the same `channel_type` + `platform_id`), a message
|
||||
* that starts with an `@` but did NOT mention this bot (`isMention` false)
|
||||
* is usually addressed to one of the siblings. A mention-sticky wiring
|
||||
* would still fire on it because the subscribed-thread session already
|
||||
* exists — so the router asks this helper before letting the sticky
|
||||
* follow-up engage.
|
||||
*
|
||||
* Returns true when at least one sibling bot (a different NAMED instance —
|
||||
* `instance != channel_type` — wired with engage_mode='mention') sits on
|
||||
* the same platform channel and the text starts with '@'. The caller
|
||||
* (evaluateEngage in src/router.ts) suppresses the sticky follow-up in
|
||||
* that case. The default instance (instance = channel_type) never counts
|
||||
* as a sibling — matching the validated fork semantics where only named
|
||||
* bots are mention-addressed.
|
||||
*
|
||||
* KNOWN SMELL (skill-guidelines anti-pattern #4): this is a raw SQL read
|
||||
* against the core central DB, dependent on the messaging_groups /
|
||||
* messaging_group_agents schema. The clean fix is a core helper in
|
||||
* src/db/messaging-groups.ts (e.g. `countSiblingMentionBots(channelType,
|
||||
* platformId, instance)`) — a natural follow-on to the native instance
|
||||
* substrate, tracked as an upstream carve-out. Until then the query lives
|
||||
* here, in skill-owned code, guarded by src/router-sibling-mention.test.ts.
|
||||
*/
|
||||
import { getDb } from '../db/connection.js';
|
||||
import type { MessagingGroup } from '../types.js';
|
||||
|
||||
/**
 * Decide whether a mention-sticky follow-up should be suppressed because the
 * message is probably addressed to a sibling bot.
 *
 * @param mg - Messaging group the message arrived on; its `channel_type`,
 *   `platform_id` and `instance` select the candidate sibling rows.
 * @param text - Message text; only text whose first character is `@` is considered.
 * @returns `true` when the text starts with `@` and at least one other
 *   named-instance row (`instance != channel_type`) on the same
 *   `channel_type` + `platform_id` is wired with `engage_mode = 'mention'`.
 */
export function hasSiblingMention(mg: MessagingGroup, text: string): boolean {
  // Cheap exit first: no leading '@' means no DB read at all.
  if (text.charAt(0) !== '@') return false;

  // A row without an explicit instance is compared as the default instance,
  // whose name is the channel type itself.
  const ownInstance = mg.instance ?? mg.channel_type;

  const siblingCount = getDb().prepare(
    `SELECT count(*) as n FROM messaging_groups mg2
     JOIN messaging_group_agents mga ON mg2.id = mga.messaging_group_id
     WHERE mg2.channel_type = ? AND mg2.platform_id = ?
     AND mg2.instance != mg2.channel_type AND mg2.instance != ?
     AND mga.engage_mode = 'mention'`,
  );
  const row = siblingCount.get(mg.channel_type, mg.platform_id, ownInstance) as { n: number };

  return row.n > 0;
}
|
||||
+76
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Shared sibling-bot suppression for multi-instance Slack installs
|
||||
* (owned by the pr-factory `slack-bots` component skill).
|
||||
*
|
||||
* Three Slack apps (worker, supervisor, tester) share one workspace. Each
|
||||
* adapter resolves its own bot user ID via auth.test at factory time and adds
|
||||
* it to the shared `slackBotUserIds` Set; each adapter's bridge is wrapped
|
||||
* with `withSiblingEchoGuard`, which silently drops inbound messages authored
|
||||
* by any registered sibling — preventing cross-bot echo loops in shared
|
||||
* channels.
|
||||
*
|
||||
* The guard wraps `bridge.setup` and intercepts the host's
|
||||
* `ChannelSetup.onInbound` callback, so one wrapper covers all four Chat SDK
|
||||
* dispatch paths (onSubscribedMessage, onNewMention, onDirectMessage,
|
||||
* onNewMessage) with zero core edits. The bridge stamps `content.senderId`
|
||||
* from the SDK author in messageToInbound (src/channels/chat-sdk-bridge.ts),
|
||||
* which is what the guard matches on.
|
||||
*
|
||||
* Accepted trade-off: the guard sits on the host side of the bridge's
|
||||
* messageToInbound, so a sibling message's attachments are downloaded before
|
||||
* the message is discarded. Negligible for Slack bot chatter (sibling bots
|
||||
* post text/cards); a native bridge-side `ignoreSenderIds` hook that drops
|
||||
* before attachment download is a candidate upstream carve-out, not part of
|
||||
* this skill.
|
||||
*/
|
||||
import { log } from '../log.js';
|
||||
import type { ChannelAdapter, ChannelSetup } from './adapter.js';
|
||||
|
||||
/**
 * Module-level Set of Slack bot user IDs. `registerSlackBotUserId` adds to it,
 * and it is the Set handed to `withSiblingEchoGuard`, which checks membership
 * on every inbound message.
 */
export const slackBotUserIds = new Set<string>();

/**
 * Resolve a Slack bot's user ID from its token via `auth.test` and add it to
 * the shared set.
 *
 * Safe to call multiple times (adding to a Set is idempotent). Intended to be
 * fire-and-forget at factory time: the guard holds the Set by identity and
 * consults it per message, so an id that lands after setup is still honored.
 *
 * Never throws. Every failure is logged as a warning so a misconfigured token
 * is visible instead of silently leaving the bot out of the Set:
 *  - Slack answers `ok: false` (e.g. `invalid_auth`) or omits `user_id`;
 *  - the request itself or JSON decoding fails.
 *
 * @param token - Slack bot token sent as the Bearer credential.
 * @param label - Human-readable adapter label, used only in log output.
 */
export async function registerSlackBotUserId(token: string, label: string): Promise<void> {
  try {
    const res = await fetch('https://slack.com/api/auth.test', {
      method: 'POST',
      headers: { Authorization: `Bearer ${token}`, 'Content-Type': 'application/x-www-form-urlencoded' },
    });
    const data = (await res.json()) as { ok: boolean; user_id?: string; error?: string };
    if (data.ok && data.user_id) {
      slackBotUserIds.add(data.user_id);
      log.info('Registered Slack bot user ID', { label, userId: data.user_id });
      return;
    }
    // Slack reports API-level failures in the JSON body rather than by
    // throwing; without this branch a bad token would go unnoticed.
    log.warn('Slack auth.test did not return a bot user ID', { label, error: data.error });
  } catch (err) {
    log.warn('Failed to resolve Slack bot user ID', { label, err });
  }
}
|
||||
|
||||
/**
 * Install a sibling-echo filter on a bridge.
 *
 * Replaces `bridge.setup` with a version that passes the original setup a
 * copy of the host config whose `onInbound` first inspects
 * `message.content.senderId`. When that is a string present in `ids`, the
 * message is logged at debug level and dropped; otherwise it is forwarded to
 * the host's own `onInbound` unchanged.
 *
 * The adapter object is mutated in place and the same reference is returned,
 * so extras attached to it by a factory (resolveChannelName, openDM) survive
 * whether they are set before or after wrapping.
 *
 * @param bridge - Adapter whose `setup` is wrapped.
 * @param ids - Sender IDs to drop; read on every message, so later additions
 *   to the underlying Set take effect without re-wrapping.
 * @returns The same `bridge` instance.
 */
export function withSiblingEchoGuard(bridge: ChannelAdapter, ids: ReadonlySet<string>): ChannelAdapter {
  const innerSetup = bridge.setup.bind(bridge);

  bridge.setup = async (hostConfig: ChannelSetup): Promise<void> => {
    const deliver = hostConfig.onInbound.bind(hostConfig);

    const guardedConfig: ChannelSetup = {
      ...hostConfig,
      onInbound(platformId, threadId, message) {
        // content may be absent or carry a non-string senderId; both are forwarded.
        const content = message.content as { senderId?: unknown } | null | undefined;
        const senderId = content?.senderId;
        if (typeof senderId === 'string' && ids.has(senderId)) {
          log.debug('Dropped sibling-bot inbound', { adapter: bridge.name, platformId, senderId });
          return;
        }
        return deliver(platformId, threadId, message);
      },
    };

    await innerSetup(guardedConfig);
  };

  return bridge;
}
|
||||
+278
@@ -0,0 +1,278 @@
|
||||
/**
|
||||
* pr-factory slack-bots guard — the sibling echo guard and the slack.ts
|
||||
* delta this skill patches into the stock /add-slack adapter:
|
||||
*
|
||||
* 1. `import { slackBotUserIds, registerSlackBotUserId, withSiblingEchoGuard } from './slack-bot-ids.js';`
|
||||
* 2. `void registerSlackBotUserId(env.SLACK_BOT_TOKEN, 'worker');` in the factory
|
||||
* 3. `return withSiblingEchoGuard(bridge, slackBotUserIds);` replacing `return bridge;`
|
||||
*
|
||||
* Behavior, not structural — driven through the REAL entry
|
||||
* (initChannelAdapters runs the real slack.ts / slack-supervisor.ts /
|
||||
* slack-tester.ts factories; messages flow through the REAL Chat SDK dispatch
|
||||
* against the real SqliteStateAdapter on the real migrated central DB).
|
||||
* Hermetic at the external edge only: `@chat-adapter/slack` is stubbed (its
|
||||
* initialize() makes a live auth.test call through @slack/web-api — the
|
||||
* unmocked-dependency guard lives in multibot-registration.test.ts instead),
|
||||
* readEnvFile injects test credentials (key-filtered, so a drifted env-var
|
||||
* name in a factory still goes red), and global fetch serves the
|
||||
* slack-bot-ids auth.test lookup.
|
||||
*
|
||||
* What goes red:
|
||||
* - slack.ts line 2 deleted → the worker's bot user id never lands in the
|
||||
* shared Set;
|
||||
* - slack.ts line 3 deleted → the bridge's inbound path loses the guard and
|
||||
* the sibling-bot message is forwarded instead of dropped;
|
||||
* - slack.ts line 1 deleted → build/typecheck leg, and this file's import
|
||||
* of './slack.js' fails to evaluate;
|
||||
* - the wrapper stops covering the onDirectMessage dispatch path (the DM
|
||||
* case below — newly covered by withSiblingEchoGuard; the old
|
||||
* three-site bridge hack missed it);
|
||||
* - an adapter loses its `instance` wiring (exact-key resolution cases) —
|
||||
* supervisor/tester traffic would silently hijack or shadow the worker.
|
||||
*
|
||||
* The sibling id is added to the EXPORTED Set after the bridge is created:
|
||||
* that pins the cross-adapter contract — the supervisor/tester factories push
|
||||
* their bot user ids into the same module-level Set object, and the guard
|
||||
* must consult it live (Set identity, not a snapshot).
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { Message, parseMarkdown, type Adapter, type Chat } from 'chat';
|
||||
|
||||
// Mutable state shared between the mock factories and the test cases. It is
// created through vi.hoisted so the vi.mock factories below may reference it.
const fixture = vi.hoisted(() => {
  const state = {
    env: {} as Record<string, string>,
    chats: new Map<string, unknown>(), // routingPath → captured Chat instance
    adapters: [] as unknown[],
  };
  return state;
});

// External platform edge: the real adapter's initialize() calls Slack's
// auth.test over the network, so the package is replaced by a minimal stub
// here. Its presence is checked unmocked in multibot-registration.test.ts.
vi.mock('@chat-adapter/slack', () => {
  const createSlackAdapter = (config: { botToken?: string; signingSecret?: string }) => {
    const stub = {
      name: 'slack',
      botToken: config.botToken,
      initialize: async () => {},
      channelIdFromThreadId: (threadId: string) => `slack:${threadId}`,
      isDM: (threadId: string) => threadId.startsWith('D'),
      fetchThread: async () => ({ channelName: null }),
    };
    // Record every stub so tests can hand the SDK adapter back to Chat.
    fixture.adapters.push(stub);
    return stub as unknown as Adapter;
  };
  return { createSlackAdapter };
});

// Credential injection. Key-filtered like the real readEnvFile: a factory only
// receives values for the keys it actually asks for.
vi.mock('../env.js', () => ({
  readEnvFile(keys: string[]) {
    const present = keys.filter((key) => fixture.env[key] !== undefined);
    return Object.fromEntries(present.map((key) => [key, fixture.env[key]]));
  },
}));

// Capture the real Chat instances instead of binding the shared webhook
// server. Tip signature: registerWebhookAdapter(chat, adapterName, routingPath)
// — the bridge passes routingPath = instance ?? adapter.name.
vi.mock('../webhook-server.js', () => {
  const registerWebhookAdapter = vi.fn((chat: unknown, _adapterName: string, routingPath: string) => {
    fixture.chats.set(routingPath, chat);
  });
  return { registerWebhookAdapter };
});
|
||||
|
||||
import { closeDb, initTestDb, runMigrations } from '../db/index.js';
|
||||
import { getChannelAdapterExact, initChannelAdapters, teardownChannelAdapters } from './channel-registry.js';
|
||||
import { slackBotUserIds } from './slack-bot-ids.js';
|
||||
import './slack.js'; // real worker module — registers the real (patched) factory
|
||||
import './slack-supervisor.js'; // real supervisor module
|
||||
import './slack-tester.js'; // real tester module
|
||||
|
||||
// Credentials for the stock worker bot only.
const WORKER_ENV = {
  SLACK_BOT_TOKEN: 'xoxb-test-worker',
  SLACK_SIGNING_SECRET: 'test-signing-secret',
};

// Worker credentials plus the supervisor and tester credentials.
const ALL_ENV = {
  ...WORKER_ENV,
  SLACK_SUPERVISOR_BOT_TOKEN: 'xoxb-test-supervisor',
  SLACK_SUPERVISOR_SIGNING_SECRET: 'test-super-secret',
  SLACK_TESTER_BOT_TOKEN: 'xoxb-test-tester',
  SLACK_TESTER_SIGNING_SECRET: 'test-tester-secret',
};

// Bot user id served by the stubbed auth.test for each test token.
const BOT_USER_BY_TOKEN: Record<string, string> = {
  'xoxb-test-worker': 'U-WORKER-BOT',
  'xoxb-test-supervisor': 'U-SUPER-BOT',
  'xoxb-test-tester': 'U-TESTER-BOT',
};
|
||||
|
||||
/**
 * Build a Chat SDK `Message` authored by `authorId`.
 *
 * The author's userId, userName and fullName are all set to `authorId`, and
 * the author is flagged as neither a bot nor "me". The message id carries a
 * short random suffix, so two messages from the same author get distinct ids.
 *
 * @param authorId - Identifier used for every author name field.
 * @param text - Plain text; also parsed with `parseMarkdown` for `formatted`.
 * @param threadId - Thread the message belongs to.
 */
function makeMessage(authorId: string, text: string, threadId: string): Message {
  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const author = { userId: authorId, userName: authorId, fullName: authorId, isBot: false, isMe: false };

  return new Message({
    id: `m-${authorId}-${randomSuffix}`,
    threadId,
    text,
    formatted: parseMarkdown(text),
    raw: {},
    author,
    metadata: { dateSent: new Date(), edited: false },
    attachments: [],
  });
}
|
||||
|
||||
// Host-side inbound spy: records whatever the adapters forward to the host.
const onInbound = vi.fn();

/**
 * Run the channel registry's initialisation with a host setup whose only
 * observable callback is the `onInbound` spy; the other callbacks do nothing.
 */
async function initAdapters() {
  const ignore = () => {};
  const buildHostSetup = () => ({
    onInbound,
    onInboundEvent: ignore,
    onMetadata: ignore,
    onAction: ignore,
  });
  await initChannelAdapters(buildHostSetup);
}
|
||||
|
||||
/**
 * Return the Chat instance captured for the worker bot's routing path
 * (`'slack'`), failing the current test if none was captured.
 */
function workerChat(): Chat {
  const captured = fixture.chats.get('slack');
  expect(captured).toBeTruthy();
  return captured as Chat;
}
|
||||
|
||||
/**
 * Return the first stub SDK adapter created by the mocked
 * `createSlackAdapter`, failing the current test if none was created.
 *
 * With WORKER_ENV only one factory produces an adapter, so index 0 is the
 * worker's. The presence check mirrors `workerChat()`: without it, a factory
 * that never ran would surface later as an opaque failure inside the Chat SDK
 * dispatch rather than as a clear assertion here.
 */
function workerSdkAdapter(): Adapter {
  const [first] = fixture.adapters;
  expect(first).toBeTruthy();
  return first as Adapter;
}
|
||||
|
||||
beforeEach(() => {
  // Start every test from an empty fixture, a reset spy and an empty bot-id Set.
  fixture.env = {};
  fixture.chats.clear();
  fixture.adapters.length = 0;
  onInbound.mockReset();
  slackBotUserIds.clear();

  // Fresh, fully migrated test DB.
  runMigrations(initTestDb());

  // Only Slack's auth.test endpoint is answered; any other request fails the test.
  const fakeFetch = vi.fn(async (url: unknown, init?: { headers?: Record<string, string> }) => {
    if (!String(url).includes('slack.com/api/auth.test')) {
      throw new Error(`unexpected fetch in test: ${String(url)}`);
    }
    const authHeader = init?.headers?.Authorization ?? '';
    const token = authHeader.replace('Bearer ', '');
    return { json: async () => ({ ok: true, user_id: BOT_USER_BY_TOKEN[token] ?? 'U-UNKNOWN' }) };
  });
  vi.stubGlobal('fetch', fakeFetch);
});
|
||||
|
||||
afterEach(async () => {
  // Cleanup must run even when adapter teardown rejects; otherwise the open
  // DB and the stubbed global `fetch` would leak into the next test.
  try {
    await teardownChannelAdapters();
  } finally {
    try {
      closeDb();
    } finally {
      vi.unstubAllGlobals();
    }
  }
});
|
||||
|
||||
describe('slack worker bot — slack-bot-ids delta', () => {
  it('factory is inert without SLACK_BOT_TOKEN (reads the token from .env)', async () => {
    fixture.env = {};
    await initAdapters();

    const workerBridge = getChannelAdapterExact('slack');
    expect(workerBridge).toBeUndefined();
  });

  it('factory registers the worker bot user id in the shared Set via auth.test', async () => {
    fixture.env = { ...WORKER_ENV };
    await initAdapters();

    const workerBridge = getChannelAdapterExact('slack');
    expect(workerBridge).toBeDefined();

    // The factory's `void registerSlackBotUserId(env.SLACK_BOT_TOKEN, 'worker')`
    // is not awaited, so poll until the stubbed auth.test reply has landed.
    await vi.waitFor(() => {
      expect(slackBotUserIds.has('U-WORKER-BOT')).toBe(true);
    });
  });

  it('guard drops sibling-bot messages via the shared Set and forwards humans', async () => {
    fixture.env = { ...WORKER_ENV };
    await initAdapters();

    const bridge = getChannelAdapterExact('slack');
    expect(bridge).toBeDefined();
    const chat = workerChat();

    // Subscribing through the bridge (real state adapter) routes both
    // messages down the onSubscribedMessage dispatch path.
    await bridge!.subscribe!('slack:T-1', 'T-1');

    // Registered only AFTER the bridge exists: a sibling adapter
    // (supervisor/tester) may add its bot user id late, and the guard must
    // still honor it because it keeps the shared Set by identity and
    // consults it on every message.
    slackBotUserIds.add('U-SIBLING');

    const sdkAdapter = workerSdkAdapter();
    chat.processMessage(sdkAdapter, 'T-1', makeMessage('U-SIBLING', 'sibling bot noise', 'T-1'));
    chat.processMessage(sdkAdapter, 'T-1', makeMessage('U-HUMAN', 'real user message', 'T-1'));

    await vi.waitFor(() => {
      expect(onInbound).toHaveBeenCalledTimes(1);
    });
    // Leave room for the dropped message's async dispatch to (not) arrive.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 100);
    });

    expect(onInbound).toHaveBeenCalledTimes(1);
    const firstCall = onInbound.mock.calls[0];
    expect(firstCall[0]).toBe('slack:T-1');
    expect(firstCall[1]).toBe('T-1');
    expect(JSON.parse(JSON.stringify(firstCall[2].content)).senderId).toBe('U-HUMAN');
  });

  it('guard covers the DM dispatch path too', async () => {
    fixture.env = { ...WORKER_ENV };
    await initAdapters();
    const chat = workerChat();

    slackBotUserIds.add('U-SIBLING');

    // A 'D'-prefixed thread makes adapter.isDM() true, so the Chat SDK
    // dispatches through onDirectMessage. The fork's old three-site bridge
    // hack missed this path; the setup-level wrapper has to cover it.
    const sdkAdapter = workerSdkAdapter();
    chat.processMessage(sdkAdapter, 'D-1', makeMessage('U-SIBLING', 'sibling DM noise', 'D-1'));
    chat.processMessage(sdkAdapter, 'D-1', makeMessage('U-HUMAN', 'human DM', 'D-1'));

    await vi.waitFor(() => {
      expect(onInbound).toHaveBeenCalledTimes(1);
    });
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 100);
    });

    expect(onInbound).toHaveBeenCalledTimes(1);
    const firstCall = onInbound.mock.calls[0];
    const inbound = firstCall[2];
    expect(firstCall[0]).toBe('slack:D-1');
    expect(JSON.parse(JSON.stringify(inbound.content)).senderId).toBe('U-HUMAN');
    expect(inbound.isMention).toBe(true); // DMs are addressed to the bot by definition
  });
});
|
||||
|
||||
describe('slack supervisor/tester — instance keying', () => {
  it('all three adapters resolve by EXACT instance key with channelType slack', async () => {
    fixture.env = { ...ALL_ENV };
    await initAdapters();

    const worker = getChannelAdapterExact('slack');
    const supervisor = getChannelAdapterExact('slack-supervisor');
    const tester = getChannelAdapterExact('slack-tester');

    expect(worker).toBeDefined();
    expect(worker!.channelType).toBe('slack');
    expect(worker!.instance).toBeUndefined(); // default instance — keyed by channelType

    // Named instances keep channelType 'slack' and expose their own instance name.
    const namedInstances = [
      { adapter: supervisor, instanceName: 'slack-supervisor' },
      { adapter: tester, instanceName: 'slack-tester' },
    ];
    for (const { adapter, instanceName } of namedInstances) {
      expect(adapter).toBeDefined();
      expect(adapter!.channelType).toBe('slack');
      expect(adapter!.instance).toBe(instanceName);
    }

    // No cross-instance identity mixups: three distinct live adapters.
    const distinctAdapters = new Set([worker, supervisor, tester]);
    expect(distinctAdapters.size).toBe(3);

    // Each factory registered its own bot user id in the shared Set.
    await vi.waitFor(() => {
      for (const botUserId of ['U-WORKER-BOT', 'U-SUPER-BOT', 'U-TESTER-BOT']) {
        expect(slackBotUserIds.has(botUserId)).toBe(true);
      }
    });

    // The tester keeps its resolveChannelName extra; the supervisor ships
    // without one (deliberate asymmetry, ported from the validated fork).
    expect(typeof tester!.resolveChannelName).toBe('function');
    expect(supervisor!.resolveChannelName).toBeUndefined();
  });

  it('a named instance never hijacks the worker key (and vice versa)', async () => {
    fixture.env = { ...WORKER_ENV }; // supervisor/tester unconfigured
    await initAdapters();

    expect(getChannelAdapterExact('slack')).toBeDefined();
    for (const unconfiguredKey of ['slack-supervisor', 'slack-tester']) {
      expect(getChannelAdapterExact(unconfiguredKey)).toBeUndefined();
    }
  });
});
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Second Slack adapter — the PR Factory Supervisor bot.
|
||||
*
|
||||
* Runs alongside the primary Slack worker bot (slack.ts) so the same
|
||||
* workspace has two distinct bot identities. The named `instance` drives the
|
||||
* registry key, the webhook route (/webhook/slack-supervisor), the Chat SDK
|
||||
* state namespace, and `messaging_groups.instance` — the router
|
||||
* disambiguates per-instance, channelType stays 'slack'.
|
||||
*
|
||||
* Self-registers on import. Inert if SLACK_SUPERVISOR_BOT_TOKEN is unset.
|
||||
*
|
||||
* Env (read from .env):
|
||||
* SLACK_SUPERVISOR_BOT_TOKEN — supervisor app's bot token
|
||||
* SLACK_SUPERVISOR_SIGNING_SECRET — supervisor app's signing secret
|
||||
*/
|
||||
import { createSlackAdapter } from '@chat-adapter/slack';
|
||||
|
||||
import { readEnvFile } from '../env.js';
|
||||
import { createChatSdkBridge } from './chat-sdk-bridge.js';
|
||||
import { registerChannelAdapter } from './channel-registry.js';
|
||||
import { slackBotUserIds, registerSlackBotUserId, withSiblingEchoGuard } from './slack-bot-ids.js';
|
||||
|
||||
/**
 * Instance name of the supervisor bot, defined here and nowhere else. The
 * pr-factory module's bootstrap imports it, so the export must stay stable.
 */
export const SUPERVISOR_INSTANCE = 'slack-supervisor';

registerChannelAdapter(SUPERVISOR_INSTANCE, {
  factory: () => {
    const { SLACK_SUPERVISOR_BOT_TOKEN: botToken, SLACK_SUPERVISOR_SIGNING_SECRET: signingSecret } = readEnvFile([
      'SLACK_SUPERVISOR_BOT_TOKEN',
      'SLACK_SUPERVISOR_SIGNING_SECRET',
    ]);
    // No bot token configured: the supervisor adapter stays inert.
    if (!botToken) return null;

    const slackAdapter = createSlackAdapter({ botToken, signingSecret });

    // Not awaited: the bot user id is registered in the background.
    void registerSlackBotUserId(botToken, 'supervisor');

    return withSiblingEchoGuard(
      createChatSdkBridge({
        adapter: slackAdapter,
        instance: SUPERVISOR_INSTANCE,
        concurrency: 'concurrent',
        supportsThreads: true,
      }),
      slackBotUserIds,
    );
  },
});
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Slack channel adapter for the PR Tester bot.
|
||||
*
|
||||
* Runs alongside the primary Slack bot (slack.ts) with a distinct bot
|
||||
* identity. The named `instance` drives the registry key, the webhook route
|
||||
* (/webhook/slack-tester), the Chat SDK state namespace, and
|
||||
* `messaging_groups.instance` — the router disambiguates per-instance,
|
||||
* channelType stays 'slack'.
|
||||
*
|
||||
* Self-registers on import. Inert if SLACK_TESTER_BOT_TOKEN is unset.
|
||||
*
|
||||
* Env (read from .env):
|
||||
* SLACK_TESTER_BOT_TOKEN — tester app's bot token
|
||||
* SLACK_TESTER_SIGNING_SECRET — tester app's signing secret
|
||||
*/
|
||||
import { createSlackAdapter } from '@chat-adapter/slack';
|
||||
|
||||
import { readEnvFile } from '../env.js';
|
||||
import { createChatSdkBridge } from './chat-sdk-bridge.js';
|
||||
import { registerChannelAdapter } from './channel-registry.js';
|
||||
import { slackBotUserIds, registerSlackBotUserId, withSiblingEchoGuard } from './slack-bot-ids.js';
|
||||
|
||||
/**
 * Instance name of the tester bot, defined here and nowhere else. The
 * pr-factory module imports it, so the export must stay stable.
 */
export const TESTER_INSTANCE = 'slack-tester';

registerChannelAdapter(TESTER_INSTANCE, {
  factory: () => {
    const { SLACK_TESTER_BOT_TOKEN: botToken, SLACK_TESTER_SIGNING_SECRET: signingSecret } = readEnvFile([
      'SLACK_TESTER_BOT_TOKEN',
      'SLACK_TESTER_SIGNING_SECRET',
    ]);
    // No bot token configured: the tester adapter stays inert.
    if (!botToken) return null;

    const slackAdapter = createSlackAdapter({ botToken, signingSecret });

    // Not awaited: the bot user id is registered in the background.
    void registerSlackBotUserId(botToken, 'tester');

    const bridge = createChatSdkBridge({
      adapter: slackAdapter,
      instance: TESTER_INSTANCE,
      concurrency: 'concurrent',
      supportsThreads: true,
    });

    // Best-effort lookup: a failed fetch or a missing channelName yields null.
    const resolveChannelName = async (platformId: string) => {
      try {
        const threadInfo = await slackAdapter.fetchThread(platformId);
        return (threadInfo as { channelName?: string }).channelName ?? null;
      } catch {
        return null;
      }
    };
    bridge.resolveChannelName = resolveChannelName;

    return withSiblingEchoGuard(bridge, slackBotUserIds);
  },
});
|
||||
+101
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* Fork-upgrade migration (owned by the pr-factory `slack-bots` component
|
||||
* skill): convert a bot_id-shaped multi-bot DB to the native channel-instance
|
||||
* substrate. Pure no-op on fresh installs.
|
||||
*
|
||||
* Ordering is load-bearing: this runs immediately BEFORE migration 016
|
||||
* (messaging-group-instance) in the barrel. On a DB produced by the old
|
||||
* fork's `messaging-groups-bot-id` migration, 016's naked recreate would
|
||||
* silently drop `bot_id` and then its `instance = channel_type` backfill
|
||||
* would collide on UNIQUE(channel_type, platform_id, instance) for
|
||||
* supervisor/tester rows sharing the worker's platform_id — a boot
|
||||
* crash-loop. This migration recreates messaging_groups to 016's EXACT
|
||||
* target schema first, mapping bot_id → instance:
|
||||
*
|
||||
* bot_id NULL → instance = channel_type (default instance)
|
||||
* bot_id 'pr-supervisor' → instance = 'slack-supervisor'
|
||||
* bot_id 'pr-tester' → instance = 'slack-tester'
|
||||
* any other bot_id → instance = bot_id (carried verbatim)
|
||||
*
|
||||
* 016's idempotency guard then sees the `instance` column and early-returns
|
||||
* (and the runner still records it as applied — the schema_version insert is
|
||||
* unconditional after up()).
|
||||
*
|
||||
* The same guarded arm rewrites the Chat SDK state namespace. The old fork's
|
||||
* bridge prefixed EVERY key with `botId ?? channelType` (worker keys were
|
||||
* 'slack:…'); the native substrate keeps the default instance UNPREFIXED and
|
||||
* prefixes named instances with the new instance names. So: strip the
|
||||
* LEADING 'slack:' prefix only (dedupe keys contain ':slack:' internally —
|
||||
* never blanket-replace), rename the 'pr-supervisor:'/'pr-tester:' prefixes
|
||||
* to the instance names, mirror all three on chat_sdk_subscriptions.thread_id,
|
||||
* and clear chat_sdk_locks (TTL-bound; at most one re-@mention per subscribed
|
||||
* thread after upgrade). Safe zero-touch: migrations run at boot before any
|
||||
* adapter starts, and this arm only fires on DBs that had `bot_id`, which
|
||||
* only the old fork bridge could have produced.
|
||||
*
|
||||
* disableForeignKeys: table recreate needs the DROP+RENAME window with FK
|
||||
* enforcement off (five child tables reference messaging_groups(id) — see
|
||||
* 016's header).
|
||||
*/
|
||||
import type Database from 'better-sqlite3';
|
||||
import type { Migration } from './index.js';
|
||||
|
||||
/**
 * Converts a `bot_id`-shaped messaging_groups table to the `instance` schema
 * and rewrites the Chat SDK state key prefixes to match. Returns without
 * touching anything when messaging_groups has no `bot_id` column.
 */
export const moduleSlackBotsBotIdToInstance: Migration = {
  version: 16, // ordering hint only — runner stores applied-order; sits right before 016
  name: 'module-slack-bots-bot-id-to-instance',
  disableForeignKeys: true,
  up: (db: Database.Database) => {
    const cols = db.prepare("PRAGMA table_info('messaging_groups')").all() as Array<{ name: string }>;
    const hasColumn = (column: string): boolean => cols.some((c) => c.name === column);
    if (!hasColumn('bot_id')) return; // fresh install / already converted

    // Old fork shape carried denied_at (migration 012 predates the fork's
    // bot_id migration), but tolerate a fork DB that lacks it.
    const deniedAt = hasColumn('denied_at') ? 'denied_at' : 'NULL';

    // Recreate the table with `instance` in place of `bot_id`, mapping
    // NULL → channel_type, the two known bot ids → their instance names, and
    // any other bot_id verbatim.
    db.exec(`
      CREATE TABLE messaging_groups_new (
        id TEXT PRIMARY KEY,
        channel_type TEXT NOT NULL,
        platform_id TEXT NOT NULL,
        instance TEXT NOT NULL,
        name TEXT,
        is_group INTEGER DEFAULT 0,
        unknown_sender_policy TEXT NOT NULL DEFAULT 'strict',
        created_at TEXT NOT NULL,
        denied_at TEXT,
        UNIQUE(channel_type, platform_id, instance)
      );
      INSERT INTO messaging_groups_new
        (id, channel_type, platform_id, instance, name, is_group, unknown_sender_policy, created_at, denied_at)
      SELECT id, channel_type, platform_id,
        CASE
          WHEN bot_id IS NULL THEN channel_type
          WHEN bot_id = 'pr-supervisor' THEN 'slack-supervisor'
          WHEN bot_id = 'pr-tester' THEN 'slack-tester'
          ELSE bot_id
        END,
        name, is_group, unknown_sender_policy, created_at, ${deniedAt}
      FROM messaging_groups;
      DROP TABLE messaging_groups;
      ALTER TABLE messaging_groups_new RENAME TO messaging_groups;
    `);

    // Chat SDK state rewrite — leading-prefix matches only.
    //
    // Order matters: the named prefixes are renamed BEFORE the worker's
    // 'slack:' prefix is stripped. Stripping first would let a worker key
    // whose remainder happens to start with 'pr-supervisor:' or 'pr-tester:'
    // be caught by a later rename pass and moved into a sibling's namespace.
    // The renamed prefixes ('slack-supervisor:', 'slack-tester:') can never
    // match 'slack:%', so the final strip only touches worker keys.
    const prefixRewrites = [
      { from: 'pr-supervisor:', to: 'slack-supervisor:' },
      { from: 'pr-tester:', to: 'slack-tester:' },
      { from: 'slack:', to: '' },
    ];
    const prefixedColumns = [
      { table: 'chat_sdk_kv', col: 'key' },
      { table: 'chat_sdk_lists', col: 'key' },
      { table: 'chat_sdk_subscriptions', col: 'thread_id' },
    ];
    for (const { table, col } of prefixedColumns) {
      for (const { from, to } of prefixRewrites) {
        // substr() is 1-based, so from.length + 1 is the first character after the prefix.
        db.exec(
          `UPDATE ${table} SET ${col} = '${to}' || substr(${col}, ${from.length + 1}) WHERE ${col} LIKE '${from}%'`,
        );
      }
    }

    // Locks are cleared outright rather than rewritten.
    db.exec('DELETE FROM chat_sdk_locks');
  },
};
|
||||
+190
@@ -0,0 +1,190 @@
|
||||
/**
|
||||
* pr-factory slack-bots guard — the fork-upgrade migration
|
||||
* (module-slack-bots-bot-id-to-instance) and its barrel placement.
|
||||
*
|
||||
* The migration's whole job is ordering-sensitive: it must convert a
|
||||
* bot_id-shaped fork DB to migration 016's exact target schema BEFORE 016
|
||||
* runs, or 016's recreate silently drops bot_id and its
|
||||
* `instance = channel_type` backfill collides on
|
||||
* UNIQUE(channel_type, platform_id, instance) for supervisor/tester rows
|
||||
* sharing the worker's platform_id — a boot crash-loop.
|
||||
*
|
||||
* Driven through the REAL runMigrations with the REAL barrel against a
|
||||
* synthetic fork-shaped DB (built by running the real pre-instance
|
||||
* migrations, then replaying the old fork's bot_id recreate + namespace-
|
||||
* prefixed Chat SDK state + stale fork schema_version names). Goes red if:
|
||||
* - the migration is removed from the barrel, or moved AFTER 016
|
||||
* (016 throws on the UNIQUE collision);
|
||||
* - the bot_id → instance mapping drifts;
|
||||
* - the Chat SDK key rewrite over-strips (internal ':slack:' dedupe
|
||||
* segments), under-strips, or stops renaming the named prefixes;
|
||||
* - the no-op guard breaks on fresh DBs.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import { closeDb, initTestDb, runMigrations } from './index.js';
|
||||
import { migrations } from './migrations/index.js';
|
||||
import { moduleSlackBotsBotIdToInstance } from './migrations/module-slack-bots-bot-id-to-instance.js';
|
||||
|
||||
/** List the column names of `table` as reported by PRAGMA table_info. */
function tableCols(db: Database.Database, table: string): string[] {
  const columnRows = db.prepare(`PRAGMA table_info('${table}')`).all() as Array<{ name: string }>;
  return columnRows.map((column) => column.name);
}
|
||||
|
||||
/** Names recorded in schema_version, ordered by version. */
function appliedNames(db: Database.Database): string[] {
  const versionRows = db.prepare('SELECT name FROM schema_version ORDER BY version').all() as Array<{ name: string }>;
  return versionRows.map((row) => row.name);
}
|
||||
|
||||
/**
 * Shape `db` like a database produced by the old fork:
 *   1. run the real migrations that precede the slack-bots shim (the
 *      pre-instance schema the fork forked from);
 *   2. replay the old fork's `messaging-groups-bot-id` recreate — bot_id
 *      column, UNIQUE(channel_type, platform_id, bot_id), partial NULL index;
 *   3. insert worker/supervisor/tester rows that share one platform_id;
 *   4. seed namespace-prefixed chat_sdk_* state (the old fork bridge prefixed
 *      EVERY key with botId ?? channelType);
 *   5. record the stale fork migration names in schema_version.
 *
 * Also asserts the shim is registered in the barrel and sits immediately
 * before 'messaging-group-instance'.
 */
function buildForkDb(db: Database.Database): void {
  const shimIdx = migrations.indexOf(moduleSlackBotsBotIdToInstance);
  expect(shimIdx).toBeGreaterThan(0); // barrel-presence leg: shim must be registered
  // Ordering leg: the shim must sit immediately before 016.
  const migrationAfterShim = migrations[shimIdx + 1];
  expect(migrationAfterShim?.name).toBe('messaging-group-instance');

  const preShimMigrations = migrations.slice(0, shimIdx);
  runMigrations(db, preShimMigrations);

  // FK enforcement is off only for the DROP + RENAME window.
  db.pragma('foreign_keys = OFF');
  db.exec(`
    CREATE TABLE messaging_groups_old (
      id TEXT PRIMARY KEY,
      channel_type TEXT NOT NULL,
      platform_id TEXT NOT NULL,
      bot_id TEXT,
      name TEXT,
      is_group INTEGER DEFAULT 0,
      unknown_sender_policy TEXT NOT NULL DEFAULT 'strict',
      created_at TEXT NOT NULL,
      denied_at TEXT,
      UNIQUE(channel_type, platform_id, bot_id)
    );
    INSERT INTO messaging_groups_old (id, channel_type, platform_id, bot_id, name, is_group, unknown_sender_policy, created_at, denied_at)
    SELECT id, channel_type, platform_id, NULL, name, is_group, unknown_sender_policy, created_at, denied_at
    FROM messaging_groups;
    DROP TABLE messaging_groups;
    ALTER TABLE messaging_groups_old RENAME TO messaging_groups;
    CREATE UNIQUE INDEX uq_messaging_groups_no_bot
    ON messaging_groups (channel_type, platform_id) WHERE bot_id IS NULL;
  `);
  db.pragma('foreign_keys = ON');

  // Three bots on the same Slack channel 'C1'; the worker has no bot_id.
  const insertGroup = db.prepare(
    `INSERT INTO messaging_groups (id, channel_type, platform_id, bot_id, name, is_group, unknown_sender_policy, created_at)
    VALUES (?, 'slack', 'C1', ?, NULL, 1, 'public', '2025-01-01T00:00:00Z')`,
  );
  const forkGroups: Array<[string, string | null]> = [
    ['mg-worker', null],
    ['mg-super', 'pr-supervisor'],
    ['mg-tester', 'pr-tester'],
  ];
  for (const [groupId, botId] of forkGroups) {
    insertGroup.run(groupId, botId);
  }

  const insertKv = db.prepare('INSERT INTO chat_sdk_kv (key, value, expires_at) VALUES (?, ?, NULL)');
  const forkKv: Array<[string, string]> = [
    ['slack:dedupe:slack:M1', '"1"'], // worker key with an INTERNAL :slack: segment
    ['pr-supervisor:dedupe:slack:M2', '"1"'],
    ['pr-tester:kv1', '"v"'],
  ];
  for (const [key, value] of forkKv) {
    insertKv.run(key, value);
  }

  const insertList = db.prepare('INSERT INTO chat_sdk_lists (key, idx, value, expires_at) VALUES (?, 0, \'"x"\', NULL)');
  insertList.run('pr-supervisor:list1');

  const insertSubscription = db.prepare('INSERT INTO chat_sdk_subscriptions (thread_id) VALUES (?)');
  for (const threadId of ['slack:T-1', 'pr-supervisor:T-2', 'pr-tester:T-3']) {
    insertSubscription.run(threadId);
  }

  const insertLock = db.prepare('INSERT INTO chat_sdk_locks (thread_id, token, expires_at) VALUES (?, ?, ?)');
  insertLock.run('slack:T-1', 'tok', Date.now() + 60_000);

  // Stale fork migration names — recorded forever, harmlessly (the runner
  // dedupes by name; tip never reuses these).
  const recordVersion = db.prepare(
    `INSERT INTO schema_version (version, name, applied)
    VALUES ((SELECT COALESCE(MAX(version), 0) + 1 FROM schema_version), ?, ?)`,
  );
  for (const staleName of ['messaging-groups-bot-id', 'module-pr-factory-pr-threads']) {
    recordVersion.run(staleName, '2025-01-01T00:00:00Z');
  }
}
|
||||
|
||||
/** Test database handle, reassigned before every test. */
let db: Database.Database;

// Each test gets a fresh DB from initTestDb; migrations are run by the test itself.
beforeEach(() => { db = initTestDb(); });

afterEach(() => { closeDb(); });
|
||||
|
||||
describe('module-slack-bots-bot-id-to-instance', () => {
  /** Run `sql` and return the given text column of every row. */
  const pluck = (sql: string, column: string): string[] =>
    (db.prepare(sql).all() as Array<Record<string, string>>).map((row) => row[column]);

  it('converts a fork-shaped DB: bot_id → instance, chat_sdk namespace rewrite, locks cleared', () => {
    buildForkDb(db);

    runMigrations(db); // full barrel — shim, then 016 (early-return), then the rest

    // messaging_groups is at 016's exact target schema.
    const columns = tableCols(db, 'messaging_groups');
    expect(columns).toContain('instance');
    expect(columns).not.toContain('bot_id');

    const selectInstance = db.prepare('SELECT instance FROM messaging_groups WHERE id = ?');
    const instanceOf = (id: string): string => (selectInstance.get(id) as { instance: string }).instance;
    expect(instanceOf('mg-worker')).toBe('slack');
    expect(instanceOf('mg-super')).toBe('slack-supervisor');
    expect(instanceOf('mg-tester')).toBe('slack-tester');

    // 016 must be RECORDED as applied even though its guard early-returned.
    const applied = appliedNames(db);
    expect(applied).toContain('module-slack-bots-bot-id-to-instance');
    expect(applied).toContain('messaging-group-instance');

    // Chat SDK keyspace: worker unprefixed (internal ':slack:' untouched),
    // named prefixes renamed to the instance names.
    expect(pluck('SELECT key FROM chat_sdk_kv ORDER BY key', 'key')).toEqual([
      'dedupe:slack:M1',
      'slack-supervisor:dedupe:slack:M2',
      'slack-tester:kv1',
    ]);
    expect(pluck('SELECT key FROM chat_sdk_lists', 'key')).toEqual(['slack-supervisor:list1']);
    expect(pluck('SELECT thread_id FROM chat_sdk_subscriptions ORDER BY thread_id', 'thread_id')).toEqual([
      'T-1',
      'slack-supervisor:T-2',
      'slack-tester:T-3',
    ]);

    const lockCount = db.prepare('SELECT count(*) AS n FROM chat_sdk_locks').get() as { n: number };
    expect(lockCount.n).toBe(0);
  });

  it('is idempotent: a second runMigrations pass is a clean no-op', () => {
    buildForkDb(db);
    runMigrations(db);

    const readGroups = () => db.prepare('SELECT id, instance FROM messaging_groups ORDER BY id').all();
    const readKvKeys = () => db.prepare('SELECT key FROM chat_sdk_kv ORDER BY key').all();

    const namesBefore = appliedNames(db);
    const groupsBefore = readGroups();
    const kvBefore = readKvKeys();

    expect(() => runMigrations(db)).not.toThrow();

    expect(appliedNames(db)).toEqual(namesBefore);
    expect(readGroups()).toEqual(groupsBefore);
    expect(readKvKeys()).toEqual(kvBefore);
  });

  it('is a pure no-op on fresh DBs (016 does its normal recreate)', () => {
    runMigrations(db);

    const columns = tableCols(db, 'messaging_groups');
    expect(columns).toContain('instance');
    expect(columns).not.toContain('bot_id');
    expect(appliedNames(db)).toContain('module-slack-bots-bot-id-to-instance');
    // The default-instance backfill semantics are 016's own (guarded by
    // core's messaging-groups tests); here we only pin that the shim didn't
    // interfere with a fresh-path run.
  });
});
|
||||
+209
@@ -0,0 +1,209 @@
|
||||
/**
|
||||
* pr-factory slack-bots guard — the router's sibling-mention suppression
|
||||
* reach-in: the one-line `if (hasSiblingMention(mg, text)) return false;`
|
||||
* in evaluateEngage's 'mention-sticky' case (src/router.ts), immediately
|
||||
* after the DM short-circuit and before the sticky-session lookup.
|
||||
*
|
||||
* Driven through the REAL routeInbound against a real migrated central DB
|
||||
* and real on-disk session DBs (container spawn mocked away). Goes red if
|
||||
* the router call is deleted, or if the helper's sibling query
|
||||
* (src/channels/sibling-mention.ts — named-instance rows on the same
|
||||
* channel, engage_mode='mention') drifts off the messaging_groups instance
|
||||
* schema.
|
||||
*
|
||||
* Scenario (the PR Factory channel): the supervisor holds a mention-sticky
|
||||
* wiring on a shared Slack channel; the tester sits on the same channel as a
|
||||
* named-instance mention-mode sibling. A follow-up in an already-engaged
|
||||
* thread that starts with '@' (addressed to the sibling) must NOT fire the
|
||||
* sticky wiring; a plain follow-up must. With no sibling, '@'-prefixed
|
||||
* follow-ups pass through unchanged.
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import fs from 'fs';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Container lifecycle is replaced by vitest mocks for this suite.
vi.mock('./container-runner.js', () => {
  return {
    wakeContainer: vi.fn().mockResolvedValue(true),
    isContainerRunning: vi.fn().mockReturnValue(false),
    getActiveContainerCount: vi.fn().mockReturnValue(0),
    killContainer: vi.fn(),
  };
});

// Keep the real config, but point DATA_DIR at a scratch directory.
vi.mock('./config.js', async () => {
  const realConfig = await vi.importActual('./config.js');
  return { ...realConfig, DATA_DIR: '/tmp/nanoclaw-test-sibling-mention' };
});

/** Scratch directory; same path as the mocked DATA_DIR above. */
const TEST_DIR = '/tmp/nanoclaw-test-sibling-mention';
|
||||
|
||||
import {
|
||||
closeDb,
|
||||
createAgentGroup,
|
||||
createMessagingGroup,
|
||||
createMessagingGroupAgent,
|
||||
initTestDb,
|
||||
runMigrations,
|
||||
} from './db/index.js';
|
||||
import { stopTypingRefresh } from './modules/typing/index.js';
|
||||
import { routeInbound } from './router.js';
|
||||
import { inboundDbPath, resolveSession } from './session-manager.js';
|
||||
import type { InboundEvent } from './channels/adapter.js';
|
||||
|
||||
/** Current time as an ISO-8601 string. */
function now(): string {
  const current = new Date();
  return current.toISOString();
}
|
||||
|
||||
/**
 * Build an InboundEvent for the router tests.
 *
 * Defaults: channelType 'slack', platformId 'CS', threadId null, and a group
 * chat message from sender 'User' / 'U1'. `text` and `isMention` shape the
 * message; a caller-supplied `message` is ignored; every other field of
 * `partial` overrides the envelope defaults.
 */
function event(partial: Partial<InboundEvent> & { text: string; isMention?: boolean }): InboundEvent {
  // Separate the message-level shorthands from the envelope overrides.
  const { text, isMention, message: _ignoredMessage, ...envelopeOverrides } = partial;
  const randomSuffix = Math.random().toString(36).slice(2, 8);

  return {
    channelType: 'slack',
    platformId: 'CS',
    threadId: null,
    instance: partial.instance,
    message: {
      id: `m-${Date.now()}-${randomSuffix}`,
      kind: 'chat',
      content: JSON.stringify({ text, sender: 'User', senderId: 'U1' }),
      timestamp: now(),
      isMention: isMention ?? false,
      isGroup: true,
    },
    ...envelopeOverrides,
  } as InboundEvent;
}
|
||||
|
||||
/**
 * Count the rows in `messages_in` of a session's inbound DB.
 *
 * @param agentGroupId Agent group owning the session.
 * @param sessionId    Session whose inbound DB is inspected.
 * @returns Number of rows currently in `messages_in`.
 */
function countInbound(agentGroupId: string, sessionId: string): number {
  const db = new Database(inboundDbPath(agentGroupId, sessionId));
  try {
    const row = db.prepare('SELECT count(*) AS n FROM messages_in').get() as { n: number };
    return row.n;
  } finally {
    // Close the handle even when the query throws (e.g. the table is missing),
    // so a failing assertion path does not leave the file open.
    db.close();
  }
}
|
||||
|
||||
/**
 * Wire a messaging group to an agent group with the given engage mode.
 * The wiring accepts all senders, drops ignored messages, uses per-thread
 * sessions at priority 0, and sets engage_pattern to '.' only in 'pattern' mode.
 */
function wire(opts: { mgaId: string; mgId: string; agId: string; engageMode: string }): void {
  const { mgaId, mgId, agId, engageMode } = opts;
  const engagePattern = engageMode === 'pattern' ? '.' : null;

  createMessagingGroupAgent({
    id: mgaId,
    messaging_group_id: mgId,
    agent_group_id: agId,
    engage_mode: engageMode as never,
    engage_pattern: engagePattern,
    sender_scope: 'all',
    ignored_message_policy: 'drop',
    session_mode: 'per-thread',
    priority: 0,
    created_at: now(),
  });
}
|
||||
|
||||
beforeEach(() => {
  // Start from an empty scratch directory. `force: true` makes removal a
  // no-op when the path is absent, replacing the existsSync check-then-act.
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DIR, { recursive: true });

  // Fresh, fully migrated test DB.
  const db = initTestDb();
  runMigrations(db);
});
|
||||
|
||||
afterEach(() => {
  // Remove the scratch directory even if closing the DB throws, so a failed
  // teardown cannot leave state behind for the next test.
  try {
    closeDb();
  } finally {
    fs.rmSync(TEST_DIR, { recursive: true, force: true });
  }
});
|
||||
|
||||
describe('router sibling-mention suppression (instance-keyed)', () => {
|
||||
it("suppresses mention-sticky follow-ups starting '@' when a sibling mention-mode instance shares the channel", async () => {
|
||||
createAgentGroup({ id: 'ag-super', name: 'Super', folder: 'super', agent_provider: null, created_at: now() });
|
||||
createAgentGroup({ id: 'ag-test', name: 'Tester', folder: 'tester', agent_provider: null, created_at: now() });
|
||||
createMessagingGroup({
|
||||
id: 'mg-super',
|
||||
channel_type: 'slack',
|
||||
platform_id: 'CS',
|
||||
instance: 'slack-supervisor',
|
||||
name: null,
|
||||
is_group: 1,
|
||||
unknown_sender_policy: 'public',
|
||||
created_at: now(),
|
||||
});
|
||||
createMessagingGroup({
|
||||
id: 'mg-test',
|
||||
channel_type: 'slack',
|
||||
platform_id: 'CS',
|
||||
instance: 'slack-tester',
|
||||
name: null,
|
||||
is_group: 1,
|
||||
unknown_sender_policy: 'public',
|
||||
created_at: now(),
|
||||
});
|
||||
wire({ mgaId: 'mga-super', mgId: 'mg-super', agId: 'ag-super', engageMode: 'mention-sticky' });
|
||||
wire({ mgaId: 'mga-test', mgId: 'mg-test', agId: 'ag-test', engageMode: 'mention' });
|
||||
|
||||
// Pre-existing sticky session for the supervisor on thread T1 —
|
||||
// follow-ups in this thread normally engage without a mention.
|
||||
const { session } = resolveSession('ag-super', 'mg-super', 'T1', 'per-thread');
|
||||
|
||||
// '@'-addressed to the sibling → suppressed, nothing written.
|
||||
await routeInbound(event({ text: '@pr-tester please test this', threadId: 'T1', instance: 'slack-supervisor' }));
|
||||
expect(countInbound('ag-super', session.id)).toBe(0);
|
||||
|
||||
// Plain follow-up → sticky engage, message lands.
|
||||
await routeInbound(event({ text: 'carry on', threadId: 'T1', instance: 'slack-supervisor' }));
|
||||
expect(countInbound('ag-super', session.id)).toBe(1);
|
||||
|
||||
stopTypingRefresh(session.id);
|
||||
});
|
||||
|
||||
// Only one messaging group is wired on channel CS2, so there is no sibling
// instance the '@' could be addressed to: the follow-up must be recorded.
it("lets '@'-prefixed sticky follow-ups through when NO sibling mention-mode instance exists", async () => {
  const agentGroupId = 'ag-solo';
  const messagingGroupId = 'mg-solo';
  const channel = 'CS2';
  const botInstance = 'slack-supervisor';
  const thread = 'T1';

  createAgentGroup({ id: agentGroupId, name: 'Solo', folder: 'solo', agent_provider: null, created_at: now() });
  createMessagingGroup({
    id: messagingGroupId,
    channel_type: 'slack',
    platform_id: channel,
    instance: botInstance,
    name: null,
    is_group: 1,
    unknown_sender_policy: 'public',
    created_at: now(),
  });
  wire({ mgaId: 'mga-solo', mgId: messagingGroupId, agId: agentGroupId, engageMode: 'mention-sticky' });

  // Resolve the session for the thread before any message is routed into it.
  const { session } = resolveSession(agentGroupId, messagingGroupId, thread, 'per-thread');

  const inbound = event({ text: '@someone unrelated', threadId: thread, platformId: channel, instance: botInstance });
  await routeInbound(inbound);

  expect(countInbound(agentGroupId, session.id)).toBe(1);

  stopTypingRefresh(session.id);
});
|
||||
|
||||
// The worker's messaging group is created without an `instance` (the default
// instance) on the same channel as the named supervisor and wired in 'mention'
// mode. Only NAMED sibling instances are mention-addressed bots, so the
// '@'-prefixed follow-up must still land in the supervisor's session.
it('the default instance never counts as a sibling (worker row on the same channel)', async () => {
  const channel = 'CS3';
  const thread = 'T1';
  const supervisor = { mgaId: 'mga-super', mgId: 'mg-super', agId: 'ag-super' };
  const worker = { mgaId: 'mga-work', mgId: 'mg-work', agId: 'ag-work' };

  createAgentGroup({ id: supervisor.agId, name: 'Super', folder: 'super', agent_provider: null, created_at: now() });
  createAgentGroup({ id: worker.agId, name: 'Worker', folder: 'worker', agent_provider: null, created_at: now() });

  createMessagingGroup({
    id: supervisor.mgId,
    channel_type: 'slack',
    platform_id: channel,
    instance: 'slack-supervisor',
    name: null,
    is_group: 1,
    unknown_sender_policy: 'public',
    created_at: now(),
  });
  // No `instance` key here: this is the default-instance worker row.
  createMessagingGroup({
    id: worker.mgId,
    channel_type: 'slack',
    platform_id: channel,
    name: null,
    is_group: 1,
    unknown_sender_policy: 'public',
    created_at: now(),
  });

  wire({ ...supervisor, engageMode: 'mention-sticky' });
  wire({ ...worker, engageMode: 'mention' });

  const { session } = resolveSession(supervisor.agId, supervisor.mgId, thread, 'per-thread');

  const inbound = event({ text: '@someone hello', threadId: thread, platformId: channel, instance: 'slack-supervisor' });
  await routeInbound(inbound);

  expect(countInbound(supervisor.agId, session.id)).toBe(1);

  stopTypingRefresh(session.id);
});
|
||||
});
|
||||
@@ -0,0 +1,102 @@
|
||||
# Remove slack-canvas
|
||||
|
||||
Reverses every change the apply made. After removal, test plans/results and worker reviews fall back to plain text + `.md` file uploads (core's built-in fallback paths).
|
||||
|
||||
## 1. Delete the copied files
|
||||
|
||||
```bash
|
||||
rm -f src/modules/pr-factory/slack-canvas.ts
|
||||
rm -f src/modules/pr-factory/file-transform.test.ts
|
||||
```
|
||||
|
||||
## 2. Delete the barrel line
|
||||
|
||||
In `src/modules/index.ts`, delete the line `import './pr-factory/slack-canvas.js';`.
|
||||
|
||||
## 3. Revert the delivery.ts reach-in
|
||||
|
||||
Reverse the three apply edits in `src/delivery.ts` byte-for-byte.
|
||||
|
||||
**3a.** Delete the hook infrastructure that was appended immediately after the `getDeliveryAction` function — this entire block (doc comment included):
|
||||
|
||||
```typescript
|
||||
/**
|
||||
* File transform hook — lets a module intercept outbound file attachments
|
||||
* before delivery (e.g. converting .md files to Slack Canvases).
|
||||
*
|
||||
* The transform receives the session, the parsed message content, and the
|
||||
* resolved outbox files. It returns { files, content } — either unchanged or
|
||||
* with files removed and content modified (e.g. a canvas link appended to
|
||||
* the text).
|
||||
*
|
||||
* Single-slot: one transform at a time; a later registrant replaces the
|
||||
* earlier one. An ordered transform chain is the natural upgrade if a second
|
||||
* consumer ever appears.
|
||||
*/
|
||||
export type FileTransformFn = (
|
||||
session: Session,
|
||||
content: Record<string, unknown>,
|
||||
files: OutboundFile[],
|
||||
) => Promise<{ files?: OutboundFile[]; content: Record<string, unknown> }>;
|
||||
|
||||
let fileTransform: FileTransformFn | null = null;
|
||||
|
||||
export function registerFileTransform(transform: FileTransformFn): void {
|
||||
fileTransform = transform;
|
||||
}
|
||||
```
|
||||
|
||||
**3b.** In `deliverMessage`, delete the transform application block — exactly these lines, including the blank line that ends the block, so that a single blank line remains between the `files` declaration and `const platformMsgId`:
|
||||
|
||||
```typescript
|
||||
// Apply the file transform hook (e.g. converting .md files to Slack
|
||||
// Canvases). Best-effort: a throwing transform falls back to delivering
|
||||
// the original message untouched.
|
||||
let deliveryContent = msg.content;
|
||||
if (fileTransform && files && files.length > 0) {
|
||||
try {
|
||||
const result = await fileTransform(session, content, files);
|
||||
files = result.files;
|
||||
deliveryContent = JSON.stringify(result.content);
|
||||
// eslint-disable-next-line no-catch-all/no-catch-all -- transform is best-effort by contract; the untransformed message still delivers
|
||||
} catch (err) {
|
||||
log.warn('File transform failed, delivering original', { err, sessionId: session.id });
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
**3c.** Change the now-mutable `files` declaration back to `const`, and pass `msg.content` again in the `deliver` call. The block becomes exactly:
|
||||
|
||||
```typescript
|
||||
const files =
|
||||
Array.isArray(content.files) && content.files.length > 0
|
||||
? readOutboxFiles(session.agent_group_id, session.id, msg.id, content.files as string[])
|
||||
: undefined;
|
||||
|
||||
const platformMsgId = await deliveryAdapter.deliver(
|
||||
msg.channel_type,
|
||||
msg.platform_id,
|
||||
msg.thread_id,
|
||||
msg.kind,
|
||||
msg.content,
|
||||
files,
|
||||
deliverInstance,
|
||||
);
|
||||
```
|
||||
|
||||
## 4. Slack app scopes
|
||||
|
||||
Optionally remove `canvases:write` and `files:read` from the worker app's OAuth scopes (and reinstall the app). Existing canvases are workspace content — they persist independently of this component.
|
||||
|
||||
## 5. Restart and validate
|
||||
|
||||
> **Skip this step during full-recipe removal.** slack-canvas comes out first in the reverse order, but the rest of the stack is still present and the recipe-level validation is the binding one — run this block only when removing `slack-canvas` in isolation.
|
||||
|
||||
```bash
|
||||
launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS
|
||||
# systemctl --user restart nanoclaw # Linux
|
||||
pnpm run build && pnpm test
|
||||
```
|
||||
|
||||
All green, with the file-transform test gone from the run.
|
||||
@@ -0,0 +1,162 @@
|
||||
---
|
||||
name: slack-canvas
|
||||
description: PR Factory component — render PR Factory markdown as Slack Canvases. Registers a canvas provider on pr-factory-core's canvas seam (test plans and results become shared canvases instead of .md uploads) and adds a delivery file-transform hook to core so the worker's .md outbox attachments deliver as inline canvas links.
|
||||
---
|
||||
|
||||
# slack-canvas (PR Factory component)
|
||||
|
||||
Inline documents instead of downloadable files. `pr-factory-core` calls `createCanvas` for test plans and test results and falls back to plain text + `.md` upload when no provider is registered; the worker's review writeups otherwise deliver as raw `.md` attachments. With this component:
|
||||
|
||||
1. **Canvas provider** — `src/modules/pr-factory/slack-canvas.ts` registers the Slack Canvas API client on core's seam (`canvases.create` → `canvases.access.set` → `files.info` permalink, numbered lists sanitized to bullets). Test plans and results render as canvases shared into the channel.
|
||||
2. **Delivery file transform** — the same file registers a transform on `registerFileTransform` (the delivery hook this component's apply adds to core): `.md` outbox attachments from the PR Factory worker's Slack sessions convert to canvas links appended to the message text (`review-pr-42.md` → "Review — PR #42" + `[View review](permalink)`); non-`.md` files, non-worker sessions, and provider failures pass through / fall back to the original upload.
|
||||
|
||||
Skill-owned files: `src/modules/pr-factory/slack-canvas.ts` and its guard test `src/modules/pr-factory/file-transform.test.ts`. Integration surface: one appended barrel line in `src/modules/index.ts` and **one core reach-in** — the file-transform hook in `src/delivery.ts` (three edits, exact shapes below). No new dependencies (built-in `fetch`; the worker bot token is reused via core's `getBotToken`).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Probe each before applying; stop on a failed probe and do what it names first.
|
||||
|
||||
1. **The `pr-factory-core` component is applied** (this component registers on its canvas seam and scopes by its worker group):
|
||||
|
||||
```bash
|
||||
grep -q 'export function registerCanvasProvider' src/modules/pr-factory/canvas.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: apply the `pr-factory-core` component first.
|
||||
|
||||
2. **The worker Slack app can use the Canvas API**: in the `/add-slack` app's OAuth scopes, add `canvases:write` and `files:read` (then reinstall the app to the workspace). Canvases also require a paid Slack plan — on free plans `canvases.create` fails and everything falls back to `.md` uploads (the component stays harmless).
|
||||
|
||||
Each step below is idempotent: if the file already contains the patched form, leave it as is and continue.
|
||||
|
||||
## Apply
|
||||
|
||||
All copy sources are under this component's folder; run every command from the repo root:
|
||||
|
||||
```bash
|
||||
SKILL=.claude/skills/recipes/pr-factory/skills/slack-canvas
|
||||
```
|
||||
|
||||
### 1. Copy the module
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/slack-canvas.ts src/modules/pr-factory/slack-canvas.ts
|
||||
```
|
||||
|
||||
### 2. Add the file-transform hook to `src/delivery.ts` (the core reach-in)
|
||||
|
||||
**2a.** Append the hook infrastructure immediately after the `getDeliveryAction` function:
|
||||
|
||||
```typescript
|
||||
/**
|
||||
* File transform hook — lets a module intercept outbound file attachments
|
||||
* before delivery (e.g. converting .md files to Slack Canvases).
|
||||
*
|
||||
* The transform receives the session, the parsed message content, and the
|
||||
* resolved outbox files. It returns { files, content } — either unchanged or
|
||||
* with files removed and content modified (e.g. a canvas link appended to
|
||||
* the text).
|
||||
*
|
||||
* Single-slot: one transform at a time; a later registrant replaces the
|
||||
* earlier one. An ordered transform chain is the natural upgrade if a second
|
||||
* consumer ever appears.
|
||||
*/
|
||||
export type FileTransformFn = (
|
||||
session: Session,
|
||||
content: Record<string, unknown>,
|
||||
files: OutboundFile[],
|
||||
) => Promise<{ files?: OutboundFile[]; content: Record<string, unknown> }>;
|
||||
|
||||
let fileTransform: FileTransformFn | null = null;
|
||||
|
||||
export function registerFileTransform(transform: FileTransformFn): void {
|
||||
fileTransform = transform;
|
||||
}
|
||||
```
|
||||
|
||||
(`Session` and `OutboundFile` are already imported at the top of delivery.ts.)
|
||||
|
||||
**2b.** In `deliverMessage`, the outbox-files block reads:
|
||||
|
||||
```typescript
|
||||
const files =
|
||||
Array.isArray(content.files) && content.files.length > 0
|
||||
? readOutboxFiles(session.agent_group_id, session.id, msg.id, content.files as string[])
|
||||
: undefined;
|
||||
|
||||
const platformMsgId = await deliveryAdapter.deliver(
|
||||
msg.channel_type,
|
||||
msg.platform_id,
|
||||
msg.thread_id,
|
||||
msg.kind,
|
||||
msg.content,
|
||||
files,
|
||||
deliverInstance,
|
||||
);
|
||||
```
|
||||
|
||||
Change `const files` to `let files`, insert the transform application between the two statements, and pass `deliveryContent` instead of `msg.content` — keeping `deliverInstance` as the 7th argument:
|
||||
|
||||
```typescript
|
||||
let files =
|
||||
Array.isArray(content.files) && content.files.length > 0
|
||||
? readOutboxFiles(session.agent_group_id, session.id, msg.id, content.files as string[])
|
||||
: undefined;
|
||||
|
||||
// Apply the file transform hook (e.g. converting .md files to Slack
|
||||
// Canvases). Best-effort: a throwing transform falls back to delivering
|
||||
// the original message untouched.
|
||||
let deliveryContent = msg.content;
|
||||
if (fileTransform && files && files.length > 0) {
|
||||
try {
|
||||
const result = await fileTransform(session, content, files);
|
||||
files = result.files;
|
||||
deliveryContent = JSON.stringify(result.content);
|
||||
// eslint-disable-next-line no-catch-all/no-catch-all -- transform is best-effort by contract; the untransformed message still delivers
|
||||
} catch (err) {
|
||||
log.warn('File transform failed, delivering original', { err, sessionId: session.id });
|
||||
}
|
||||
}
|
||||
|
||||
const platformMsgId = await deliveryAdapter.deliver(
|
||||
msg.channel_type,
|
||||
msg.platform_id,
|
||||
msg.thread_id,
|
||||
msg.kind,
|
||||
deliveryContent,
|
||||
files,
|
||||
deliverInstance,
|
||||
);
|
||||
```
|
||||
|
||||
If the surrounding code has drifted cosmetically, apply the same three semantic edits (mutable `files`, the guarded transform block, `deliveryContent` in the deliver call) and leave every other line untouched.
|
||||
|
||||
### 3. Append the modules-barrel line (`src/modules/index.ts`)
|
||||
|
||||
After the `import './pr-factory/index.js';` line, append (skip if already present):
|
||||
|
||||
```typescript
|
||||
import './pr-factory/slack-canvas.js';
|
||||
```
|
||||
|
||||
### 4. Copy the guard test
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/file-transform.test.ts src/modules/pr-factory/file-transform.test.ts
|
||||
```
|
||||
|
||||
| Test | Guards |
|
||||
|------|--------|
|
||||
| `src/modules/pr-factory/file-transform.test.ts` | The delivery reach-in behaviorally (REAL modules barrel + real bootstrap + real `deliverSessionMessages` over on-disk session DBs; Slack's canvas APIs are the only fake): a worker-session `.md` outbox file becomes a canvas link with the file stripped and the default-instance 7th arg intact; non-worker sessions pass through untouched (named-instance fixture); a throwing transform falls back to the original message; plus the provider registration through core's `createCanvas` seam |
|
||||
|
||||
## Known smell (declared)
|
||||
|
||||
**Single-slot file transform.** `registerFileTransform` holds ONE transform; a second registrant silently clobbers the canvas conversion. The guard test's worker-session leg doubles as the composed-stack assertion: any other module registering a transform turns it red. If a second consumer ever appears, the slot must become an ordered chain in core first.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
```
|
||||
|
||||
All green. Any failure means a step didn't apply cleanly.
|
||||
@@ -0,0 +1,5 @@
|
||||
# slack-canvas — files this component owns outright. The modules-barrel line
|
||||
# (src/modules/index.ts) and the delivery.ts file-transform hook are applied
|
||||
# as edits per SKILL.md, not as file copies.
|
||||
src/modules/pr-factory/slack-canvas.ts
|
||||
src/modules/pr-factory/file-transform.test.ts
|
||||
+246
@@ -0,0 +1,246 @@
|
||||
/**
|
||||
* slack-canvas component guard — the delivery fileTransform reach-in
|
||||
* (FileTransformFn type, registerFileTransform, the application block in
|
||||
* deliverMessage with its try/catch fallback), the component's canvas
|
||||
* provider registration on core's canvas seam, and the .md→canvas transform's
|
||||
* worker-session scoping.
|
||||
*
|
||||
* Imports the REAL modules barrel with the env trio primed, fires
|
||||
* onDeliveryAdapterReady via setDeliveryAdapter (which runs the real
|
||||
* pr-factory bootstrap), then drives real deliverSessionMessages over on-disk
|
||||
* session DBs with a real outbox file. Slack's canvas APIs are the only fake.
|
||||
*
|
||||
* SINGLE-SLOT GUARD: registerFileTransform holds one transform — a second
|
||||
* registrant silently clobbers the first. The worker-session leg here doubles
|
||||
* as the composed-stack assertion: if any other module in the barrel
|
||||
* registers a transform after this component's, the canvas conversion stops
|
||||
* happening and this test goes red.
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
vi.mock('../../container-runner.js', () => ({
|
||||
wakeContainer: vi.fn().mockResolvedValue(undefined),
|
||||
isContainerRunning: vi.fn().mockReturnValue(false),
|
||||
killContainer: vi.fn(),
|
||||
buildAgentGroupImage: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
vi.mock('../../config.js', async () => {
|
||||
const actual = await vi.importActual<typeof import('../../config.js')>('../../config.js');
|
||||
return {
|
||||
...actual,
|
||||
DATA_DIR: '/tmp/nanoclaw-test-prf-transform/data',
|
||||
GROUPS_DIR: '/tmp/nanoclaw-test-prf-transform/groups',
|
||||
};
|
||||
});
|
||||
|
||||
const TEST_DIR = '/tmp/nanoclaw-test-prf-transform';
|
||||
const WORKER_CHANNEL = 'C0WORK';
|
||||
const PORT = 21000 + Math.floor(Math.random() * 20000);
|
||||
|
||||
type Delivered = {
|
||||
kind: string;
|
||||
content: Record<string, unknown>;
|
||||
files?: Array<{ filename: string }>;
|
||||
instance?: string;
|
||||
};
|
||||
const delivered: Delivered[] = [];
|
||||
|
||||
let core: {
|
||||
deliverSessionMessages: typeof import('../../delivery.js').deliverSessionMessages;
|
||||
registerFileTransform: typeof import('../../delivery.js').registerFileTransform;
|
||||
createCanvas: typeof import('./canvas.js').createCanvas;
|
||||
resolveSession: typeof import('../../session-manager.js').resolveSession;
|
||||
sessionDir: typeof import('../../session-manager.js').sessionDir;
|
||||
outboundDbPath: typeof import('../../session-manager.js').outboundDbPath;
|
||||
closeDb: () => void;
|
||||
stopWebhookServer: () => Promise<void>;
|
||||
workerAgentGroupId: string;
|
||||
workerMessagingGroupId: string;
|
||||
};
|
||||
|
||||
/** Current wall-clock time as an ISO-8601 string, used for `created_at` fixture columns. */
function now(): string {
  const stamp = new Date();
  return stamp.toISOString();
}
|
||||
|
||||
/**
 * Stage one outbound chat message that carries a single file attachment.
 *
 * Writes a small markdown file to `<sessionDir>/outbox/<msgId>/<filename>` and
 * inserts a matching `messages_out` row (kind 'chat', channel type 'slack',
 * addressed to the worker channel thread) into the session's outbound DB. The
 * row's JSON content names the file in its `files` array.
 *
 * @param agentGroupId - agent group that owns the session
 * @param sessionId - session whose outbox and outbound DB are written
 * @param msgId - message id; also the name of the outbox sub-directory
 * @param filename - name of the attachment written to the outbox
 */
function insertOutboundWithFile(agentGroupId: string, sessionId: string, msgId: string, filename: string): void {
  const outboxDir = path.join(core.sessionDir(agentGroupId, sessionId), 'outbox', msgId);
  fs.mkdirSync(outboxDir, { recursive: true });
  fs.writeFileSync(path.join(outboxDir, filename), '# Review\n\nLooks good.\n');

  const db = new Database(core.outboundDbPath(agentGroupId, sessionId));
  try {
    db.prepare(
      `INSERT INTO messages_out (id, timestamp, kind, platform_id, channel_type, thread_id, content)
       VALUES (?, datetime('now'), 'chat', ?, 'slack', ?, ?)`,
    ).run(
      msgId,
      `slack:${WORKER_CHANNEL}`,
      `slack:${WORKER_CHANNEL}:123.456`,
      JSON.stringify({ text: 'Review done', files: [filename] }),
    );
  } finally {
    // Close even when the insert throws, so a failing fixture does not leave
    // an open handle on the session DB file.
    db.close();
  }
}
|
||||
|
||||
// Suite bootstrap. Order matters: the fetch fake and the env vars must be in
// place before the modules barrel is imported, because registration happens
// at import time.
beforeAll(async () => {
  // Start from an empty scratch directory.
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DIR, { recursive: true });

  // Slack canvas API fake: create → access.set → files.info permalink.
  // Every reply is HTTP 200 with a JSON body chosen by the URL.
  const slackFake = vi.fn(async (url: unknown) => {
    const target = String(url);
    let payload: Record<string, unknown>;
    if (target.includes('canvases.create')) {
      payload = { ok: true, canvas_id: 'F0CANVAS' };
    } else if (target.includes('canvases.access.set')) {
      payload = { ok: true };
    } else if (target.includes('files.info')) {
      payload = { ok: true, file: { permalink: 'https://acme.slack.com/docs/F0CANVAS' } };
    } else {
      payload = { ok: true };
    }
    return new Response(JSON.stringify(payload), { status: 200 });
  });
  vi.stubGlobal('fetch', slackFake);

  // Prime env BEFORE the barrel import — registration is import-time.
  process.env.GITHUB_WEBHOOK_SECRET = 'test-secret';
  process.env.PR_FACTORY_SLACK_CHANNEL_ID = WORKER_CHANNEL;
  process.env.SLACK_BOT_TOKEN = 'xoxb-test';
  process.env.WEBHOOK_PORT = String(PORT);

  // Central test DB, migrated before any module touches it.
  const dbMod = await import('../../db/index.js');
  dbMod.runMigrations(dbMod.initTestDb());

  await import('../index.js'); // the real modules barrel
  const delivery = await import('../../delivery.js');
  const canvasSeam = await import('./canvas.js');
  const sessionManager = await import('../../session-manager.js');
  const webhookServer = await import('../../webhook-server.js');

  // Setting the adapter fires onDeliveryAdapterReady → real bootstrap runs.
  // The adapter only records what it was asked to deliver.
  delivery.setDeliveryAdapter({
    async deliver(_channelType, _platformId, _threadId, kind, content, files, instance) {
      const parsed = JSON.parse(content) as Record<string, unknown>;
      delivered.push({ kind, content: parsed, files, instance });
      return 'plat-msg-1';
    },
  });
  // Adapter-ready callbacks are async; give them a moment to finish.
  await new Promise((resolve) => {
    setTimeout(resolve, 50);
  });

  const { getAgentGroupByFolder } = await import('../../db/agent-groups.js');
  const { getMessagingGroupByPlatform } = await import('../../db/messaging-groups.js');
  const bootstrapMod = await import('./bootstrap.js');

  const worker = getAgentGroupByFolder(bootstrapMod.WORKER_FOLDER);
  // Default instance: the worker bot's row (instance = channel_type).
  const workerMg = getMessagingGroupByPlatform('slack', `slack:${WORKER_CHANNEL}`, 'slack');
  expect(worker).toBeDefined();
  expect(workerMg).toBeDefined();

  // Everything the tests need, gathered in one place.
  core = {
    deliverSessionMessages: delivery.deliverSessionMessages,
    registerFileTransform: delivery.registerFileTransform,
    createCanvas: canvasSeam.createCanvas,
    resolveSession: sessionManager.resolveSession,
    sessionDir: sessionManager.sessionDir,
    outboundDbPath: sessionManager.outboundDbPath,
    closeDb: dbMod.closeDb,
    stopWebhookServer: webhookServer.stopWebhookServer,
    workerAgentGroupId: worker!.id,
    workerMessagingGroupId: workerMg!.id,
  };
});
|
||||
|
||||
// Suite teardown: stop the webhook server, close the DB, and undo everything
// beforeAll changed in process-wide state (fetch stub, env vars, scratch dir).
afterAll(async () => {
  try {
    // `core` is assigned as the very last step of beforeAll. If setup threw
    // earlier it is still undefined; dereferencing it here would raise a
    // secondary TypeError that hides the real failure and skips the cleanup.
    await core?.stopWebhookServer();
    core?.closeDb();
  } finally {
    // Process-wide state is restored even when the shutdown above fails.
    vi.unstubAllGlobals();
    for (const key of ['GITHUB_WEBHOOK_SECRET', 'PR_FACTORY_SLACK_CHANNEL_ID', 'SLACK_BOT_TOKEN', 'WEBHOOK_PORT']) {
      delete process.env[key];
    }
    fs.rmSync(TEST_DIR, { recursive: true, force: true });
  }
});
|
||||
|
||||
// The provider registered by slack-canvas.ts is reached through the core
// `createCanvas` seam; the stubbed fetch supplies the canvas id and permalink.
describe('slack-canvas provider on the core canvas seam', () => {
  it('createCanvas resolves through the registered provider (create → share → permalink)', async () => {
    const expected = { canvasId: 'F0CANVAS', permalink: 'https://acme.slack.com/docs/F0CANVAS' };

    const canvas = await core.createCanvas('Test Plan — PR #1', '# Plan\n1. step', WORKER_CHANNEL);

    expect(canvas).toEqual(expected);
  });
});
|
||||
|
||||
// Drives real `deliverSessionMessages` and inspects what the recording
// delivery adapter received. The last test replaces the registered transform
// and does not restore it, so it must stay last in this file.
describe('pr-factory file transform through real delivery', () => {
  const workerThread = `slack:${WORKER_CHANNEL}:123.456`;

  /** Per-thread session of the worker agent group on the worker channel thread. */
  function workerSession() {
    return core.resolveSession(core.workerAgentGroupId, core.workerMessagingGroupId, workerThread, 'per-thread')
      .session;
  }

  it('converts a worker-session .md outbox file into a canvas link and strips the file', async () => {
    delivered.splice(0);
    const session = workerSession();
    insertOutboundWithFile(core.workerAgentGroupId, session.id, 'out-md-1', 'review-pr-42.md');

    await core.deliverSessionMessages(session);

    expect(delivered).toHaveLength(1);
    const [message] = delivered;
    expect(message.files).toBeUndefined();

    const text = String(message.content.text);
    expect(text).toContain('Review done');
    expect(text).toContain('[View review](https://acme.slack.com/docs/F0CANVAS)');

    // Exact-instance dispatch: the worker session delivers through the
    // default instance.
    expect(message.instance).toBe('slack');
  });

  it('passes non-worker sessions through untouched', async () => {
    delivered.splice(0);

    // A second agent group on the same channel, reached through a named instance.
    const dbMod = await import('../../db/index.js');
    dbMod.createAgentGroup({ id: 'ag-other', name: 'Other', folder: 'other', agent_provider: null, created_at: now() });
    dbMod.createMessagingGroup({
      id: 'mg-other',
      channel_type: 'slack',
      platform_id: `slack:${WORKER_CHANNEL}`,
      instance: 'other-bot',
      name: null,
      is_group: 1,
      unknown_sender_policy: 'public',
      created_at: now(),
    });
    const { session } = core.resolveSession('ag-other', 'mg-other', null, 'shared');
    insertOutboundWithFile('ag-other', session.id, 'out-md-2', 'notes-pr-7.md');

    await core.deliverSessionMessages(session);

    expect(delivered).toHaveLength(1);
    const [message] = delivered;
    const filenames = message.files?.map((file) => file.filename);
    expect(filenames).toEqual(['notes-pr-7.md']);
    // No markdown link was appended to the text.
    expect(String(message.content.text)).not.toContain('](');
    expect(message.instance).toBe('other-bot');
  });

  it('falls back to the original message when the transform throws', async () => {
    delivered.splice(0);

    // Clobber the single slot with a throwing transform — this is the
    // documented hazard; delivery must fall back to the untransformed message.
    const explodingTransform = async (): Promise<never> => {
      throw new Error('transform exploded');
    };
    core.registerFileTransform(explodingTransform);

    const session = workerSession();
    insertOutboundWithFile(core.workerAgentGroupId, session.id, 'out-md-3', 'review-pr-43.md');

    await core.deliverSessionMessages(session);

    expect(delivered).toHaveLength(1);
    const [message] = delivered;
    const filenames = message.files?.map((file) => file.filename);
    expect(filenames).toEqual(['review-pr-43.md']);
    expect(String(message.content.text)).toBe('Review done');
  });
});
|
||||
+165
@@ -0,0 +1,165 @@
|
||||
/**
|
||||
* slack-canvas component — renders PR Factory markdown as Slack Canvases.
|
||||
*
|
||||
* Registers two things at import time:
|
||||
*
|
||||
* 1. A canvas provider on pr-factory-core's canvas seam (canvas.ts).
|
||||
* Core's test-plan and test-result posts call `createCanvas` and fall
|
||||
* back to plain text + .md upload when it returns null; with this
|
||||
* provider installed they render as canvases shared into the channel.
|
||||
*
|
||||
* 2. A delivery file transform (core's single-slot `registerFileTransform`
|
||||
* hook — the one core edit this component's apply makes) that converts
|
||||
* .md outbox attachments from the PR Factory worker into canvas links,
|
||||
* so reviews land as inline documents instead of downloadable files.
|
||||
* Scoped to the worker agent group's Slack sessions; everything else
|
||||
* passes through untouched.
|
||||
*
|
||||
* Uses the worker bot token (SLACK_BOT_TOKEN via reactions.getBotToken) for
|
||||
* the Canvas API calls.
|
||||
*/
|
||||
import { getAgentGroupByFolder } from '../../db/agent-groups.js';
|
||||
import { getMessagingGroup } from '../../db/messaging-groups.js';
|
||||
import { registerFileTransform } from '../../delivery.js';
|
||||
import { log } from '../../log.js';
|
||||
import { WORKER_FOLDER } from './bootstrap.js';
|
||||
import { createCanvas, registerCanvasProvider, type CanvasResult } from './canvas.js';
|
||||
import { getBotToken } from './reactions.js';
|
||||
|
||||
// ── Slack Canvas API client ──
|
||||
|
||||
interface SlackApiResponse {
|
||||
ok: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
interface CanvasCreateResponse extends SlackApiResponse {
|
||||
canvas_id?: string;
|
||||
}
|
||||
|
||||
interface FileInfoResponse extends SlackApiResponse {
|
||||
file?: { permalink?: string };
|
||||
}
|
||||
|
||||
/**
 * POST a JSON body to a Slack Web API method and return the decoded reply.
 *
 * The request is authorized with the bot token from `getBotToken()`. The HTTP
 * status is not inspected: the response body is parsed as JSON and handed back
 * typed as `T`, so callers check `ok` / `error` themselves.
 *
 * @param method - Slack method name, appended to `https://slack.com/api/`
 * @param body - payload serialized as the JSON request body
 * @returns the parsed response body, typed as `T` without runtime validation
 */
async function slackApi<T extends SlackApiResponse>(method: string, body: Record<string, unknown>): Promise<T> {
  const endpoint = `https://slack.com/api/${method}`;
  const headers = {
    Authorization: `Bearer ${getBotToken()}`,
    'Content-Type': 'application/json; charset=utf-8',
  };

  const response = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify(body) });
  const payload: unknown = await response.json();
  return payload as T;
}
|
||||
|
||||
/**
 * Slack canvases reject numbered lists inside bullet lists, so every numbered
 * list marker (`<digits>.` at the start of a line, optionally indented) is
 * rewritten to a `• ` bullet. The indentation in front of the marker is kept.
 *
 * Only horizontal whitespace is matched around the marker. A bare marker at
 * the end of a line (an empty item such as `1.`) still becomes a bullet, but
 * the line break after it is left in place instead of being swallowed — which
 * would otherwise glue the next line onto the bullet.
 *
 * @param md - markdown source
 * @returns the markdown with numbered list markers replaced by bullets
 */
function sanitizeForCanvas(md: string): string {
  // `[^\S\r\n]` is "whitespace except line breaks"; the lookahead accepts a
  // marker that ends its line (LF or CRLF) without consuming the break.
  return md.replace(/^([^\S\r\n]*)\d+\.(?:[^\S\r\n]|(?=\r?$))/gm, '$1• ');
}
|
||||
|
||||
/**
 * Publish a markdown document as a Slack Canvas readable by one channel.
 *
 * Three Slack calls run in order:
 *   1. `canvases.create` with the sanitized markdown;
 *   2. `canvases.access.set` granting the channel read access;
 *   3. `files.info` to look up the canvas permalink.
 *
 * A failed create or a missing permalink is logged and yields null so callers
 * can fall back to a file upload. A failed access grant is only logged: the
 * canvas already exists and is still returned.
 *
 * @param title - canvas title
 * @param markdown - canvas body; numbered lists are converted to bullets first
 * @param channelId - Slack channel id that receives read access
 * @returns the canvas id and permalink, or null when no usable canvas resulted
 */
async function createSlackCanvas(title: string, markdown: string, channelId: string): Promise<CanvasResult | null> {
  const created = await slackApi<CanvasCreateResponse>('canvases.create', {
    title,
    document_content: { type: 'markdown', markdown: sanitizeForCanvas(markdown) },
  });

  const canvasId = created.canvas_id;
  if (!created.ok || !canvasId) {
    log.warn('Canvas creation failed', {
      error: created.error,
      title,
      markdownLength: markdown.length,
      response: JSON.stringify(created),
    });
    return null;
  }

  // Grant read access to the channel so thread participants can view it.
  const grant = await slackApi('canvases.access.set', {
    canvas_id: canvasId,
    access_level: 'read',
    channel_ids: [channelId],
  });
  if (!grant.ok) {
    // Canvas exists but isn't shared — carry on and still return it; the
    // permalink will work for the bot but viewers may need to request access.
    log.warn('Canvas access.set failed', { error: grant.error, canvasId, channelId });
  }

  // Fetch the permalink (canvas IDs are file IDs).
  // files.info requires form-urlencoded, not JSON — use GET with query params.
  const lookup = await fetch(`https://slack.com/api/files.info?file=${canvasId}`, {
    headers: { Authorization: `Bearer ${getBotToken()}` },
  });
  const details = (await lookup.json()) as FileInfoResponse;
  const permalink = details.file?.permalink;

  if (permalink) {
    return { canvasId, permalink };
  }
  log.warn('Canvas permalink not found', { canvasId });
  return null;
}
|
||||
|
||||
registerCanvasProvider(createSlackCanvas);
|
||||
|
||||
// ── .md → canvas delivery transform ──
|
||||
|
||||
/**
 * Derive a canvas title and link label from an attachment filename.
 *
 * A trailing `.md` is dropped. Names shaped like `<kind>-pr-<number>` (with
 * `-` or `_` as separators) follow the "Type — PR #N" pattern used by test
 * plans / results, e.g. "review-pr-2383.md" → title "Review — PR #2383",
 * label "View review". Any other name is title-cased as a whole, e.g.
 * "release-notes.md" → title "Release Notes", label "View release notes".
 *
 * @param filename - attachment filename
 * @returns `title` for the canvas and `linkLabel` for the message link
 */
function canvasTitleFor(filename: string): { title: string; linkLabel: string } {
  // "some-slug_name" → "Some Slug Name"
  const titleCase = (slug: string): string =>
    slug.replace(/[-_]/g, ' ').replace(/\b\w/g, (ch) => ch.toUpperCase());

  const stem = filename.replace(/\.md$/, '');
  const pr = /^(.+?)[-_]pr[-_](\d+)/.exec(stem);

  if (pr === null) {
    const title = titleCase(stem);
    return { title, linkLabel: `View ${title.toLowerCase()}` };
  }

  const kind = titleCase(pr[1]);
  return { title: `${kind} — PR #${pr[2]}`, linkLabel: `View ${kind.toLowerCase()}` };
}
|
||||
|
||||
// Delivery transform: turn the worker's .md attachments into canvas links.
//
// A message is passed through unchanged unless ALL of the following hold:
//   - the worker agent group exists and owns the session (when core hasn't
//     bootstrapped — e.g. the module is inert — the group doesn't exist);
//   - at least one attached file ends in `.md`;
//   - the session's messaging group exists and is a Slack channel.
//
// NOTE(review): the link is always appended to `text`, even when the starting
// text came from `content.markdown`; `markdown` itself is left as it was.
// Confirm the delivery adapter reads `text` in that case.
registerFileTransform(async (session, content, files) => {
  const unchanged = { files, content };

  const workerGroup = getAgentGroupByFolder(WORKER_FOLDER);
  if (!workerGroup) return unchanged;
  if (session.agent_group_id !== workerGroup.id) return unchanged;

  const markdownFiles = files.filter((file) => file.filename.endsWith('.md'));
  if (markdownFiles.length === 0) return unchanged;

  const group = session.messaging_group_id ? getMessagingGroup(session.messaging_group_id) : undefined;
  if (!group || group.channel_type !== 'slack') return unchanged;

  // Platform ids carry a `slack:` prefix; the canvas call gets the bare id.
  const channelId = group.platform_id.replace(/^slack:/, '');

  const keptFiles = files.filter((file) => !file.filename.endsWith('.md'));
  const baseText = (content.text as string) || (content.markdown as string) || '';
  const links: string[] = [];

  // One canvas per markdown file, created sequentially in attachment order.
  for (const doc of markdownFiles) {
    const { title, linkLabel } = canvasTitleFor(doc.filename);
    // Through the seam: inherits its try/catch, so a provider failure keeps
    // the file as an upload instead of dropping it.
    const canvas = await createCanvas(title, doc.data.toString('utf8'), channelId);
    if (!canvas) {
      keptFiles.push(doc); // fallback: keep as file
      continue;
    }
    links.push(`[${linkLabel}](${canvas.permalink})`);
  }

  // Links go on their own lines after the existing text, if there is any.
  const text = links.length === 0 ? baseText : (baseText ? baseText + '\n' : '') + links.join('\n');

  return {
    files: keptFiles.length > 0 ? keptFiles : undefined,
    content: { ...content, text },
  };
});
|
||||
@@ -0,0 +1,50 @@
|
||||
# Remove vm-test-orchestrator
|
||||
|
||||
Reverses every change made by the apply steps. After removal, send-to-testing degrades gracefully: approving a test plan answers "no test orchestrator installed" and core skips orchestrator init.
|
||||
|
||||
## 1. Tear down live VMs first
|
||||
|
||||
Cloned VMs outlive the host process. Before removing, list and remove any `<TEST_VM_NAME_PREFIX>*` VMs on the control plane (defaults shown):
|
||||
|
||||
```bash
|
||||
ssh exe.dev ls # or your PR_FACTORY_TEST_SSH_HOST
|
||||
ssh exe.dev rm nctest-<pr-number> # per leftover VM
|
||||
```
|
||||
|
||||
## 2. Delete the copied files
|
||||
|
||||
```bash
|
||||
rm -f src/modules/pr-factory/test-orchestrator.ts
|
||||
rm -f src/modules/pr-factory/test-orchestrator.test.ts
|
||||
```
|
||||
|
||||
## 3. Delete the barrel line
|
||||
|
||||
In `src/modules/index.ts`, delete the line `import './pr-factory/test-orchestrator.js';`.
|
||||
|
||||
## 4. Remove the environment lines
|
||||
|
||||
Delete from `.env` if present:
|
||||
|
||||
```
|
||||
PR_FACTORY_TEST_SSH_HOST
|
||||
PR_FACTORY_TEST_SSH_KEY
|
||||
PR_FACTORY_TEST_VM_TEMPLATE
|
||||
TEST_VM_SSH_USER
|
||||
TEST_VM_NAME_PREFIX
|
||||
TEST_VM_HOST_TEMPLATE
|
||||
```
|
||||
|
||||
The template VM and provider account are operator infrastructure — keep or retire them per your own policy.
|
||||
|
||||
## 5. Restart and validate
|
||||
|
||||
> **Skip this step during full-recipe removal.** When tearing down the whole PR Factory, sibling components are still mid-teardown — a build here is expected to be red. Only the recipe-level validation at the end binds. Run the block below only when removing `vm-test-orchestrator` in isolation.
|
||||
|
||||
```bash
|
||||
launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS
|
||||
# systemctl --user restart nanoclaw # Linux
|
||||
pnpm run build && pnpm test
|
||||
```
|
||||
|
||||
All green, with the test-orchestrator test gone from the run.
|
||||
@@ -0,0 +1,113 @@
|
||||
---
|
||||
name: vm-test-orchestrator
|
||||
description: PR Factory component — the VM control plane for test runs. Registers a TestOrchestratorModule on pr-factory-core's test-orchestration seam; approved test plans clone an ephemeral VM from a template over SSH, check out the PR, build, start the service, and hand the ready VM to the tester agent. exe.dev conventions are the documented defaults; the SSH user, VM naming, and host template are env knobs.
|
||||
---
|
||||
|
||||
# vm-test-orchestrator (PR Factory component)
|
||||
|
||||
The test-VM half of the PR Factory's send-to-testing flow. `pr-factory-core` owns the coordination side (approval card, tester-agent wake, verdict handling, 30-minute timeout) and a seam (`src/modules/pr-factory/test-orchestration.ts`); without this component, approved test plans answer "no test orchestrator installed". With it:
|
||||
|
||||
1. A human approves a test plan → core calls `submitTest()` on the registered module.
|
||||
2. The sequential queue clones an ephemeral VM from the template (`cp <template> <name>` + `tag <name> ephemeral` on the control-plane host), waits for SSH, checks out the PR (`git fetch origin pull/<n>/head`), builds (`pnpm run build`), restarts the service, and polls systemd until stable.
|
||||
3. On success core's `onVmReady` wakes the tester agent with the plan + VM host; on failure `onRunFailed` posts the reason and a retry card.
|
||||
4. VMs stay alive after a run for investigation (pool capped at 20, oldest evicted); they are destroyed on PR close/merge, run timeout, and shutdown.
|
||||
|
||||
Skill-owned file: `src/modules/pr-factory/test-orchestrator.ts`. Integration surface: one appended barrel line in `src/modules/index.ts`. No new dependencies — `ssh` is a host binary prerequisite.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Probe each before applying; stop on a failed probe and do what it names first.
|
||||
|
||||
1. **The `pr-factory-core` component is applied** (this component implements its seam contract):
|
||||
|
||||
```bash
|
||||
grep -q 'export function registerTestOrchestrator' src/modules/pr-factory/test-orchestration.ts && echo OK
|
||||
```
|
||||
|
||||
If it fails: apply the `pr-factory-core` component first.
|
||||
|
||||
2. **An OpenSSH client on the host** (all VM control runs over `execFile('ssh', ...)`):
|
||||
|
||||
```bash
|
||||
command -v ssh && echo OK
|
||||
```
|
||||
|
||||
3. **A VM provider account + template VM** (see "Template VM contract" below) and the **tester agent group** (`pr-tester`, created per `pr-factory-core`'s SKILL.md — without it core never initializes the orchestrator).
|
||||
|
||||
Each step below is idempotent: if the file or line is already present, leave it as is and continue.
|
||||
|
||||
## Apply
|
||||
|
||||
All copy sources are under this component's folder; run every command from the repo root:
|
||||
|
||||
```bash
|
||||
SKILL=.claude/skills/recipes/pr-factory/skills/vm-test-orchestrator
|
||||
```
|
||||
|
||||
### 1. Copy the module
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/test-orchestrator.ts src/modules/pr-factory/test-orchestrator.ts
|
||||
```
|
||||
|
||||
### 2. Append the modules-barrel line (`src/modules/index.ts`)
|
||||
|
||||
After the `import './pr-factory/index.js';` line, append (skip if already present):
|
||||
|
||||
```typescript
|
||||
import './pr-factory/test-orchestrator.js';
|
||||
```
|
||||
|
||||
### 3. Copy the guard test
|
||||
|
||||
```bash
|
||||
cp $SKILL/files/src/modules/pr-factory/test-orchestrator.test.ts src/modules/pr-factory/test-orchestrator.test.ts
|
||||
```
|
||||
|
||||
| Test | Guards |
|
||||
|------|--------|
|
||||
| `src/modules/pr-factory/test-orchestrator.test.ts` | The barrel line via the REAL modules barrel (`getTestOrchestrator()` non-null) and the full lifecycle through the real queue against a PATH-shimmed `ssh`: clone/tag on the control plane, per-VM login as `<TEST_VM_SSH_USER>@<templated host>`, PR checkout/build/stability probe, `destroyVm` teardown + idempotence, the failure path's `onRunFailed` + cleanup (with core pr_threads/session context reads on a real migrated DB), shutdown dropping the callbacks — in one generation with every knob overridden (nothing exe.dev-shaped hard-coded) and one pinning the documented defaults (`exedev@nctest-<pr>.exe.xyz` via `exe.dev`) |
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment (`.env`)
|
||||
|
||||
```bash
|
||||
PR_FACTORY_TEST_VM_TEMPLATE=<template VM name> # required — tests cannot run without it
|
||||
PR_FACTORY_TEST_SSH_HOST=exe.dev # control-plane host (default shown)
|
||||
PR_FACTORY_TEST_SSH_KEY= # ssh identity for the control plane; omit to use ssh defaults
|
||||
TEST_VM_SSH_USER=exedev # login user on the per-test VMs (default shown)
|
||||
TEST_VM_NAME_PREFIX=nctest- # VM name = <prefix><pr-number> (default shown)
|
||||
TEST_VM_HOST_TEMPLATE={name}.exe.xyz # per-VM hostname; {name} expands to the VM name (default shown)
|
||||
```
|
||||
|
||||
The defaults are exe.dev's conventions; any provider whose control plane speaks `cp <template> <name>` / `tag <name> ephemeral` / `rm <name>` over SSH and gives each VM a DNS-resolvable hostname works by overriding the knobs.
|
||||
|
||||
If `NANOCLAW_EGRESS_LOCKDOWN` is enabled (default off), the tester agent's container cannot SSH to the VMs — leave it off or allowlist the VM hosts.
|
||||
|
||||
### Template VM contract
|
||||
|
||||
Prepared once by the operator; the module assumes:
|
||||
|
||||
- the project checked out at `~/nanoclaw` with an `origin` remote that serves `pull/<n>/head` refs,
|
||||
- buildable with `pnpm run build`,
|
||||
- running as a **systemd user service** whose unit name contains `nanoclaw` (the stability probe greps `systemctl --user list-unit-files` for it),
|
||||
- the host's control-plane SSH key authorized for both the control plane and `TEST_VM_SSH_USER` on cloned VMs.
|
||||
|
||||
### Provisioning SSH access for the tester → VM path (required)
|
||||
|
||||
There are **two SSH legs**, and both need keys in place before a test run can succeed:
|
||||
|
||||
1. **Host → control plane / cloned VMs.** The NanoClaw host runs `ssh <control-plane> cp/tag/rm …` to clone and reap VMs. Point `PR_FACTORY_TEST_SSH_KEY` at the host identity authorized on the control-plane host (or rely on ssh's default identities); that same public key must be in `authorized_keys` for `TEST_VM_SSH_USER` on the cloned VMs (the template bakes it in, so every clone inherits it).
|
||||
2. **Tester container → cloned VM.** Once a VM is ready, the *tester agent runs inside its container* and SSHes to `<TEST_VM_HOST_TEMPLATE>` as `TEST_VM_SSH_USER` to execute the plan. The tester container therefore needs its own private key and the matching public key in the VM's `authorized_keys`. Mount or inject that key into the `pr-tester` group's container (e.g. a mounted `~/.ssh` dir on the host allowlist, or a key provisioned into the group's workspace) and add a `known_hosts` entry or `StrictHostKeyChecking accept-new` so the first connection isn't blocked on a prompt. Without this leg the VM clones and reports ready, but the tester cannot log in and the run times out.
|
||||
|
||||
Keep both keys read-restricted and scoped to the test VMs — they are infrastructure credentials, not GitHub or vault secrets.
|
||||
|
||||
## Validate
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
pnpm test
|
||||
```
|
||||
|
||||
All green. Any failure means a step didn't apply cleanly. For an end-to-end smoke test (real VM clone), approve a test plan on a live PR thread and watch `logs/nanoclaw.log` for `Test VM ready`.
|
||||
@@ -0,0 +1,5 @@
|
||||
# vm-test-orchestrator — files this component owns outright. The
|
||||
# modules-barrel line (src/modules/index.ts) is applied as an edit per
|
||||
# SKILL.md, not as a file copy.
|
||||
src/modules/pr-factory/test-orchestrator.ts
|
||||
src/modules/pr-factory/test-orchestrator.test.ts
|
||||
+262
@@ -0,0 +1,262 @@
|
||||
/**
|
||||
* vm-test-orchestrator component guard — the modules-barrel line (`import
|
||||
* './pr-factory/test-orchestrator.js'` in src/modules/index.ts), the
|
||||
* registration on pr-factory-core's test-orchestration seam, and the module's
|
||||
* conformance to the TestOrchestratorModule contract, driven through the REAL
|
||||
* queue against a PATH-shimmed `ssh` binary.
|
||||
*
|
||||
* The shim logs every invocation's argv and answers like a healthy control
|
||||
* plane + VM (prints `active` for the systemd stability probe), so the full
|
||||
* lifecycle — clone, tag, SSH wait, PR checkout, build, restart, stability —
|
||||
* runs for real with only the network edge faked. Timing knobs are shrunk via
|
||||
* the module's test-only _setTimingForTest so the polls complete in
|
||||
* milliseconds.
|
||||
*
|
||||
* Pins the install-specific parameterization (TEST_VM_SSH_USER /
|
||||
* TEST_VM_NAME_PREFIX / TEST_VM_HOST_TEMPLATE / PR_FACTORY_TEST_SSH_HOST) in
|
||||
* one generation and the documented exe.dev defaults (exedev@,
|
||||
* nctest-<pr>.exe.xyz) in a second, and the failure path's consumption of
|
||||
* core's pr_threads/session reads on a real migrated DB.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Container runner stub: nothing in this suite may wake, probe, kill, or
// build a real container.
vi.mock('../../container-runner.js', () => {
  const resolvesVoid = () => vi.fn().mockResolvedValue(undefined);
  return {
    wakeContainer: resolvesVoid(),
    isContainerRunning: vi.fn().mockReturnValue(false),
    killContainer: vi.fn(),
    buildAgentGroupImage: resolvesVoid(),
  };
});

// Real config, with the data and groups directories redirected into the
// suite's scratch root under /tmp.
vi.mock('../../config.js', async () => {
  const real = await vi.importActual<typeof import('../../config.js')>('../../config.js');
  const redirected = {
    DATA_DIR: '/tmp/nanoclaw-test-prf-vmorch/data',
    GROUPS_DIR: '/tmp/nanoclaw-test-prf-vmorch/groups',
  };
  return { ...real, ...redirected };
});
|
||||
|
||||
import type { TestOrchestratorModule } from './test-orchestration.js';
|
||||
|
||||
// Scratch root for the whole suite.
const TEST_DIR = '/tmp/nanoclaw-test-prf-vmorch';
// Written by the ssh shim: one line per invocation, argv tab-separated.
const SSH_LOG = path.join(TEST_DIR, 'ssh-calls.log');
// While this file exists the ssh shim exits 1 with "clone failed: quota exceeded".
const SSH_FAIL_FLAG = path.join(TEST_DIR, 'ssh-fail-flag');
const REPO = 'acme/widgets';

// Process globals captured at module load so teardown can put them back.
const ORIGINAL_CWD = process.cwd();
const ORIGINAL_PATH = process.env.PATH;

// Every install-specific environment knob the orchestrator module reads.
const KNOB_KEYS = [
  'PR_FACTORY_TEST_SSH_HOST',
  'PR_FACTORY_TEST_SSH_KEY',
  'PR_FACTORY_TEST_VM_TEMPLATE',
  'TEST_VM_SSH_USER',
  'TEST_VM_NAME_PREFIX',
  'TEST_VM_HOST_TEMPLATE',
];

// Millisecond-scale wait/poll settings handed to _setTimingForTest so the
// SSH-wait and stability polls finish almost immediately.
const FAST_TIMING = {
  sshWaitIntervalMs: 5,
  sshWaitTimeoutMs: 2_000,
  stabilityPollMs: 5,
  stabilityRequiredMs: 0,
  stabilityTimeoutMs: 2_000,
};
|
||||
|
||||
let mod: TestOrchestratorModule;
|
||||
let closeDbFn: () => void;
|
||||
|
||||
/**
 * Read the ssh shim's invocation log.
 *
 * @returns One entry per non-empty log line; an empty array when the log
 *   file does not exist.
 */
function sshCalls(): string[] {
  const raw = fs.existsSync(SSH_LOG) ? fs.readFileSync(SSH_LOG, 'utf8') : null;
  if (raw === null) return [];
  return raw
    .trim()
    .split('\n')
    .filter((line) => line.length > 0);
}
|
||||
|
||||
/**
 * Create a promise together with the function that resolves it, so a
 * callback fired elsewhere can settle something the test awaits.
 */
function deferred<T>(): { promise: Promise<T>; resolve: (v: T) => void } {
  const handle = {} as { promise: Promise<T>; resolve: (v: T) => void };
  // The executor runs synchronously, so `resolve` is set before we return.
  handle.promise = new Promise<T>((fulfil) => {
    handle.resolve = fulfil;
  });
  return handle;
}
|
||||
|
||||
/** Current time as an ISO-8601 string (used for the fixtures' `created_at`). */
function now(): string {
  const stamp = new Date();
  return stamp.toISOString();
}
|
||||
|
||||
// One-time fixture: scratch dirs, the PATH-shimmed ssh, generation-1 knob
// overrides, a migrated test DB seeded with a session + PR thread for PR 43,
// and the module registered through the real modules barrel.
beforeAll(async () => {
  const binDir = path.join(TEST_DIR, 'bin');
  fs.rmSync(TEST_DIR, { recursive: true, force: true });
  for (const dir of [binDir, path.join(TEST_DIR, 'data')]) {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Fake ssh: logs argv tab-separated, fails when the flag file exists,
  // answers the systemd stability probe with `active`.
  const shimLines = [
    '#!/bin/sh',
    `LOG="${SSH_LOG}"`,
    'out=""',
    'for a in "$@"; do out="$out$a\t"; done',
    'printf \'%s\\n\' "$out" >> "$LOG"',
    `if [ -e "${SSH_FAIL_FLAG}" ]; then echo "clone failed: quota exceeded" >&2; exit 1; fi`,
    'case "$*" in',
    ' *is-active*) echo "active" ;;',
    ' *) echo "ok" ;;',
    'esac',
  ];
  fs.writeFileSync(path.join(binDir, 'ssh'), shimLines.join('\n'), { mode: 0o755 });
  process.env.PATH = `${binDir}:${process.env.PATH}`;

  // readEnvFile resolves .env from cwd — run from a dir guaranteed to have none.
  process.chdir(TEST_DIR);
  const scrubbed = [...KNOB_KEYS, 'GITHUB_WEBHOOK_SECRET', 'PR_FACTORY_SLACK_CHANNEL_ID', 'SLACK_BOT_TOKEN'];
  for (const key of scrubbed) {
    delete process.env[key];
  }

  // Generation 1: every install-specific knob overridden — pins that nothing
  // exe.dev-shaped is hard-coded in the SSH plumbing.
  Object.assign(process.env, {
    PR_FACTORY_TEST_SSH_HOST: 'control.vms.test',
    PR_FACTORY_TEST_VM_TEMPLATE: 'tmpl-nc',
    TEST_VM_SSH_USER: 'vmtester',
    TEST_VM_NAME_PREFIX: 'pvt-',
    TEST_VM_HOST_TEMPLATE: '{name}.vms.test',
  });

  // Real migrated DB with the rows the failure path reads for PR 43.
  const dbMod = await import('../../db/index.js');
  const db = dbMod.initTestDb();
  dbMod.runMigrations(db);
  closeDbFn = dbMod.closeDb;
  dbMod.createAgentGroup({ id: 'ag-w', name: 'W', folder: 'w', agent_provider: null, created_at: now() });

  const sessions = await import('../../db/sessions.js');
  sessions.createSession({
    id: 'sess-43',
    agent_group_id: 'ag-w',
    messaging_group_id: null,
    thread_id: null,
    agent_provider: null,
    status: 'active',
    container_status: 'stopped',
    last_active: null,
    created_at: now(),
  });

  const prThreads = await import('../../db/pr-threads.js');
  prThreads.createPrThread({
    channel_id: 'slack:C0WORK',
    thread_ts: '1700000000.000043',
    channel_type: 'slack',
    repo_full_name: REPO,
    pr_number: 43,
    session_id: 'sess-43',
    created_at: now(),
  });

  await import('../index.js'); // the REAL modules barrel — the line under guard lives here
  const seam = await import('./test-orchestration.js');
  const registered = seam.getTestOrchestrator();
  expect(registered, 'no TestOrchestratorModule registered — barrel line missing?').not.toBeNull();
  mod = registered!;

  const orchestrator = await import('./test-orchestrator.js');
  orchestrator._setTimingForTest(FAST_TIMING);
});
|
||||
|
||||
// Suite teardown: shut the module down, close the DB, and restore every
// process-global the suite changed (cwd, PATH, knob env vars, scratch dir).
afterAll(async () => {
  try {
    await mod?.shutdown();
  } finally {
    // Runs even when shutdown() throws, so later suites never inherit the
    // shimmed PATH, the scratch cwd, or the knob overrides.
    closeDbFn?.();
    process.chdir(ORIGINAL_CWD);
    // Assigning `undefined` to a process.env key would store the string
    // "undefined"; delete the key when PATH was not set originally.
    if (ORIGINAL_PATH === undefined) {
      delete process.env.PATH;
    } else {
      process.env.PATH = ORIGINAL_PATH;
    }
    for (const k of KNOB_KEYS) delete process.env[k];
    fs.rmSync(TEST_DIR, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Every test starts with an empty ssh call log and no failure flag.
beforeEach(() => {
  for (const stale of [SSH_LOG, SSH_FAIL_FLAG]) {
    fs.rmSync(stale, { force: true });
  }
});
|
||||
|
||||
// Lifecycle tests with every knob overridden. The tests share module state
// and run in order: the VM cloned for PR 42 in the first test is the one the
// second test destroys.
describe('vm-test-orchestrator through the real queue (parameterized knobs)', () => {
  /** True when at least one recorded ssh invocation contains every given fragment. */
  const sawCall = (calls: string[], ...fragments: string[]): boolean =>
    calls.some((call) => fragments.every((fragment) => call.includes(fragment)));

  it('submitTest runs the full VM lifecycle over shimmed ssh and reports the knob-derived host via onVmReady', async () => {
    const ready = deferred<{ prNumber: number; repo: string; vmHost: string; planContent: string }>();
    // A failed run rejects this promise, so the test fails at once with the
    // orchestrator's reason instead of hanging until the vitest timeout.
    let rejectRun!: (reason: Error) => void;
    const runFailed = new Promise<never>((_resolve, reject) => {
      rejectRun = reject;
    });
    const onRunFailed = vi.fn().mockResolvedValue(undefined);
    mod.init({
      onVmReady: async (prNumber, repo, vmHost, planContent) => ready.resolve({ prNumber, repo, vmHost, planContent }),
      onRunFailed: async (prNumber, repo, reason) => {
        await onRunFailed(prNumber, repo, reason);
        rejectRun(new Error(`test run for PR #${prNumber} failed: ${reason}`));
      },
    });

    mod.submitTest({ prNumber: 42, repo: REPO, planContent: '## Plan body' });
    const result = await Promise.race([ready.promise, runFailed]);

    expect(result).toEqual({ prNumber: 42, repo: REPO, vmHost: 'pvt-42.vms.test', planContent: '## Plan body' });
    expect(onRunFailed).not.toHaveBeenCalled();

    const calls = sshCalls();
    // Control plane: clone + ephemeral tag against the configured host.
    expect(sawCall(calls, 'control.vms.test\tcp\ttmpl-nc\tpvt-42')).toBe(true);
    expect(sawCall(calls, 'control.vms.test\ttag\tpvt-42\tephemeral')).toBe(true);
    // VM side: every command logs in as <TEST_VM_SSH_USER>@<templated host>.
    expect(sawCall(calls, 'vmtester@pvt-42.vms.test\techo ok')).toBe(true);
    expect(sawCall(calls, 'vmtester@pvt-42.vms.test', 'git fetch origin pull/42/head')).toBe(true);
    expect(sawCall(calls, 'vmtester@pvt-42.vms.test', 'pnpm run build')).toBe(true);
    expect(sawCall(calls, 'vmtester@pvt-42.vms.test', 'is-active')).toBe(true);
    // Nothing exe.dev-shaped leaked past the knobs.
    const transcript = calls.join('\n');
    expect(transcript).not.toContain('exe.dev');
    expect(transcript).not.toContain('exedev@');
  });

  it('destroyVm tears the per-PR VM down through the control plane', async () => {
    await mod.destroyVm(42);
    expect(sawCall(sshCalls(), 'control.vms.test\trm\tpvt-42')).toBe(true);

    // Idempotent: a second destroy finds no pool entry and makes no ssh call.
    fs.rmSync(SSH_LOG, { force: true });
    await mod.destroyVm(42);
    expect(sshCalls()).toEqual([]);
  });

  it('a failed VM setup reports through onRunFailed (with core pr_threads/session context reads) and cleans up', async () => {
    // With the flag file present every shimmed ssh call exits 1.
    fs.writeFileSync(SSH_FAIL_FLAG, '1');
    const failed = deferred<{ prNumber: number; reason: string }>();
    mod.init({
      onVmReady: vi.fn().mockResolvedValue(undefined),
      onRunFailed: async (prNumber, _repo, reason) => failed.resolve({ prNumber, reason }),
    });

    mod.submitTest({ prNumber: 43, repo: REPO, planContent: '## Plan' });
    const result = await failed.promise;

    expect(result.prNumber).toBe(43);
    expect(result.reason).toContain('quota exceeded');
    // Cleanup was attempted even though the control plane was down.
    expect(sawCall(sshCalls(), 'control.vms.test\trm\tpvt-43')).toBe(true);
  });

  it('shutdown destroys pooled VMs and drops the callbacks', async () => {
    await mod.shutdown();
    expect(() => mod.submitTest({ prNumber: 1, repo: REPO, planContent: 'x' })).toThrow('not initialized');
  });
});
|
||||
|
||||
// Second module generation: only the template knob is set, so every other
// value must come from the module's built-in defaults.
describe('documented defaults (fresh module generation, no TEST_VM_* knobs)', () => {
  it('pins the exe.dev shape: exedev@nctest-<pr>.exe.xyz via the exe.dev control plane', async () => {
    // Drop the cached module graph so the next import re-evaluates the
    // orchestrator module against the cleaned environment.
    vi.resetModules();
    KNOB_KEYS.forEach((key) => {
      delete process.env[key];
    });
    process.env.PR_FACTORY_TEST_VM_TEMPLATE = 'tmpl-nc';

    await import('../index.js');
    const seam = await import('./test-orchestration.js');
    const fresh = seam.getTestOrchestrator()!;
    expect(fresh).not.toBeNull();
    const orchestrator = await import('./test-orchestrator.js');
    orchestrator._setTimingForTest(FAST_TIMING);

    const ready = deferred<string>();
    fresh.init({
      onVmReady: async (_pr, _repo, vmHost) => ready.resolve(vmHost),
      onRunFailed: vi.fn().mockResolvedValue(undefined),
    });
    fresh.submitTest({ prNumber: 7, repo: REPO, planContent: 'plan' });

    const reportedHost = await ready.promise;
    expect(reportedHost).toBe('nctest-7.exe.xyz');

    const calls = sshCalls();
    const logged = (fragment: string): boolean => calls.some((call) => call.includes(fragment));
    expect(logged('exe.dev\tcp\ttmpl-nc\tnctest-7')).toBe(true);
    expect(logged('exedev@nctest-7.exe.xyz\techo ok')).toBe(true);

    await fresh.shutdown();
  });
});
|
||||
+403
@@ -0,0 +1,403 @@
|
||||
/**
|
||||
* vm-test-orchestrator component — VM lifecycle, sequential queue, pool
|
||||
* management. Implements pr-factory-core's `TestOrchestratorModule` and
|
||||
* registers on the test-orchestration seam at import time.
|
||||
*
|
||||
* Owns everything test-VM-facing. Never touches sessions, agents, Slack,
|
||||
* or verdicts — core's orchestrator.ts drives it through the seam and
|
||||
* receives results via the callbacks wired in `init()`.
|
||||
*
|
||||
* Entry point: `submitTest()` is called by core's testing-approval flow
|
||||
* after a human approves a test plan. The queue processes one test at a
|
||||
* time. VM lifecycle: clone → SSH wait → checkout PR → build → start →
|
||||
* stability check. On success: `onVmReady()` → core wakes the tester
|
||||
* agent. On failure: `onRunFailed()` → core posts the error to the thread.
|
||||
*
|
||||
* The VM provider is any host reachable over SSH that exposes
|
||||
* `cp <template> <name>` / `tag <name> ephemeral` / `rm <name>` commands
|
||||
* and DNS-resolvable per-VM hostnames (exe.dev's CLI shape; the defaults
|
||||
* below are its conventions). Install-specific knobs (.env, process.env
|
||||
* overrides):
|
||||
*
|
||||
* PR_FACTORY_TEST_SSH_HOST — control-plane host (default: exe.dev)
|
||||
* PR_FACTORY_TEST_SSH_KEY — ssh identity file for the control plane
|
||||
* (default: ssh's own defaults)
|
||||
* PR_FACTORY_TEST_VM_TEMPLATE — template VM cloned per test run
|
||||
* (required to run tests)
|
||||
* TEST_VM_SSH_USER — login user on the per-test VMs
|
||||
* (default: exedev)
|
||||
* TEST_VM_NAME_PREFIX — per-PR VM name prefix; the VM name is
|
||||
* `<prefix><pr-number>` (default: nctest-)
|
||||
* TEST_VM_HOST_TEMPLATE — per-VM hostname template, `{name}`
|
||||
* expands to the VM name
|
||||
* (default: {name}.exe.xyz)
|
||||
*
|
||||
* Template VM contract (prepared once by the operator, see SKILL.md): the
|
||||
* project checked out at `~/nanoclaw`, buildable with `pnpm run build`, and
|
||||
* running as a systemd user service whose unit name contains `nanoclaw`.
|
||||
*/
|
||||
import { execFile } from 'child_process';
|
||||
|
||||
import { readEnvFile } from '../../env.js';
|
||||
import { log } from '../../log.js';
|
||||
import { getPrThreadByRepoPr } from '../../db/pr-threads.js';
|
||||
import { getSession } from '../../db/sessions.js';
|
||||
import {
|
||||
registerTestOrchestrator,
|
||||
type OrchestratorCallbacks,
|
||||
type TestOrchestratorModule,
|
||||
type TestRun,
|
||||
} from './test-orchestration.js';
|
||||
|
||||
// ── Config ──
|
||||
|
||||
// Knob values found in the .env file; process.env takes precedence over them.
const testEnv = readEnvFile([
  'PR_FACTORY_TEST_SSH_HOST',
  'PR_FACTORY_TEST_SSH_KEY',
  'PR_FACTORY_TEST_VM_TEMPLATE',
  'TEST_VM_SSH_USER',
  'TEST_VM_NAME_PREFIX',
  'TEST_VM_HOST_TEMPLATE',
]);

/**
 * Resolve one knob: process.env first, then the .env file, then `fallback`.
 * `||` (not `??`) is deliberate — an empty value such as
 * `PR_FACTORY_TEST_SSH_KEY=` falls through to the next source.
 */
const resolveKnob = (key: string, fallback: string): string => process.env[key] || testEnv[key] || fallback;

const CONTROL_HOST = resolveKnob('PR_FACTORY_TEST_SSH_HOST', 'exe.dev');
// Empty means "no -i flag": ssh falls back to its own default identities.
const CONTROL_SSH_KEY = resolveKnob('PR_FACTORY_TEST_SSH_KEY', '');
// Empty means "not configured": cloneVm refuses to run without a template.
const TEMPLATE_VM = resolveKnob('PR_FACTORY_TEST_VM_TEMPLATE', '');
const VM_SSH_USER = resolveKnob('TEST_VM_SSH_USER', 'exedev');
const VM_NAME_PREFIX = resolveKnob('TEST_VM_NAME_PREFIX', 'nctest-');
const VM_HOST_TEMPLATE = resolveKnob('TEST_VM_HOST_TEMPLATE', '{name}.exe.xyz');

// Pool cap: once this many VMs are tracked, the oldest is evicted before a new clone.
const MAX_VMS = 20;
|
||||
|
||||
// Wait/poll intervals. Module-level so the PATH-shimmed ssh seam test can
// shrink them to milliseconds; production never touches the setter.
const timing = {
  sshWaitIntervalMs: 5_000,
  sshWaitTimeoutMs: 90_000,
  stabilityPollMs: 3_000,
  stabilityRequiredMs: 10_000,
  stabilityTimeoutMs: 60_000,
};

/**
 * Test-only: override the wait/poll intervals.
 *
 * Keys that are omitted — or passed explicitly as `undefined` — keep their
 * current value. A plain `Object.assign` would copy the `undefined`, and the
 * deadline arithmetic (`Date.now() + timeout`) would then produce `NaN`.
 *
 * @param overrides - Subset of the timing knobs to replace, in milliseconds.
 */
export function _setTimingForTest(overrides: Partial<typeof timing>): void {
  for (const key of Object.keys(overrides) as (keyof typeof timing)[]) {
    const value = overrides[key];
    if (value !== undefined) timing[key] = value;
  }
}
|
||||
|
||||
// ── State ──
|
||||
|
||||
/** One cloned test VM tracked in the in-memory pool. */
export interface VmInfo {
  /** Control-plane VM name: the name prefix followed by the PR number. */
  vmName: string;
  /** Hostname the VM is reached at over SSH (expanded from the host template). */
  vmHost: string;
  /** PR the VM was cloned for; also its key in the pool map. */
  prNumber: number;
  /** `Date.now()` when the clone was registered; pool eviction removes the smallest first. */
  createdAt: number;
}

/** Callbacks supplied by core; `null` until they are wired. */
let callbacks: OrchestratorCallbacks | null = null;

/** Submitted test runs waiting to be processed. */
const queue: TestRun[] = [];

// NOTE(review): presumably the re-entrancy guard for the queue processor —
// confirm against the code that drains `queue`.
let processing = false;

/** Pool of cloned VMs, keyed by PR number. */
const activeVms = new Map<number, VmInfo>();
|
||||
|
||||
// ── SSH helpers ──
|
||||
|
||||
/**
 * Build the Error reported for a failed ssh invocation.
 *
 * SSH emits useful failure details on stderr, but the inner command's failure
 * can show up on stdout, so the message is the trimmed stderr followed by the
 * trimmed stdout (joined by a `---` line). Benign warnings are not stripped
 * here — the callers already suppress them with `LogLevel=ERROR`.
 * `err.message` is used only as the fallback when ssh produced no output.
 *
 * When the child was killed (execFile's `timeout`, or an external signal) a
 * trailing line says so; otherwise a timed-out command would be
 * indistinguishable from one that failed on its own.
 *
 * @param err - Error passed to the execFile callback.
 * @param stdout - Captured standard output.
 * @param stderr - Captured standard error.
 * @returns A new Error carrying the combined message.
 */
function sshError(err: Error, stdout: string, stderr: string): Error {
  const output = [stderr, stdout].map((stream) => stream?.trim()).filter(Boolean);
  const lines = output.length > 0 ? output : [err.message];

  // execFile sets `killed` / `signal` on the error when it terminated the child.
  const { killed, signal } = err as Error & { killed?: boolean; signal?: string | null };
  if (killed || signal) {
    lines.push(`(ssh killed by ${signal ?? 'signal'} — exec timeout or external signal)`);
  }

  return new Error(lines.join('\n---\n'));
}
|
||||
|
||||
/**
 * Run a command on the control-plane host over ssh.
 *
 * `-i` is passed only when a control-plane key is configured; the call is
 * bounded by a 10 s connect timeout and a 120 s overall timeout.
 *
 * @param args - Remote command words, appended after the host.
 * @returns Trimmed stdout; rejects with the Error built by `sshError`.
 */
function sshControl(args: string[]): Promise<string> {
  const argv: string[] = [];
  if (CONTROL_SSH_KEY) argv.push('-i', CONTROL_SSH_KEY);
  argv.push('-o', 'ConnectTimeout=10', '-o', 'LogLevel=ERROR', CONTROL_HOST, ...args);

  return new Promise<string>((resolve, reject) => {
    execFile('ssh', argv, { timeout: 120_000 }, (err, stdout, stderr) => {
      if (err) {
        reject(sshError(err, String(stdout), String(stderr)));
        return;
      }
      resolve(String(stdout).trim());
    });
  });
}
|
||||
|
||||
/**
 * Run a shell command on a test VM over ssh as `VM_SSH_USER`.
 *
 * When a control-plane key is configured it is also offered to the VM with
 * `-i`. The documented contract is that this same key is authorized for the
 * VM login user on every clone, so a non-default key path must be passed here
 * too — otherwise the control plane works but every VM login fails. `-i` only
 * adds a candidate identity; ssh's default identities are still tried.
 *
 * Host keys are neither checked nor recorded (`StrictHostKeyChecking=no`,
 * `UserKnownHostsFile=/dev/null`). NOTE(review): presumably because a VM name,
 * and so its hostname, is reused when a PR is re-tested — confirm.
 *
 * @param vmHost - Hostname of the VM.
 * @param command - Command line executed by the remote shell.
 * @param opts - Optional `timeout` in ms for the whole call (default 300 000).
 * @returns Trimmed stdout; rejects with the Error built by `sshError`.
 */
function sshVm(vmHost: string, command: string, opts?: { timeout?: number }): Promise<string> {
  const argv = [
    ...(CONTROL_SSH_KEY ? ['-i', CONTROL_SSH_KEY] : []),
    '-o',
    'ConnectTimeout=10',
    '-o',
    'LogLevel=ERROR',
    '-o',
    'StrictHostKeyChecking=no',
    '-o',
    'UserKnownHostsFile=/dev/null',
    `${VM_SSH_USER}@${vmHost}`,
    command,
  ];

  return new Promise((resolve, reject) => {
    execFile('ssh', argv, { timeout: opts?.timeout ?? 300_000 }, (err, stdout, stderr) => {
      if (err) return reject(sshError(err, String(stdout), String(stderr)));
      resolve(String(stdout).trim());
    });
  });
}
|
||||
|
||||
// ── VM lifecycle helpers ──
|
||||
|
||||
/** Control-plane name of the VM for a PR: the configured prefix followed by the PR number. */
function vmName(prNumber: number): string {
  return VM_NAME_PREFIX + String(prNumber);
}
|
||||
|
||||
/**
 * Hostname of the VM for a PR: the host template with `{name}` expanded to
 * the VM name.
 *
 * split/join is used instead of `String.replace` so that every `{name}`
 * occurrence is expanded (not just the first) and a `$` in the configured
 * name prefix is inserted literally rather than read as a replacement pattern.
 */
function vmHost(prNumber: number): string {
  return VM_HOST_TEMPLATE.split('{name}').join(vmName(prNumber));
}
|
||||
|
||||
/**
 * Make room in the pool before a new clone: when `MAX_VMS` VMs are already
 * tracked, remove the one with the smallest `createdAt`.
 *
 * The control-plane `rm` is best-effort — the pool entry is dropped even
 * when the removal fails, and the failure is only logged.
 */
async function enforcePoolLimit(): Promise<void> {
  if (activeVms.size < MAX_VMS) return;

  // Oldest entry by createdAt; on a tie the earlier-inserted VM wins.
  const oldest = [...activeVms.values()].reduce<VmInfo | null>(
    (candidate, vm) => (candidate === null || vm.createdAt < candidate.createdAt ? vm : candidate),
    null,
  );
  if (oldest === null) return;

  const { vmName: evictedName, prNumber: evictedPr } = oldest;
  log.info('VM pool at limit, destroying oldest', { vmName: evictedName, prNumber: evictedPr });
  try {
    await sshControl(['rm', evictedName]);
    // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort pool eviction; the clone proceeds either way
  } catch (err) {
    log.warn('Failed to destroy oldest VM during pool enforcement', { vmName: evictedName, err });
  }
  activeVms.delete(evictedPr);
}
|
||||
|
||||
/**
 * Clone the template VM for a PR and register it in the pool.
 *
 * Steps: remove a VM already tracked for this PR (best-effort), enforce the
 * pool limit, then `cp <template> <name>` and `tag <name> ephemeral` on the
 * control plane. The pool entry is written only after both commands succeed.
 *
 * @param prNumber - PR the VM is created for.
 * @returns The hostname of the new VM.
 * @throws Error when no template is configured, or when `cp` / `tag` fails.
 */
async function cloneVm(prNumber: number): Promise<string> {
  if (!TEMPLATE_VM) {
    throw new Error('PR_FACTORY_TEST_VM_TEMPLATE not set — cannot clone a test VM (see .env)');
  }

  const cloneName = vmName(prNumber);
  const cloneHost = vmHost(prNumber);

  // If a VM already exists for this PR (e.g. re-test), destroy it first
  const replacing = activeVms.has(prNumber);
  if (replacing) {
    log.info('Destroying existing VM before clone', { vmName: cloneName });
    try {
      await sshControl(['rm', cloneName]);
      // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort pre-clone cleanup; the clone is the operation that matters
    } catch {
      // already gone
    }
    activeVms.delete(prNumber);
  }

  await enforcePoolLimit();

  log.info('Cloning test VM', { template: TEMPLATE_VM, vmName: cloneName });
  await sshControl(['cp', TEMPLATE_VM, cloneName]);
  await sshControl(['tag', cloneName, 'ephemeral']);

  const entry: VmInfo = { vmName: cloneName, vmHost: cloneHost, prNumber, createdAt: Date.now() };
  activeVms.set(prNumber, entry);
  return cloneHost;
}
|
||||
|
||||
/**
 * Poll the VM with `echo ok` until an ssh login succeeds.
 *
 * Each probe is capped at 10 s, and probes are spaced by
 * `timing.sshWaitIntervalMs` until `timing.sshWaitTimeoutMs` has elapsed.
 *
 * @param host - Hostname of the VM.
 * @throws Error when no probe succeeds before the deadline. The message keeps
 *   the `SSH not available after <n>s` prefix and appends the last probe's
 *   failure, so an auth problem can be told apart from a VM that never booted.
 */
async function waitForSsh(host: string): Promise<void> {
  const deadline = Date.now() + timing.sshWaitTimeoutMs;
  let lastFailure = '';

  while (Date.now() < deadline) {
    try {
      await sshVm(host, 'echo ok', { timeout: 10_000 });
      return;
      // eslint-disable-next-line no-catch-all/no-catch-all -- polling: every failure means "not ready yet"
    } catch (err) {
      // Not ready yet — remember why, for the timeout message.
      lastFailure = err instanceof Error ? err.message : String(err);
    }
    await sleep(timing.sshWaitIntervalMs);
  }

  const detail = lastFailure ? `: ${lastFailure}` : '';
  throw new Error(`SSH not available after ${timing.sshWaitTimeoutMs / 1000}s${detail}`);
}
|
||||
|
||||
/**
 * Fetch a PR head into a local `pr-<number>` branch on the test VM and check
 * it out, all inside `~/nanoclaw`.
 *
 * @param host Host of the test VM to run the git commands on.
 * @param prNumber PR whose head ref is fetched.
 * @throws Propagates any failure from `sshVm` (60 s timeout).
 */
async function checkoutPr(host: string, prNumber: number): Promise<void> {
  log.info('Checking out PR on test VM', { vmHost: host, prNumber });

  const checkoutCommand = `cd ~/nanoclaw && git fetch origin pull/${prNumber}/head:pr-${prNumber} && git checkout pr-${prNumber}`;
  const options = { timeout: 60_000 };

  await sshVm(host, checkoutCommand, options);
}
|
||||
|
||||
/**
 * Build the project on the test VM, then restart its user-level systemd
 * service(s) whose unit name contains "nanoclaw".
 *
 * @param host Host of the test VM.
 * @throws Propagates any failure from the build (120 s timeout) or the
 *   restart (30 s timeout).
 */
async function startRuntime(host: string): Promise<void> {
  log.info('Building and starting runtime on test VM', { vmHost: host });

  const buildCommand = 'cd ~/nanoclaw && pnpm run build';
  // XDG_RUNTIME_DIR is exported explicitly before invoking `systemctl --user`.
  const restartCommand =
    "export XDG_RUNTIME_DIR=/run/user/$(id -u) && systemctl --user restart $(systemctl --user list-unit-files --type=service | grep nanoclaw | awk '{print $1}')";

  await sshVm(host, buildCommand, { timeout: 120_000 });
  await sshVm(host, restartCommand, { timeout: 30_000 });
}
|
||||
|
||||
/**
 * Wait until the service on the test VM has reported `active` continuously
 * for `timing.stabilityRequiredMs`.
 *
 * The service state is polled every `timing.stabilityPollMs`. Any non-`active`
 * answer, or any failure of the probe itself, restarts the stability window.
 *
 * @param host Host of the test VM.
 * @throws Error once `timing.stabilityTimeoutMs` has elapsed without a full
 *   stability window.
 */
async function waitForStability(host: string): Promise<void> {
  const giveUpAt = Date.now() + timing.stabilityTimeoutMs;
  const statusCommand =
    "export XDG_RUNTIME_DIR=/run/user/$(id -u) && systemctl --user is-active $(systemctl --user list-unit-files --type=service | grep nanoclaw | awk '{print $1}')";

  // One poll: true only when the probe ran and answered exactly 'active'.
  const isActive = async (): Promise<boolean> => {
    try {
      const status = await sshVm(host, statusCommand, { timeout: 10_000 });
      return status === 'active';
      // eslint-disable-next-line no-catch-all/no-catch-all -- polling: every failure resets the stability window
    } catch {
      return false;
    }
  };

  // Start of the current uninterrupted 'active' streak, or null when none.
  let stableSince: number | null = null;

  while (Date.now() < giveUpAt) {
    const active = await isActive();
    if (active) {
      stableSince = stableSince || Date.now();
      if (Date.now() - stableSince >= timing.stabilityRequiredMs) {
        return;
      }
    } else {
      stableSince = null;
    }
    await sleep(timing.stabilityPollMs);
  }

  throw new Error(`Service not stable after ${timing.stabilityTimeoutMs / 1000}s`);
}
|
||||
|
||||
// ── Queue processing ──
|
||||
|
||||
/**
 * Drain the test queue, one run at a time.
 *
 * The module-level `processing` flag makes this single-flight: a call made
 * while another drain is in progress returns immediately, and the in-flight
 * drain picks up anything queued in the meantime.
 *
 * For each run the VM is cloned, waited on, checked out, built/started and
 * checked for stability; `onVmReady` is then invoked. Any failure in that
 * sequence (including a throwing `onVmReady`) is routed to
 * `handleSetupFailure`, which never throws, so one bad run cannot strand the
 * runs queued behind it.
 */
async function processQueue(): Promise<void> {
  if (processing) return;
  if (queue.length === 0) return;

  processing = true;
  try {
    while (queue.length > 0) {
      const run = queue.shift()!;
      log.info('Dequeued test run', { prNumber: run.prNumber, repo: run.repo, queueDepth: queue.length });

      try {
        const host = await cloneVm(run.prNumber);
        await waitForSsh(host);
        await checkoutPr(host, run.prNumber);
        await startRuntime(host);
        await waitForStability(host);

        log.info('Test VM ready', { prNumber: run.prNumber, vmHost: host });
        await callbacks!.onVmReady(run.prNumber, run.repo, host, run.planContent);
        // eslint-disable-next-line no-catch-all/no-catch-all -- a failed VM setup is reported through onRunFailed, never thrown past the queue
      } catch (err) {
        await handleSetupFailure(run, err);
      }
    }
  } finally {
    // Always release the single-flight guard, even on an unexpected throw.
    processing = false;
  }
}

/**
 * Log a failed VM setup, remove the VM (best effort), and notify
 * `onRunFailed`. Every step is individually guarded so this never throws.
 *
 * @param run The run whose setup failed.
 * @param err The value thrown during setup.
 */
async function handleSetupFailure(run: TestRun, err: unknown): Promise<void> {
  const reason = err instanceof Error ? err.message : String(err);

  // Thread/session details only enrich the log line; a failing lookup must
  // not prevent VM cleanup or the failure report.
  let threadContext: Record<string, unknown> = {};
  try {
    const pr = getPrThreadByRepoPr(run.repo, run.prNumber);
    const session = pr ? getSession(pr.session_id) : null;
    threadContext = {
      sessionId: pr?.session_id,
      agentGroup: session?.agent_group_id,
      threadTs: pr?.thread_ts,
      channelId: pr?.channel_id,
    };
    // eslint-disable-next-line no-catch-all/no-catch-all -- log enrichment only; the failure is still logged and reported without it
  } catch (lookupErr) {
    log.warn('Failed to look up PR thread for failed test run', {
      prNumber: run.prNumber,
      repo: run.repo,
      err: lookupErr,
    });
  }

  log.error('Test run failed during VM setup', {
    prNumber: run.prNumber,
    repo: run.repo,
    category: 'test-vm-setup',
    ...threadContext,
    err,
  });

  // Clean up the VM on failure
  try {
    await sshControl(['rm', vmName(run.prNumber)]);
    // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort cleanup after a failed setup
  } catch {
    // best effort
  }
  activeVms.delete(run.prNumber);

  try {
    if (!callbacks) {
      // shutdown() clears the callbacks; there is nobody left to notify.
      log.warn('Test run failed after orchestrator shutdown; failure not reported', {
        prNumber: run.prNumber,
        repo: run.repo,
      });
      return;
    }
    await callbacks.onRunFailed(run.prNumber, run.repo, reason, run.planContent);
    // eslint-disable-next-line no-catch-all/no-catch-all -- a throwing failure callback must not abort the queue and strand the remaining runs
  } catch (reportErr) {
    log.error('Failed to report test run failure', {
      prNumber: run.prNumber,
      repo: run.repo,
      err: reportErr,
    });
  }
}
|
||||
|
||||
// ── TestOrchestratorModule implementation ──
|
||||
|
||||
/**
 * Install the callbacks the orchestrator reports through.
 *
 * @param cbs Callback set stored in the module-level `callbacks` slot.
 */
function init(cbs: OrchestratorCallbacks): void {
  callbacks = cbs;
  log.info('Test orchestrator initialized');
}
|
||||
|
||||
/**
 * Queue a test run and kick the queue processor.
 *
 * The processor is started fire-and-forget; a rejection from it is logged
 * rather than surfaced to the caller.
 *
 * @param run The run to enqueue.
 * @throws Error when `init` has not installed callbacks yet.
 */
function submitTest(run: TestRun): void {
  if (!callbacks) {
    throw new Error('Test orchestrator not initialized');
  }

  // Logged depth is the queue length before this run is appended.
  const depthBeforeEnqueue = queue.length;
  log.info('Test submitted', { prNumber: run.prNumber, repo: run.repo, queueDepth: depthBeforeEnqueue });
  queue.push(run);

  void processQueue().catch((err) => log.error('processQueue error', { err }));
}
|
||||
|
||||
/**
 * Record that a run has finished. This only logs: the VM is deliberately
 * left in the pool for investigation and is removed from `activeVms` later
 * by destroyVm, pool enforcement, or shutdown.
 *
 * @param prNumber PR whose run completed.
 */
function completeRun(prNumber: number): void {
  log.info('Test run completed', { prNumber });
}
|
||||
|
||||
/**
 * Cancel an active run (timeout path) by destroying its VM.
 *
 * @param prNumber PR whose run is being cancelled.
 */
async function cancelRun(prNumber: number): Promise<void> {
  log.info('Cancelling test run', { prNumber });
  await destroyVm(prNumber);
}
|
||||
|
||||
/**
 * Destroy the test VM tracked for a PR. Called on PR close/merge, timeout,
 * and pool enforcement.
 *
 * A no-op when no VM is tracked for the PR. Removal is best effort: a failed
 * `rm` is logged as a warning and the pool entry is dropped regardless.
 *
 * @param prNumber PR whose VM should be removed.
 */
async function destroyVm(prNumber: number): Promise<void> {
  const tracked = activeVms.get(prNumber);
  if (!tracked) {
    return;
  }

  const targetName = tracked.vmName;
  log.info('Destroying test VM', { vmName: targetName, prNumber });

  try {
    await sshControl(['rm', targetName]);
    // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort teardown; the pool entry is removed either way
  } catch (err) {
    log.warn('Failed to destroy test VM', { vmName: targetName, err });
  }

  activeVms.delete(prNumber);
}
|
||||
|
||||
/**
 * Shut the orchestrator down: empty the queue, remove every tracked VM
 * concurrently (best effort), clear the pool, and drop the callbacks.
 */
async function shutdown(): Promise<void> {
  // Truncate in place so pending runs are discarded.
  queue.length = 0;

  // Removal that never rejects, so one failing VM cannot abort the others.
  const removeQuietly = async (name: string): Promise<void> => {
    try {
      await sshControl(['rm', name]);
      // eslint-disable-next-line no-catch-all/no-catch-all -- best-effort teardown during shutdown
    } catch {
      // best effort
    }
  };

  const tracked = Array.from(activeVms.values());
  const removals = tracked.map((vm) => removeQuietly(vm.vmName));
  await Promise.all(removals);

  activeVms.clear();
  callbacks = null;
  log.info('Test orchestrator shut down');
}
|
||||
|
||||
// Register this module's implementation at load time. `satisfies` checks the
// object literal against TestOrchestratorModule without widening its type.
registerTestOrchestrator({
  init,
  submitTest,
  completeRun,
  cancelRun,
  destroyVm,
  shutdown,
} satisfies TestOrchestratorModule);
|
||||
|
||||
// ── Helpers ──
|
||||
|
||||
/**
 * Resolve after a delay.
 *
 * @param ms Delay passed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
@@ -60,11 +60,20 @@ Help a user with a customized NanoClaw install safely incorporate upstream chang
|
||||
- Default to MERGE (one-pass conflict resolution). Offer REBASE as an explicit option.
|
||||
- Keep token usage low: rely on `git status`, `git log`, `git diff`, and open only conflicted files.
|
||||
|
||||
# Step 0a: Refresh this skill first
|
||||
The update process itself evolves, so run its newest version before doing anything else:
|
||||
- Ensure the `upstream` remote exists (default `https://github.com/nanocoai/nanoclaw.git`) and fetch: `git fetch upstream --prune`. Detect the upstream branch (`main` or `master`).
|
||||
- Refresh this skill from upstream: `git checkout upstream/<branch> -- .claude/skills/update-nanoclaw/`
|
||||
- Re-read `.claude/skills/update-nanoclaw/SKILL.md`. If it changed, **follow the updated version from the top** instead of this one.
|
||||
|
||||
This is the only working-tree change expected before the preflight check; the full update commits it along with everything else.
|
||||
|
||||
# Step 0: Preflight (stop early if unsafe)
|
||||
Run:
|
||||
- `git status --porcelain`
|
||||
If output is non-empty:
|
||||
- Tell the user to commit or stash first, then stop.
|
||||
- Exception: changes limited to `.claude/skills/update-nanoclaw/` are the Step 0a self-refresh — ignore those and proceed.
|
||||
|
||||
Confirm remotes:
|
||||
- `git remote -v`
|
||||
@@ -256,6 +265,16 @@ If any channels/providers are installed AND `upstream/channels` or `upstream/pro
|
||||
|
||||
If no channels/providers are installed, skip silently.
|
||||
|
||||
Proceed to Step 7.9.
|
||||
|
||||
# Step 7.9: Stamp the upgrade marker (required)
|
||||
After validation has **succeeded**, record that this install reached the new version through the supported path. Without this, the startup tripwire stops the host on its next start.
|
||||
|
||||
- `pnpm exec tsx scripts/upgrade-state.ts set "" update-nanoclaw`
|
||||
- The empty version argument stamps the current `package.json` version.
|
||||
|
||||
If validation did NOT succeed, do not stamp — leave the tripwire to catch the broken state.
|
||||
|
||||
Proceed to Step 8.
|
||||
|
||||
# Step 8: Summary + rollback instructions
|
||||
|
||||
@@ -18,12 +18,20 @@ jobs:
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Bump patch version
|
||||
run: |
|
||||
# Skip the auto-bump when the pushed commits already changed the
|
||||
# version themselves (e.g. a release PR that set a minor/major).
|
||||
# Otherwise the bot would patch a deliberate 2.1.0 up to 2.1.1.
|
||||
if git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" | grep -qx 'package.json'; then
|
||||
echo "package.json already changed in this push; skipping auto-bump."
|
||||
exit 0
|
||||
fi
|
||||
pnpm version patch --no-git-tag-version
|
||||
git add package.json
|
||||
git diff --cached --quiet && exit 0
|
||||
|
||||
@@ -39,3 +39,10 @@ groups/*
|
||||
.nanoclaw/
|
||||
|
||||
agents-sdk-docs
|
||||
.agents
|
||||
AGENTS.md
|
||||
|
||||
# Internal working docs, never committed
|
||||
docs/maintainer-guide.md
|
||||
docs/drafts/
|
||||
forks.md
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
All notable changes to NanoClaw will be documented in this file.
|
||||
|
||||
## [2.1.0] - 2026-06-07
|
||||
|
||||
- [BREAKING] **Startup now requires an upgrade marker.** The host refuses to boot unless `data/upgrade-state.json` records that this install reached the current version through a sanctioned path (`/setup`, `/update-nanoclaw`, `/migrate-nanoclaw`). After this update completes — and before restarting the service — stamp the marker by running `pnpm exec tsx scripts/upgrade-state.ts set`. If the host has already tripped on restart with "update did not go through the supported path", that same command clears it. See [docs/upgrade-recovery.md](docs/upgrade-recovery.md).
|
||||
|
||||
## [2.0.64] - 2026-05-18
|
||||
|
||||
- **`ncl destinations add` and `remove` through the approval flow now reach the receiver immediately.** Approved destinations weren't being projected into the receiving agent's local session state, so a freshly-added destination silently failed at `send_message` with `unknown destination`, and a removed destination stayed resolvable until the next container restart. Both now take effect the moment the approval executes. Direct (non-approval) calls were unaffected.
|
||||
|
||||
@@ -33,7 +33,7 @@ user_dms (user_id, channel_type, messaging_group_id) — cold-DM cache
|
||||
|
||||
agent_groups (workspace, memory, CLAUDE.md, personality, container config)
|
||||
↕ many-to-many via messaging_group_agents (session_mode, trigger_rules, priority)
|
||||
messaging_groups (one chat/channel on one platform; unknown_sender_policy)
|
||||
messaging_groups (one chat/channel on one platform; instance = adapter-instance name, defaults to channel_type; unknown_sender_policy)
|
||||
|
||||
sessions (agent_group_id + messaging_group_id + thread_id → per-session container)
|
||||
```
|
||||
@@ -83,6 +83,7 @@ For ad-hoc queries from skills or scripts, use the in-tree wrapper rather than t
|
||||
| `groups/<folder>/` | Per-agent-group filesystem (CLAUDE.md, skills, per-group `agent-runner-src/` overlay) |
|
||||
| `scripts/init-first-agent.ts` | Bootstrap the first DM-wired agent (used by `/init-first-agent` skill) |
|
||||
| `migrate-v2.sh` + `setup/migrate-v2/` | v1→v2 migration. Standalone script: `bash migrate-v2.sh`. Seeds DB, copies groups/sessions, installs channels, builds container, offers service switchover, then hands off to `/migrate-from-v1` skill for owner setup and CLAUDE.md cleanup. See [docs/migration-dev.md](docs/migration-dev.md). |
|
||||
| `nanoclaw.sh --uninstall` + `setup/uninstall/` | Uninstall this copy only (slug-scoped): service, containers + image, `data/`, `logs/`, `groups/`, this copy's OneCLI agents. Confirms per group; `--dry-run` previews, `--yes` skips prompts. Other copies and the shared OneCLI app are untouched. Bypasses bootstrap entirely; `uninstall.sh` is a pointer that execs it. |
|
||||
|
||||
## Admin CLI (`ncl`)
|
||||
|
||||
@@ -274,6 +275,9 @@ This project uses pnpm with `minimumReleaseAge: 4320` (3 days) in `pnpm-workspac
|
||||
| [docs/build-and-runtime.md](docs/build-and-runtime.md) | Runtime split (Node host + Bun container), lockfiles, image build surface, CI, key invariants |
|
||||
| [docs/v1-to-v2-changes.md](docs/v1-to-v2-changes.md) | v1→v2 architecture diff — vocabulary for where v1 things moved |
|
||||
| [docs/migration-dev.md](docs/migration-dev.md) | Migration development guide — testing, debugging, dev loop |
|
||||
| [docs/customizing.md](docs/customizing.md) | Short intro to customizing via skills |
|
||||
| [docs/skills-model.md](docs/skills-model.md) | The skills model in full: recipes, tests, upgrades, migrations |
|
||||
| [docs/skill-guidelines.md](docs/skill-guidelines.md) | Authoritative checklist for writing a skill |
|
||||
|
||||
## Container Build Cache
|
||||
|
||||
|
||||
+17
-12
@@ -29,26 +29,27 @@ Every user should have clean and minimal code that does exactly what they need.
|
||||
|
||||
### Skill types
|
||||
|
||||
#### 1. Feature skills (branch-based)
|
||||
#### 1. Channel and provider skills (registry branches)
|
||||
|
||||
Add capabilities to NanoClaw by merging a git branch. The SKILL.md contains setup instructions; the actual code lives on a `skill/*` branch.
|
||||
Add a messaging channel or an agent provider. The SKILL.md contains the install steps; the actual code lives on a long-lived registry branch (`channels` or `providers`) that we keep in sync with `main`.
|
||||
|
||||
**Location:** `.claude/skills/` on `main` (instructions only), code on `skill/*` branch
|
||||
**Location:** `.claude/skills/` on `main` (instructions only), code on the `channels` or `providers` branch
|
||||
|
||||
**Examples:** `/add-telegram`, `/add-slack`, `/add-discord`, `/add-gmail`
|
||||
**Examples:** `/add-telegram`, `/add-slack`, `/add-discord`, `/add-opencode`
|
||||
|
||||
**How they work:**
|
||||
1. User runs `/add-telegram`
|
||||
2. Claude follows the SKILL.md: fetches and merges the `skill/telegram` branch
|
||||
3. Claude walks through interactive setup (env vars, bot creation, etc.)
|
||||
2. Claude follows the SKILL.md: `git fetch origin channels`, then copies each file in with `git show origin/channels:<path> > <path>`. Install is an additive fetch, never a `git merge`.
|
||||
3. The adapter's registration test is fetched the same way and run as verification
|
||||
4. Claude walks through interactive setup (tokens, bot creation, etc.)
|
||||
|
||||
**Contributing a feature skill:**
|
||||
**Contributing a channel or provider skill:**
|
||||
1. Fork `nanocoai/nanoclaw` and branch from `main`
|
||||
2. Make the code changes (new files, modified source, updated `package.json`, etc.)
|
||||
3. Add a SKILL.md in `.claude/skills/<name>/` with setup instructions — step 1 should be merging the branch
|
||||
4. Open a PR. We'll create the `skill/<name>` branch from your work
|
||||
2. Build the adapter following [docs/skill-guidelines.md](docs/skill-guidelines.md): a self-registering module, one appended barrel import, and a registration test that imports the real barrel
|
||||
3. Add a SKILL.md in `.claude/skills/<name>/` with the fetch-and-copy steps, and a REMOVE.md that reverses every change
|
||||
4. Open a PR. We'll land the code on the registry branch from your work
|
||||
|
||||
See `/add-telegram` for a good example. See [docs/skills-as-branches.md](docs/skills-as-branches.md) for the full system design.
|
||||
See `/add-slack` for a good example. See [docs/skills-model.md](docs/skills-model.md) for why install is a fetch, never a merge.
|
||||
|
||||
#### 2. Utility skills (with code files)
|
||||
|
||||
@@ -58,7 +59,7 @@ Standalone tools that ship code files alongside the SKILL.md. The SKILL.md tells
|
||||
|
||||
**Examples:** a self-contained CLI or helper shipped in a `scripts/` subfolder of the skill.
|
||||
|
||||
**Key difference from feature skills:** No branch merge needed. The code is self-contained in the skill directory and gets copied into place during installation.
|
||||
**Key difference from channel/provider skills:** the code is self-contained in the skill directory and gets copied into place during installation; nothing is fetched from a registry branch.
|
||||
|
||||
**Guidelines:**
|
||||
- Put code in separate files, not inline in the SKILL.md
|
||||
@@ -93,6 +94,10 @@ Skills that run inside the agent container, not on the host. These teach the con
|
||||
- Use `allowed-tools` frontmatter to scope tool permissions
|
||||
- Keep them focused — the agent's context window is shared across all container skills
|
||||
|
||||
### Writing a good skill
|
||||
|
||||
The authoring bar is [docs/skill-guidelines.md](docs/skill-guidelines.md): mostly adds, minimal reach-ins into existing code, a test for every functional integration point, and a REMOVE.md whenever apply leaves anything behind. [docs/skills-model.md](docs/skills-model.md) explains the model behind it.
|
||||
|
||||
### SKILL.md format
|
||||
|
||||
All skills use the [Claude Code skills standard](https://code.claude.com/docs/en/skills):
|
||||
|
||||
@@ -196,11 +196,19 @@ Ask Claude Code. "Why isn't the scheduler running?" "What's in the recent logs?"
|
||||
|
||||
If a step fails, `nanoclaw.sh` hands off to Claude Code to diagnose and resume. If that doesn't resolve it, run `claude`, then `/debug`. If Claude identifies an issue likely to affect other users, open a PR against the relevant setup step or skill.
|
||||
|
||||
**How do I uninstall NanoClaw?**
|
||||
|
||||
```bash
|
||||
bash nanoclaw.sh --uninstall
|
||||
```
|
||||
|
||||
Every install is tagged with a per-checkout id, so the uninstaller removes only what belongs to that copy: the background service, containers and image, app data and logs, your agents' files, and this copy's OneCLI vault agents. Shared things — the OneCLI app and your credentials, other NanoClaw copies on the machine — are left alone. It shows exactly what it found and asks for confirmation per group; nothing is deleted until you say yes. Use `--dry-run` to preview without changing anything, or `--yes` to skip the prompts. Your `.env` is backed up before removal. To finish, delete the checkout folder itself.
|
||||
|
||||
**What changes will be accepted into the codebase?**
|
||||
|
||||
Only security fixes, bug fixes, and clear improvements will be accepted to the base configuration. That's all.
|
||||
|
||||
Everything else (new capabilities, OS compatibility, hardware support, enhancements) should be contributed as skills on the `channels` or `providers` branch.
|
||||
Everything else (new capabilities, OS compatibility, hardware support, enhancements) should be contributed as skills: channel and provider code on the `channels`/`providers` registry branches, everything else as a self-contained skill. See [docs/customizing.md](docs/customizing.md) and [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
This keeps the base system minimal and lets every user customize their installation without inheriting features they don't want.
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ ARG INSTALL_CJK_FONTS=false
|
||||
# Pin CLI versions for reproducibility. Bump deliberately — unpinned installs
|
||||
# mean every rebuild silently picks up the latest and can break in lockstep
|
||||
# across all users.
|
||||
ARG CLAUDE_CODE_VERSION=2.1.154
|
||||
ARG CLAUDE_CODE_VERSION=2.1.170
|
||||
ARG AGENT_BROWSER_VERSION=latest
|
||||
ARG VERCEL_VERSION=52.2.1
|
||||
ARG BUN_VERSION=1.3.12
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"": {
|
||||
"name": "nanoclaw-agent-runner",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.3.154",
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.3.170",
|
||||
"@anthropic-ai/sdk": "^0.100.0",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"cron-parser": "^5.0.0",
|
||||
@@ -19,23 +19,23 @@
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.3.154", "", { "optionalDependencies": { "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.3.154", "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.3.154", "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.3.154", "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.3.154", "@anthropic-ai/claude-agent-sdk-linux-x64": "0.3.154", "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.3.154", "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.3.154", "@anthropic-ai/claude-agent-sdk-win32-x64": "0.3.154" }, "peerDependencies": { "@anthropic-ai/sdk": ">=0.93.0", "@modelcontextprotocol/sdk": "^1.29.0", "zod": "^4.0.0" } }, "sha512-iEn25urI2QrMPFIhId3h7v/7EG5gsmF7ooe+6EvsAosePeLmpVVerp5nXtHnlmBkMinLecurcPA+OddKw76jYw=="],
|
||||
"@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.3.170", "", { "optionalDependencies": { "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.3.170", "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.3.170", "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.3.170", "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.3.170", "@anthropic-ai/claude-agent-sdk-linux-x64": "0.3.170", "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.3.170", "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.3.170", "@anthropic-ai/claude-agent-sdk-win32-x64": "0.3.170" }, "peerDependencies": { "@anthropic-ai/sdk": ">=0.93.0", "@modelcontextprotocol/sdk": "^1.29.0", "zod": "^4.0.0" } }, "sha512-pAvhfk+iTodXZ6RF18Kz7BEUWFjL7EcR3tKuhUNdPpE1NAYCR3mSHGbafi72JsrNwKEDIs7FU31z3fqhwy8QzA=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-darwin-arm64": ["@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.154", "", { "os": "darwin", "cpu": "arm64" }, "sha512-oFW3LD5lYrKAU+AKu27Z8hrzqkrh362qQrwi/i3DxGcud9BXUycsXYjShpDj3D3JZu169UzZuSPhx1Wajmbiwg=="],
|
||||
"@anthropic-ai/claude-agent-sdk-darwin-arm64": ["@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.170", "", { "os": "darwin", "cpu": "arm64" }, "sha512-rwfgArIa5WI0QPNqFsRBgvtSI0mrtpynUm0oK6+l6/KX4hcgnYGEzciZR1bOeD9/7sSZlTdIgt+T9alKeZmXcg=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-darwin-x64": ["@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.154", "", { "os": "darwin", "cpu": "x64" }, "sha512-5BgWEueP+cqoctWjZYhCbyltuaV/N2DmKDXD3/69cKaVmJp8XL9OCzlq/HEirA/+Ssjskx6hDUBaOcpuZ3iwQA=="],
|
||||
"@anthropic-ai/claude-agent-sdk-darwin-x64": ["@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.170", "", { "os": "darwin", "cpu": "x64" }, "sha512-0e58h8UQMtsQxLGIv9r4foxfBFWKZ7NeDtoplLhuD7EwQonehomw1sBXCch77t/IfUS+q5vQ5zv+fOGmap5nLQ=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-linux-arm64": ["@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.154", "", { "os": "linux", "cpu": "arm64" }, "sha512-rRkW4SBL3W7zQvKscCIfIGlmoeuTbMV6dXFbPdmpRGvmYZIs79RpzO6xrGBnnhmm+B7znQ9oHAnffi/2FBgJbA=="],
|
||||
"@anthropic-ai/claude-agent-sdk-linux-arm64": ["@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.170", "", { "os": "linux", "cpu": "arm64" }, "sha512-gLbaFqcGppFJQd4DLNV4IXoeahejT/p2/M8bSSvRDbla9GOsBr1AxV5XLRyBn1e7xFGozZIAIQr3+1chp7NJgQ=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-linux-arm64-musl": ["@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.154", "", { "os": "linux", "cpu": "arm64" }, "sha512-o2bCQN4Xn3UqCLErC5m4T7u0yYArJYmgFCUFnA6K96DdW2RERvx+gTKXxWuHEBkDO+eMoHLHLxk0u2jGES00Ng=="],
|
||||
"@anthropic-ai/claude-agent-sdk-linux-arm64-musl": ["@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.170", "", { "os": "linux", "cpu": "arm64" }, "sha512-SRYfQcsXlOq+CD/FqkQBTSHbaD++w73GnnO+NUV9adLYrca3kfetRwWT1iguY1cNS0l34dCR3rlzCPq78vg1Jg=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-linux-x64": ["@anthropic-ai/claude-agent-sdk-linux-x64@0.3.154", "", { "os": "linux", "cpu": "x64" }, "sha512-GpiFF8Ez6PbM3m0gqtCo/FKM346qyRdP7VhbmJzdnbNKTiiUZ66vDQyEUPZPCG24ZkrG4m96KpRIUwY08rHiNg=="],
|
||||
"@anthropic-ai/claude-agent-sdk-linux-x64": ["@anthropic-ai/claude-agent-sdk-linux-x64@0.3.170", "", { "os": "linux", "cpu": "x64" }, "sha512-Xl/m7TaSC3T5IDBdHrZQ9fCQYyDmPELN34CL+MoyPIf7uSmuZnjE9fUOqDh2Rv26JxWssi1M6X+BBvVuKd6Cpg=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-linux-x64-musl": ["@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.154", "", { "os": "linux", "cpu": "x64" }, "sha512-zA7S8Lm6O4QBsUpbhiOht8BgiXHOBBFUIo8ZLK6r5wAatK3Q44syWVxICeyCnR6wqfnkf3cugCw27ycS6vVgaA=="],
|
||||
"@anthropic-ai/claude-agent-sdk-linux-x64-musl": ["@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.170", "", { "os": "linux", "cpu": "x64" }, "sha512-m4+I0qBEk7cxRKS+pL+eoWXbXTFOAo83fQ0tQvap4z/mDMm06IWJtEPoYTaMBwsp32GJWLkHWKbZSBCHZnp2DQ=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-win32-arm64": ["@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.154", "", { "os": "win32", "cpu": "arm64" }, "sha512-cDW1YFbU/PJFlrGXhlAGcbkXt80sEO6WtnH8nN8YHXLn5NWduy2q7o/qC6i8XozgvRGf6t/eMoH7IasGIEDhDw=="],
|
||||
"@anthropic-ai/claude-agent-sdk-win32-arm64": ["@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.170", "", { "os": "win32", "cpu": "arm64" }, "sha512-IG+8isJNNJKbnnhO7m+PGhfVCg+XoQ/MDxGde5eigFI0WsEfitjuWSWwx82bT9ghxI1aa6qNvI+UPgPcZuo5Fg=="],
|
||||
|
||||
"@anthropic-ai/claude-agent-sdk-win32-x64": ["@anthropic-ai/claude-agent-sdk-win32-x64@0.3.154", "", { "os": "win32", "cpu": "x64" }, "sha512-tSKaIIpL72OPg3WfzZTCIl8OJgcbq4qieu8/fDWjsdeQuari9gQMIuEflFphk9HqNsxpSmDqKi8Sm5mW2V566Q=="],
|
||||
"@anthropic-ai/claude-agent-sdk-win32-x64": ["@anthropic-ai/claude-agent-sdk-win32-x64@0.3.170", "", { "os": "win32", "cpu": "x64" }, "sha512-7cuqSKbHVItPGVwRbd3A0BEJwcNtc7Fhoh6qHN4C6yrmjSrvdYYx3MLvq/VI768/RoG7mAMDxb+j7WfEfoP9BA=="],
|
||||
|
||||
"@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.100.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1", "standardwebhooks": "^1.0.0" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-cAm3aXm6qAiHIvHxyIIGd6tVmsD2gDqlc2h0R20ijNUzGgVnIN822bit4mKbF6CkuV7qIrLQIPoAepHEpanrQQ=="],
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"test": "bun test"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.3.154",
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.3.170",
|
||||
"@anthropic-ai/sdk": "^0.100.0",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"cron-parser": "^5.0.0",
|
||||
|
||||
@@ -5,8 +5,11 @@
|
||||
* send_message(to="agent-name") since agents and channels share the
|
||||
* unified destinations namespace.
|
||||
*
|
||||
* create_agent is admin-only. Non-admin containers never see this tool
|
||||
* (see mcp-tools/index.ts). The host re-checks permission on receive.
|
||||
* create_agent writes central-DB state. The host authorizes it by CLI scope:
|
||||
* trusted owner agent groups (scope 'global') create directly; confined groups
|
||||
* require admin approval (see src/modules/agent-to-agent/create-agent.ts). This
|
||||
* tool just writes the outbound request; authorization is enforced host-side,
|
||||
* not here — the container is untrusted and cannot be relied on to gate itself.
|
||||
*/
|
||||
import { writeMessageOut } from '../db/messages-out.js';
|
||||
import { registerTools } from './server.js';
|
||||
@@ -32,7 +35,7 @@ export const createAgent: McpToolDefinition = {
|
||||
tool: {
|
||||
name: 'create_agent',
|
||||
description:
|
||||
'Create a long-lived companion sub-agent (research assistant, task manager, specialist) — the name becomes your destination for it. Admin-only. Fire-and-forget.',
|
||||
'Create a long-lived companion sub-agent (research assistant, task manager, specialist) — the name becomes your destination for it. May require admin approval before the agent is created. Fire-and-forget.',
|
||||
inputSchema: {
|
||||
type: 'object' as const,
|
||||
properties: {
|
||||
|
||||
@@ -9,6 +9,5 @@ The files in this directory are original design documents and developer referenc
|
||||
| [SPEC.md](SPEC.md) | [Architecture](https://docs.nanoclaw.dev/concepts/architecture) |
|
||||
| [SECURITY.md](SECURITY.md) | [Security model](https://docs.nanoclaw.dev/concepts/security) |
|
||||
| [REQUIREMENTS.md](REQUIREMENTS.md) | [Introduction](https://docs.nanoclaw.dev/introduction) |
|
||||
| [skills-as-branches.md](skills-as-branches.md) | [Skills system](https://docs.nanoclaw.dev/integrations/skills-system) |
|
||||
| [docker-sandboxes.md](docker-sandboxes.md) | [Docker Sandboxes](https://docs.nanoclaw.dev/advanced/docker-sandboxes) |
|
||||
| [APPLE-CONTAINER-NETWORKING.md](APPLE-CONTAINER-NETWORKING.md) | [Container runtime](https://docs.nanoclaw.dev/advanced/container-runtime) |
|
||||
|
||||
@@ -83,6 +83,48 @@ Each NanoClaw group gets its own OneCLI agent identity. This allows different cr
|
||||
- Any credentials matching blocked patterns
|
||||
- `.env` is shadowed with `/dev/null` in the project root mount
|
||||
|
||||
### 6. Egress Lockdown (Forced Proxy)
|
||||
|
||||
The `HTTPS_PROXY` env var only redirects *proxy-aware* clients — a tool that
|
||||
ignores it (or a raw socket) could reach the internet directly and bypass
|
||||
credential injection, approvals, and audit. Egress lockdown closes that hole at
|
||||
the network layer.
|
||||
|
||||
**How it works:** agents are placed on a Docker `--internal` network
|
||||
(`nanoclaw-egress`) that has **no route to the internet**. The OneCLI gateway
|
||||
container is attached to that network, aliased as `host.docker.internal`, so the
|
||||
injected proxy URL (`…@host.docker.internal:10255`) resolves to the gateway
|
||||
*container-to-container*. The gateway is therefore the **only reachable hop** —
|
||||
anything else has nowhere to go. The agent is non-root with no `NET_ADMIN`, so
|
||||
it cannot undo this. Identical mechanism on macOS and Linux (no host firewall,
|
||||
no `host-gateway` route).
|
||||
|
||||
- **Self-healing:** the gateway is re-attached to the network at every spawn and
|
||||
on each host-sweep tick, so an out-of-band detach (e.g. `docker compose up` on
|
||||
the OneCLI stack — its compose lives in `~/.onecli`, not this repo) recovers
|
||||
automatically.
|
||||
- **Fail-fast:** if lockdown is on but the network can't be created or the
|
||||
gateway can't be attached (e.g. a non-standard gateway container name, or the
|
||||
gateway isn't running), nanoclaw **refuses to spawn the agent** and surfaces a
|
||||
clear error — it never silently falls back to open egress. Fix the cause (or
|
||||
set `NANOCLAW_EGRESS_LOCKDOWN=false`) and retry. The host-sweep re-heal is the
|
||||
exception: a heal failure there is logged but not fatal, since already-running
|
||||
agents stay on the internal net (no leak) until the gateway returns.
|
||||
|
||||
**Configuration:**
|
||||
|
||||
| Env | Default | Meaning |
|
||||
| --- | --- | --- |
|
||||
| `NANOCLAW_EGRESS_LOCKDOWN` | `false` | Set `true` to opt in (otherwise the host-gateway path is used). Enabled automatically by `/add-golden-registry`. |
|
||||
| `NANOCLAW_EGRESS_NETWORK` | `nanoclaw-egress` | Network name. |
|
||||
| `ONECLI_GATEWAY_CONTAINER` | `onecli` | Gateway container to attach. |
|
||||
|
||||
**⚠ Behavior when enabled:** with lockdown on, agents have **no direct
|
||||
internet** — all traffic must go through OneCLI. Proxy-aware clients (npm, pnpm,
|
||||
pip, curl, node/bun with the proxy env) are unaffected. Any workflow that relies
|
||||
on a **non-proxy-aware** tool reaching the internet directly will fail by design.
|
||||
Lockdown is **off by default**; opt in with `NANOCLAW_EGRESS_LOCKDOWN=true`.
|
||||
|
||||
## Privilege Comparison
|
||||
|
||||
| Capability | Main Group | Non-Main Group |
|
||||
|
||||
@@ -668,15 +668,19 @@ CREATE TABLE agent_groups (
|
||||
);
|
||||
|
||||
-- Platform groups/channels (WhatsApp group, Slack channel, Discord channel, email thread, etc.)
|
||||
-- One row per chat PER ADAPTER INSTANCE. instance defaults to channel_type
|
||||
-- (the "default instance"), so single-instance installs never see it.
|
||||
CREATE TABLE messaging_groups (
|
||||
id TEXT PRIMARY KEY,
|
||||
channel_type TEXT NOT NULL, -- 'whatsapp', 'slack', 'discord', 'telegram', 'email'
|
||||
platform_id TEXT NOT NULL, -- platform-specific ID (JID, channel ID, etc.)
|
||||
instance TEXT NOT NULL, -- adapter-instance name; default = channel_type
|
||||
name TEXT,
|
||||
is_group INTEGER DEFAULT 0,
|
||||
unknown_sender_policy TEXT NOT NULL DEFAULT 'strict', -- 'strict' | 'request_approval' | 'public'
|
||||
created_at TEXT NOT NULL,
|
||||
UNIQUE(channel_type, platform_id)
|
||||
denied_at TEXT,
|
||||
UNIQUE(channel_type, platform_id, instance)
|
||||
);
|
||||
|
||||
-- Users (messaging platform identities, namespaced "<channel_type>:<handle>")
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
# Customizing NanoClaw
|
||||
|
||||
NanoClaw is made to be forked and changed. The catch with most projects is that once you edit the code, every upstream update turns into a merge fight, and the more you've customized, the worse it gets.
|
||||
|
||||
NanoClaw avoids that with one simple idea: **every change you make is a skill.**
|
||||
|
||||
## The idea in a minute
|
||||
|
||||
- A **skill** is a small, self-contained add-on. It brings its own code and knows how to install itself.
|
||||
- Your **fork is just a list of skills**, plus one "recipe" that says which skills you have and how they fit together.
|
||||
- Because your changes live beside the core instead of tangled into it, **pulling in updates stays easy**.
|
||||
|
||||
## What makes it work
|
||||
|
||||
A good skill mostly **adds** things: new files, a line appended to an existing file, a dependency. It avoids rewriting existing code in place.
|
||||
|
||||
And it ships a test for each spot where it touches the rest of the system. When an update moves something your skill depends on, that test fails and points at the fix, instead of you finding out when things break in production.
|
||||
|
||||
## How you actually work
|
||||
|
||||
You don't have to think in skills while you're building. **Edit the code directly, get it working, then turn your changes into skills afterward.** A coding agent does the conversion for you, following [skill-guidelines.md](skill-guidelines.md).
|
||||
|
||||
The only rule worth remembering: **a change isn't really part of your fork until it's a skill**, because that's the form that survives an upgrade.
|
||||
|
||||
## Upgrading
|
||||
|
||||
Always upgrade by running `/update-nanoclaw`. **Don't just `git pull`.** The command sets a rollback point, pulls the upstream changes, runs your tests, and walks you through anything that needs fixing, usually a small, local fix in one skill.
|
||||
|
||||
## The deal
|
||||
|
||||
We keep the core small and stable, and every breaking change ships with its migration. You keep your changes as skills, with tests. Do that, and upgrades won't break you. Changes edited directly into the core are the one thing the model can't protect.
|
||||
|
||||
## Go deeper
|
||||
|
||||
- **[The skills model in full](skills-model.md)**: how skills, recipes, tests, and upgrades work under the hood.
|
||||
- **[Skill guidelines](skill-guidelines.md)**: the authoritative checklist for writing one.
|
||||
+6
-3
@@ -27,21 +27,24 @@ CREATE TABLE agent_groups (
|
||||
|
||||
### 1.2 `messaging_groups`
|
||||
|
||||
One row per platform chat (one WhatsApp group, one Slack channel, one 1:1 DM, etc.).
|
||||
One row per platform chat (one WhatsApp group, one Slack channel, one 1:1 DM, etc.) per adapter instance.
|
||||
|
||||
```sql
|
||||
CREATE TABLE messaging_groups (
|
||||
id TEXT PRIMARY KEY,
|
||||
channel_type TEXT NOT NULL,
|
||||
platform_id TEXT NOT NULL,
|
||||
instance TEXT NOT NULL,
|
||||
name TEXT,
|
||||
is_group INTEGER DEFAULT 0,
|
||||
unknown_sender_policy TEXT NOT NULL DEFAULT 'strict',
|
||||
created_at TEXT NOT NULL,
|
||||
UNIQUE(channel_type, platform_id)
|
||||
denied_at TEXT,
|
||||
UNIQUE(channel_type, platform_id, instance)
|
||||
);
|
||||
```
|
||||
|
||||
- `instance`: adapter-instance name — N adapters of one platform (e.g. three Slack apps in one workspace) each own their rows. The default instance IS the channel type: migration 016 backfills `instance = channel_type` and `createMessagingGroup` stamps the same default, so single-instance installs never see the dimension. Inbound lookups are exact-on-instance (an unknown named instance auto-creates its own row); outbound lookups resolve default-instance-first.
|
||||
- `unknown_sender_policy`: `strict` (drop), `request_approval` (ask admin), `public` (allow).
|
||||
- **Readers:** `src/router.ts`, `src/delivery.ts`, `src/session-manager.ts`
|
||||
- **Writers:** `src/db/messaging-groups.ts`, channel setup flows
|
||||
@@ -134,7 +137,7 @@ CREATE TABLE user_dms (
|
||||
);
|
||||
```
|
||||
|
||||
Populated lazily by `ensureUserDm()` in `src/user-dm.ts`.
|
||||
Populated lazily by `ensureUserDm()` in `src/user-dm.ts`. Cold DMs resolve via the channel's default adapter instance — `PRIMARY KEY (user_id, channel_type)` is per-platform, not per-instance.
|
||||
|
||||
### 1.8 `sessions`
|
||||
|
||||
|
||||
@@ -53,6 +53,80 @@ Model selection considerations for Apple Silicon:
|
||||
|
||||
The agent uses tool calls extensively (read/write files, shell commands). Models that support tool use reliably work best. Gemma 4 and Qwen 3 Coder both handle structured tool calls well.
|
||||
|
||||
## Allowing Prompt Caching (filter the cache-busting hash)
|
||||
|
||||
Out of the box this path is slow — every reply re-reads the whole multi-thousand-token system prompt from scratch, even for a one-word answer. Ollama has a prompt cache that should skip that repeated work, but on this path it never kicks in.
|
||||
|
||||
**Cause.** The Claude Agent SDK adds a per-request hash to the front of every prompt — `x-anthropic-billing-header: ...; cch=<hash>;`. It changes on every request, and Ollama's cache only reuses a prompt whose start is unchanged. So that one shifting value at the front makes Ollama treat every prompt as new and re-read all of it. (Ollama ignores the hash itself, so filtering it has no effect on output.)
|
||||
|
||||
**Fix.** Run a tiny proxy between the container and Ollama that filters the hash out (pins `cch=<hash>` to a constant). The start of the prompt is now stable, so the cache kicks in and only the new message gets processed. In our setup — a 31B model on Apple Silicon — follow-up replies dropped from ~80s to ~4s; your numbers will vary with model size and hardware. Output is unchanged, since Ollama ignores the value anyway.
|
||||
|
||||
Point the agent group's `ANTHROPIC_BASE_URL` at the proxy instead of Ollama directly (everything else from the sections above is unchanged):
|
||||
|
||||
```
|
||||
ANTHROPIC_BASE_URL=http://host.docker.internal:11999 # the proxy
|
||||
# proxy forwards to http://127.0.0.1:11434 (Ollama)
|
||||
```
|
||||
|
||||
The proxy is ~40 lines of dependency-free Node:
|
||||
|
||||
```js
|
||||
// ollama-cch-proxy.mjs — normalize the SDK's per-request cch nonce so Ollama's
|
||||
// prefix cache survives across turns. Listens on :11999, forwards to Ollama.
|
||||
import http from 'node:http';
|
||||
|
||||
const TARGET_HOST = process.env.OLLAMA_HOST || '127.0.0.1';
|
||||
const TARGET_PORT = Number(process.env.OLLAMA_PORT || 11434);
|
||||
const LISTEN_PORT = Number(process.env.PROXY_PORT || 11999);
|
||||
|
||||
const server = http.createServer((req, res) => {
|
||||
const chunks = [];
|
||||
req.on('data', (c) => chunks.push(c));
|
||||
req.on('end', () => {
|
||||
let body = Buffer.concat(chunks);
|
||||
if (req.method === 'POST' && body.length) {
|
||||
body = Buffer.from(body.toString('utf8').replace(/cch=[0-9a-f]+;/g, 'cch=00000;'), 'utf8');
|
||||
}
|
||||
const headers = { ...req.headers, host: `${TARGET_HOST}:${TARGET_PORT}`, 'content-length': String(body.length) };
|
||||
const proxyReq = http.request(
|
||||
{ host: TARGET_HOST, port: TARGET_PORT, method: req.method, path: req.url, headers },
|
||||
(proxyRes) => {
|
||||
res.writeHead(proxyRes.statusCode || 502, proxyRes.headers);
|
||||
proxyRes.pipe(res);
|
||||
},
|
||||
);
|
||||
proxyReq.on('error', (e) => { res.writeHead(502); res.end(String(e)); });
|
||||
proxyReq.end(body);
|
||||
});
|
||||
});
|
||||
server.listen(LISTEN_PORT, '0.0.0.0', () => console.log(`cch-proxy :${LISTEN_PORT} -> ${TARGET_HOST}:${TARGET_PORT}`));
|
||||
```
|
||||
|
||||
Run it durably so it survives reboots. On Linux, a systemd user service:
|
||||
|
||||
```ini
|
||||
# ~/.config/systemd/user/ollama-cch-proxy.service
|
||||
[Unit]
|
||||
Description=Ollama cch-normalizing proxy for NanoClaw
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/usr/bin/node %h/.config/nanoclaw/ollama-cch-proxy.mjs
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
```
|
||||
|
||||
```bash
|
||||
systemctl --user enable --now ollama-cch-proxy
|
||||
loginctl enable-linger "$USER" # so it runs without an active login session
|
||||
```
|
||||
|
||||
On macOS use a `launchd` user agent (`~/Library/LaunchAgents/`) running the same script.
|
||||
|
||||
**Scope.** This only affects the Claude-Code-CLI → Ollama path described here. Codex and OpenCode don't use the Claude Agent SDK, so they never emit the `cch` hash and get prompt caching for free.
|
||||
|
||||
## What Changes at the Code Level
|
||||
|
||||
Three files need to support this feature. See `/add-ollama-provider` for the exact changes.
|
||||
|
||||
+1
-1
@@ -187,7 +187,7 @@ leaking the token to disk outweighs the debugging value.
|
||||
|
||||
| File | Role |
|
||||
|---|---|
|
||||
| `nanoclaw.sh` | Top-level wrapper. Phase 1 (bootstrap) and phase 2 (setup:auto) orchestration. Writes bootstrap's raw log + progression entry. |
|
||||
| `nanoclaw.sh` | Top-level wrapper. Phase 1 (bootstrap) and phase 2 (setup:auto) orchestration. Writes bootstrap's raw log + progression entry. `--uninstall` bypasses bootstrap entirely — it execs setup:auto directly (the flow lives in `setup/uninstall/`), or prints manual-cleanup guidance and exits 1 when the TS toolchain is missing. |
|
||||
| `setup.sh` | Phase 1 bootstrap: Node, pnpm, native-module verify. Emits its own `BOOTSTRAP` status block (historically printed to stdout; now goes to the bootstrap raw log). |
|
||||
| `setup/auto.ts` | Phase 2 driver. Orchestrates the clack UI, step execution, user prompts, and writes to all three log levels for every step it spawns. |
|
||||
| `setup/logs.ts` | The logging primitives (`logStep`, `logUserInput`, `logComplete`, `stepRawLog`, `initSetupLog`). Single source of truth for level 2/3 formatting and file paths. |
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
# Skill guidelines
|
||||
|
||||
The authoritative checklist for writing a NanoClaw skill: the bar that conformance tooling and registry review will hold every skill to. [customizing.md](customizing.md) is the short introduction; [skills-model.md](skills-model.md) explains why the model works this way. This document evolves with the system; when a rule here proves wrong, fix the rule.
|
||||
|
||||
---
|
||||
|
||||
## Principles
|
||||
|
||||
Every customization is an additive **skill**: not an edit buried in core, but a skill that carries its own code and knows how to install and remove itself. Two principles make a skill *maintainable*; everything else in this document follows from them.
|
||||
|
||||
### 1. Minimal integration surface
|
||||
|
||||
A skill adds files and makes the **smallest possible reach-ins** into existing code. Adding a file or a dependency never breaks on upgrade; reaching into existing code is the only thing that does, so the integration surface *is* the upgrade risk. Keep reach-ins few, tiny, and ideally a single line that *calls* into the skill's own code.
|
||||
|
||||
Follows from this:
|
||||
|
||||
- **Mostly add.** See the change shapes below, in safety order.
|
||||
- **Push logic into skill-owned files** so the core edit is one call, not an inlined block. This shrinks the surface *and* makes the point testable.
|
||||
- **Prefer colocated, self-contained edits** over edits in two places.
|
||||
- **Use an existing registry or hook when there is one**: appending to a registry is a smaller surface than reaching into code. When none exists, a true code-level edit is fine and first-class. (Whether to *add* a hook because a spot has become a hotspot is the maintainer's call, not the skill's.)
|
||||
|
||||
### 2. A test for every functional integration point
|
||||
|
||||
Every reach-in with a **functional consequence** gets a test that goes **red if the wiring is deleted or drifts**. That's what protects the fork from upstream changes. The tests are also the verification: there is no separate "verify" step.
|
||||
|
||||
Follows from this:
|
||||
|
||||
- **Tests target integration with core, not internal correctness.** Unit tests of a skill's own logic, or its behavior against an external service, are the creator's call: fine, just not required.
|
||||
- **A direct unit test doesn't count**: calling the skill's own function bypasses the wiring and stays green when the reach-in is deleted. Drive the real entry, or assert the wiring structurally.
|
||||
- **Build / typecheck is an always-on leg**: drift (moved imports, renamed fields) is the main enemy and slips past runtime tests.
|
||||
- **The test lives where the point runs**: host code uses vitest under `src/`; container code uses `bun:test` under `container/agent-runner/`.
|
||||
- **"Functional" is the filter**: weigh a reach-in by what breaks if it's gone. A cosmetic one (raising a log line's level) gets no test.
|
||||
|
||||
The two interlock: a minimal surface keeps the integration points few and testable; a test per point keeps the surface safe. *Maintainable = small surface, every functional point guarded.*
|
||||
|
||||
---
|
||||
|
||||
## Skill anatomy
|
||||
|
||||
A skill carries everything it needs:
|
||||
|
||||
- **Code**: the files it adds. They live in the skill's own folder, or, for large registry-backed skills like channels and providers, on a registry branch the skill fetches from. Apply copies them in.
|
||||
- **Apply**: the steps in `SKILL.md`, written as prose an agent can run. Apply must be safe to re-run: upgrades re-run it, and a skill that half-applies twice is a bug.
|
||||
- **Remove**: a separate `REMOVE.md` that reverses *every* change apply made: barrel lines deleted (not commented out), every copied file removed including tests, dependencies uninstalled, Dockerfile edits reverted, env lines removed. **REMOVE.md is required exactly when apply leaves anything behind.** A pure instruction-only skill that copies nothing needs none, and an empty one is noise.
|
||||
- **Tests**: files that ship with the skill and are copied into the project's test tree on apply, so they run against the *composed* system.
|
||||
- **Recipe entry**: how it composes with the fork's other skills (ordering, dependencies). A recipe published upstream ships its components inside its own folder (`.claude/skills/recipes/<name>/skills/<component>/`), each held to this checklist individually.
|
||||
|
||||
---
|
||||
|
||||
## Change shapes
|
||||
|
||||
In rough order of safety:
|
||||
|
||||
- **Add a file**: safest. New code in the skill's own files, or fetched from a registry branch (`git show origin/<branch>:path > path`).
|
||||
- **Append to a file**: an import in a barrel, a line in `.env`, an entry at the end of a list.
|
||||
- **Edit a value in JSON**: e.g. a `package.json` field.
|
||||
- **Add a dependency**, pinned to an exact version.
|
||||
- **Insert into existing code (an "integration point")**: the one risky move. Keep it to a line or two that *calls* code living in the skill's own files, never an inlined block of logic. A skill full of these is a smell.
|
||||
|
||||
Fetching from a registry branch is **additive, never a merge**. `git fetch origin <branch>` then `git show origin/<branch>:path > path` per file. Never `git merge` a registry branch into an install.
|
||||
|
||||
---
|
||||
|
||||
## Integration points
|
||||
|
||||
The integration point is wherever the skill reaches into existing code. Make it **minimal, colocated, and self-contained**:
|
||||
|
||||
- All real logic lives in the skill's own file behind a single entry function; the edit to core is just the call.
|
||||
- **Prefer one colocated block** over edits in two places. For an inserted call, a dynamic import at the call site keeps the import and call together and avoids touching the top-of-file import block (itself a merge hotspot):
|
||||
|
||||
```typescript
|
||||
const { startDashboard } = await import('./dashboard-pusher.js');
|
||||
await startDashboard();
|
||||
```
|
||||
|
||||
A static import + call is acceptable too; this is a recommendation, not a mandate.
|
||||
- Keep any gating (feature flags, env checks) *inside* the skill's function, so the core edit stays a single call.
|
||||
- When the reach-in lands inside an entangled function, extract a tiny skill-owned helper so the core touch is one line, like `args.push(...mySkillEnvArgs())`, rather than exporting the whole function or inlining the logic.
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
**What the standard requires: integration with the NanoClaw system.**
|
||||
|
||||
- **Required:** a test for every functional integration point, and, where an added file consumes core (core APIs, data shapes, registries), a test that exercises that consumption against the real core. That's the leg that catches core drift.
|
||||
- **Optional, the creator's call:** unit tests of the skill's own internal logic, or its behavior against an external service. Often good practice; not what defines a maintainable skill, because they don't protect against upstream changes.
|
||||
|
||||
### Choosing the test type
|
||||
|
||||
For a code-edit integration point, how you test the wiring depends on whether you can invoke the function the edit lives in. **Prefer behavior; fall back to structure.**
|
||||
|
||||
- **If the edit lives in an invocable function, test that function's behavior.** Calling it exercises the edit; remove or break the edit and the test goes red. This is the strongest option, and usually available, because a minimal integration point pushes the logic into the skill's own exported function anyway.
|
||||
- **If the edit lives in a non-invocable entry point** (e.g. `main()` or boot), **use a structural / AST test.** Use the TypeScript compiler API and assert not just that the symbol exists but also its **placement**: awaited, a direct statement of the right function, importing the right module path, correctly ordered. A present-but-misplaced call must go red.
|
||||
|
||||
Two more legs apply when relevant:
|
||||
|
||||
- **Build / typecheck** always applies: it catches a renamed symbol, a moved module, a bad signature.
|
||||
- **A behavior test of how added code consumes core**, required when the added file reaches into core APIs or data at runtime. When the consumption is a *typed* call into a core API (a Chat SDK adapter calling `createChatSdkBridge`), the build leg already guards it and no separate behavior test is required. The behavior-test requirement targets runtime consumption: core DB state, data shapes, registries.
|
||||
|
||||
Together these cover deletion, misplacement, drift, and core consumption. Only true runtime-reachability (a call stranded behind a dead branch) needs the heavy option of booting the real entry point, a rare "real run" reserved for critical wiring.
|
||||
|
||||
### Registration reach-ins: behavior, not structural
|
||||
|
||||
A registry queryable at runtime gets a **behavior** test: import the real barrel, assert the registry contains the entry. A structural parse only proves the *source line* exists. It stays green when the barrel can't evaluate or the package isn't installed, which is exactly when the thing is actually broken. The behavior test goes red on a deleted barrel line, a barrel that won't evaluate, *and* an uninstalled package (the unmocked import throws), so it covers the dependency integration point for free.
|
||||
|
||||
Two consequences. First, **don't mock the adapter's package in the shipped test**: that would defeat the dependency check, and the test runs in the composed install where the package is present. Second, the only reason to fall back to a structural parse is an adapter with real import-time side effects (spawns a process, opens a socket, needs creds at load), which is an adapter smell to fix, not a reason to weaken the test. Conformant adapters do all side-effectful work in the factory or `setup()`, never at import.
|
||||
|
||||
### Test archetypes
|
||||
|
||||
The test matches the kind of integration point:
|
||||
|
||||
- **In-process seam with core** (a channel into the router, a pusher into the central DB): drive the real added component against the **real core collaborators** (DB, registry, router), faking only the external edge. The highest-value archetype: it exercises the added file's consumption of core, which is what catches core drift.
|
||||
- **Wiring / registration** (a barrel import, a `main()` call, an entry in an `mcpServers` map): behavior test via the registry where queryable (see above); structural / AST test where not.
|
||||
- **Config / container probe** (mounts, Dockerfile, a tool installed in the image): run the change where you can. Spin up a container to confirm a mount or binary. Checking that a line exists in a file is the last resort.
|
||||
- **Agentic run** (operational, instruction-only skills): run the workflow with a small model; did it complete?
|
||||
- **Patch behavior** (a patch skill that changes core logic): a behavior test of the changed behavior.
|
||||
- **Provider (multi-point)**: a non-default agent backend reaches into *two* barrels (host `src/providers/index.ts`; container `container/agent-runner/src/providers/index.ts`), plus Dockerfile edits and a CLI or SDK dependency. Each is a separate way to break, and each needs its own guard. Ship a **barrel-driven registration test per tree** that imports *only* the real barrel and asserts the registry contains the provider. **The trap:** a `*.factory.test.ts` that imports the provider module directly self-registers it and stays green when the barrel line is deleted; that's a unit test, not a registration guard. REMOVE.md must reverse both barrel lines, all copied files in both trees, the dependency, and the Dockerfile edits.
|
||||
- **Content / instruction-only** (a reference wiki, a pure workflow): makes no functional reach-in, so it owes no integration test. Conformance is anatomy: idempotent apply, plus a REMOVE.md if and only if apply leaves anything behind.
|
||||
|
||||
### Dependencies are integration points
|
||||
|
||||
A skill that installs a package has made a reach-in: the code now assumes it's there. Guard it so a missing package goes red, in order of preference:
|
||||
|
||||
1. **An unmocked import in a behavior test**: the test imports real code that imports the package, so a missing package throws. Covers presence *and* exercises the real dependency.
|
||||
2. **The build leg**: a typed import of a missing module fails typecheck. This is the fallback when the package genuinely can't be imported in a test (e.g. it binds a port on import). It only works if the validate step runs the build before or alongside the tests, so verify the order.
|
||||
3. **A Dockerfile-installed CLI binary** is the case most often left unguarded: it isn't importable, so neither guard above sees it. Use a **structural test** asserting the Dockerfile `ARG <X>_VERSION=` and install line are present, optionally backed by a `<bin> --version` container probe. Pin the version; reject `latest`.
|
||||
|
||||
You do *not* need to test the dependency's own API contract; that's optional external-service coverage.
|
||||
|
||||
### When there is genuinely nothing to test in-tree
|
||||
|
||||
Some skills' only functional integration is a runtime operator action with no source footprint: registering an MCP server through `ncl`, or a mount through the sanctioned query wrapper (until the `ncl` add-mount verb lands). There's no line in the tree whose deletion a test could catch, so a registration test is structurally inapplicable. **State this explicitly in SKILL.md** rather than inventing a hollow test; conformance is then anatomy plus the dependency guard. This is a conformant outcome, valid only when the reach-in has no in-tree representation. (A raw-SQL write into core's schema to achieve the same thing is a smell, not a workaround.)
|
||||
|
||||
### Test rules
|
||||
|
||||
- **Hermetic at the external edge.** Mock genuinely external services (a fake HTTP server, stubbed creds), never the package under guard (see "Registration reach-ins").
|
||||
- **Exercise the real entry, or assert it structurally.** A test that imports the skill's function directly does not test the integration.
|
||||
- **Tests travel with the skill** and are copied in on apply; an integration test only means anything against the composed project.
|
||||
- **Robustness check.** Apply the skill with a small, cheap model. If a small model fumbles the instructions, they're too vague. Fix the instructions, don't blame the model. (Small models also keep applying skills cheap.)
|
||||
|
||||
---
|
||||
|
||||
## Anti-patterns
|
||||
|
||||
Each with its fix. These are patterns to remove, not to test around: a drift-prone, untestable reach-in is usually a symptom of a bad pattern, not a missing test. Reviewers reject them; the conformance linter will flag them automatically.
|
||||
|
||||
1. **A separate VERIFY.md.** Delete it; tests are the verification. Fold any genuinely useful manual smoke check into SKILL.md's next steps.
|
||||
2. **REMOVE.md soft-disable** (comments out an import; leaves copied files behind). DELETE the import line and `rm` every file the skill copied.
|
||||
3. **REMOVE.md incomplete** (misses env vars, the package uninstall, copied tests). Reverse *every* change; read the env vars from the skill's own credentials section, don't guess.
|
||||
4. **Raw SQL against a core DB** (read or write). Use a core helper or an `ncl` verb; the in-tree query wrapper is the sanctioned last resort. Never the `sqlite3` binary.
|
||||
5. **Credential threading** (`-e KEY=…` or a stdin secrets payload into the container). OneCLI gateway only; it injects credentials per request.
|
||||
6. **Branch-merge install** (`git merge` of a registry branch or any code branch). Install by additive fetch: `git fetch origin <branch>`, then `git show origin/<branch>:path > path` per file. For an update/reapply workflow, re-run each installed skill's additive apply, never merge.
|
||||
7. **Diff-against-past framing** ("earlier versions…", "this is now redundant") and **documenting non-steps** ("no X needed"). Write present-tense DO steps only. A skill reads as a standalone artifact with no memory of its own edits.
|
||||
8. **Stale reach-in targets** (an edit aimed at code that no longer exists; a reach-in already shipped in trunk). Verify the target exists *before* instructing the edit; reconcile already-in-trunk ones to a no-op. Before appending to an allowlist or list, check how it's consumed; the entry may already be derived from a registry, making the edit dead.
|
||||
9. **Hand-maintained duplicate copies** (a mirror directory kept in sync by hand or sed). Generate the mirror from a single canonical source.
|
||||
|
||||
---
|
||||
|
||||
## Worked examples
|
||||
|
||||
In-tree exemplars for the code archetypes. (Two carry known smells, kept deliberately pending architectural fixes; they demonstrate the test shapes, not perfection.)
|
||||
|
||||
- `add-dashboard`: in-process seam with core (the pusher against the central DB), plus an AST wiring test for its `main()` call.
|
||||
- `add-slack`: Chat SDK channel registration; the template for the whole channel family.
|
||||
- `add-deltachat`: native channel registration.
|
||||
- `add-atomic-chat-tool`: MCP-tool wiring across both runtimes (container registration and host env-helper call).
|
||||
- `add-opencode` / `add-codex`: the provider multi-point archetype, with two barrels, Dockerfile pins, and per-tree registration tests.
|
||||
@@ -1,677 +0,0 @@
|
||||
# Skills as Branches
|
||||
|
||||
## Overview
|
||||
|
||||
This document covers **feature skills** — skills that add capabilities via git branch merges. This is the most complex skill type and the primary way NanoClaw is extended.
|
||||
|
||||
NanoClaw has four types of skills overall. See [CONTRIBUTING.md](../CONTRIBUTING.md) for the full taxonomy:
|
||||
|
||||
| Type | Location | How it works |
|
||||
|------|----------|-------------|
|
||||
| **Feature** (this doc) | `.claude/skills/` + `skill/*` branch | SKILL.md has instructions; code lives on a branch, applied via `git merge` |
|
||||
| **Utility** | `.claude/skills/<name>/` with code files | Self-contained tools; code in skill directory, copied into place on install |
|
||||
| **Operational** | `.claude/skills/` on `main` | Instruction-only workflows (setup, debug, update) |
|
||||
| **Container** | `container/skills/` | Loaded inside agent containers at runtime |
|
||||
|
||||
---
|
||||
|
||||
Feature skills are distributed as git branches on the upstream repository. Applying a skill is a `git merge`. Updating core is a `git merge`. Everything is standard git.
|
||||
|
||||
This replaces the previous `skills-engine/` system (three-way file merging, `.nanoclaw/` state, manifest files, replay, backup/restore) with plain git operations and Claude for conflict resolution.
|
||||
|
||||
## How It Works
|
||||
|
||||
### Repository structure
|
||||
|
||||
The upstream repo (`nanocoai/nanoclaw`) maintains:
|
||||
|
||||
- `main` — core NanoClaw (no skill code)
|
||||
- `skill/discord` — main + Discord integration
|
||||
- `skill/telegram` — main + Telegram integration
|
||||
- `skill/slack` — main + Slack integration
|
||||
- `skill/gmail` — main + Gmail integration
|
||||
- etc.
|
||||
|
||||
Each skill branch contains all the code changes for that skill: new files, modified source files, updated `package.json` dependencies, `.env.example` additions — everything. No manifest, no structured operations, no separate `add/` and `modify/` directories.
|
||||
|
||||
### Skill discovery and installation
|
||||
|
||||
For discovery and installation, skills fall into two categories:
|
||||
|
||||
**Operational skills** (on `main`, always available):
|
||||
- `/setup`, `/debug`, `/update-nanoclaw`, `/customize`, `/update-skills`
|
||||
- These are instruction-only SKILL.md files — no code changes, just workflows
|
||||
- Live in `.claude/skills/` on `main`, immediately available to every user
|
||||
|
||||
**Feature skills** (in marketplace, installed on demand):
|
||||
- `/add-discord`, `/add-telegram`, `/add-slack`, `/add-gmail`, etc.
|
||||
- Each has a SKILL.md with setup instructions and a corresponding `skill/*` branch with code
|
||||
- Live in the marketplace repo (`nanocoai/nanoclaw-skills`)
|
||||
|
||||
Users do not need to interact with the marketplace directly. The operational skills `/setup` and `/customize` handle plugin installation transparently:
|
||||
|
||||
```bash
|
||||
# Claude runs this behind the scenes — users don't see it
|
||||
claude plugin install nanoclaw-skills@nanoclaw-skills --scope project
|
||||
```
|
||||
|
||||
Skills are hot-loaded after `claude plugin install` — no restart needed. This means `/setup` can install the marketplace plugin, then immediately run any feature skill, all in one session.
|
||||
|
||||
### Selective skill installation
|
||||
|
||||
`/setup` asks users what channels they want, then only offers relevant skills:
|
||||
|
||||
1. "Which messaging channels do you want to use?" → Discord, Telegram, Slack, WhatsApp
|
||||
2. User picks Telegram → Claude installs the plugin and runs `/add-telegram`
|
||||
3. After Telegram is set up: "Want to add Agent Swarm support for Telegram?" → offers `/add-telegram-swarm`
|
||||
4. "Want to enable community skills?" → installs community marketplace plugins
|
||||
|
||||
Dependent skills (e.g., `telegram-swarm` depends on `telegram`) are only offered after their parent is installed. `/customize` follows the same pattern for post-setup additions.
|
||||
|
||||
### Marketplace configuration
|
||||
|
||||
NanoClaw's `.claude/settings.json` registers the official marketplace:
|
||||
|
||||
```json
|
||||
{
|
||||
"extraKnownMarketplaces": {
|
||||
"nanoclaw-skills": {
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "nanocoai/nanoclaw-skills"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The marketplace repo uses Claude Code's plugin structure:
|
||||
|
||||
```
|
||||
nanocoai/nanoclaw-skills/
|
||||
.claude-plugin/
|
||||
marketplace.json # Plugin catalog
|
||||
plugins/
|
||||
nanoclaw-skills/ # Single plugin bundling all official skills
|
||||
.claude-plugin/
|
||||
plugin.json # Plugin manifest
|
||||
skills/
|
||||
add-discord/
|
||||
SKILL.md # Setup instructions; step 1 is "merge the branch"
|
||||
add-telegram/
|
||||
SKILL.md
|
||||
add-slack/
|
||||
SKILL.md
|
||||
...
|
||||
```
|
||||
|
||||
Multiple skills are bundled in one plugin — installing `nanoclaw-skills` makes all feature skills available at once. Individual skills don't need separate installation.
|
||||
|
||||
Each SKILL.md tells Claude to merge the corresponding skill branch as step 1, then walks through interactive setup (env vars, bot creation, etc.).
|
||||
|
||||
### Applying a skill
|
||||
|
||||
User runs `/add-discord` (discovered via marketplace). Claude follows the SKILL.md:
|
||||
|
||||
1. `git fetch upstream skill/discord`
|
||||
2. `git merge upstream/skill/discord`
|
||||
3. Interactive setup (create bot, get token, configure env vars, etc.)
|
||||
|
||||
Or manually:
|
||||
|
||||
```bash
|
||||
git fetch upstream skill/discord
|
||||
git merge upstream/skill/discord
|
||||
```
|
||||
|
||||
### Applying multiple skills
|
||||
|
||||
```bash
|
||||
git merge upstream/skill/discord
|
||||
git merge upstream/skill/telegram
|
||||
```
|
||||
|
||||
Git handles the composition. If both skills modify the same lines, it's a real conflict and Claude resolves it.
|
||||
|
||||
### Updating core
|
||||
|
||||
```bash
|
||||
git fetch upstream main
|
||||
git merge upstream/main
|
||||
```
|
||||
|
||||
Since skill branches are kept merged-forward with main (see CI section), the user's merged-in skill changes and upstream changes have proper common ancestors.
|
||||
|
||||
### Checking for skill updates
|
||||
|
||||
Users who previously merged a skill branch can check for updates. For each `upstream/skill/*` branch, check whether the branch has commits that aren't in the user's HEAD:
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
for branch in $(git branch -r | grep 'upstream/skill/'); do
|
||||
# Check if user has merged this skill at some point
|
||||
merge_base=$(git merge-base HEAD "$branch" 2>/dev/null) || continue
|
||||
# Check if the skill branch has new commits beyond what the user has
|
||||
if ! git merge-base --is-ancestor "$branch" HEAD 2>/dev/null; then
|
||||
echo "$branch has updates available"
|
||||
fi
|
||||
done
|
||||
```
|
||||
|
||||
This requires no state — it uses git history to determine which skills were previously merged and whether they have new commits.
|
||||
|
||||
This logic is available in two ways:
|
||||
- Built into `/update-nanoclaw` — after merging main, optionally check for skill updates
|
||||
- Standalone `/update-skills` — check and merge skill updates independently
|
||||
|
||||
### Conflict resolution
|
||||
|
||||
At any merge step, conflicts may arise. Claude resolves them — reading the conflicted files, understanding the intent of both sides, and producing the correct result. This is what makes the branch approach viable at scale: conflict resolution that previously required human judgment is now automated.
|
||||
|
||||
### Skill dependencies
|
||||
|
||||
Some skills depend on other skills. E.g., `skill/telegram-swarm` requires `skill/telegram`. Dependent skill branches are branched from their parent skill branch, not from `main`.
|
||||
|
||||
This means `skill/telegram-swarm` includes all of telegram's changes plus its own additions. When a user merges `skill/telegram-swarm`, they get both — no need to merge telegram separately.
|
||||
|
||||
Dependencies are implicit in git history — `git merge-base --is-ancestor` determines whether one skill branch is an ancestor of another. No separate dependency file is needed.
|
||||
|
||||
### Uninstalling a skill
|
||||
|
||||
```bash
|
||||
# Find the merge commit
|
||||
git log --merges --oneline | grep discord
|
||||
|
||||
# Revert it
|
||||
git revert -m 1 <merge-commit>
|
||||
```
|
||||
|
||||
This creates a new commit that undoes the skill's changes. Claude can handle the whole flow.
|
||||
|
||||
If the user has modified the skill's code since merging (custom changes on top), the revert might conflict — Claude resolves it.
|
||||
|
||||
If the user later wants to re-apply the skill, they need to revert the revert first (git treats reverted changes as "already applied and undone"). Claude handles this too.
|
||||
|
||||
## CI: Keeping Skill Branches Current
|
||||
|
||||
A GitHub Action runs on every push to `main`:
|
||||
|
||||
1. List all `skill/*` branches
|
||||
2. For each skill branch, merge `main` into it (merge-forward, not rebase)
|
||||
3. Run build and tests on the merged result
|
||||
4. If tests pass, push the updated skill branch
|
||||
5. If a skill fails (conflict, build error, test failure), open a GitHub issue for manual resolution
|
||||
|
||||
**Why merge-forward instead of rebase:**
|
||||
- No force-push — preserves history for users who already merged the skill
|
||||
- Users can re-merge a skill branch to pick up skill updates (bug fixes, improvements)
|
||||
- Git has proper common ancestors throughout the merge graph
|
||||
|
||||
**Why this scales:** With a few hundred skills and a few commits to main per day, the CI cost is trivial: conflict resolution runs on Claude Haiku, which is fast and cheap. An approach that wouldn't have been feasible a year or two ago is now practical because Claude can resolve conflicts at scale.
|
||||
|
||||
## Installation Flow
|
||||
|
||||
### New users (recommended)
|
||||
|
||||
1. Fork `nanocoai/nanoclaw` on GitHub (click the Fork button)
|
||||
2. Clone your fork:
|
||||
```bash
|
||||
git clone https://github.com/<you>/nanoclaw.git
|
||||
cd nanoclaw
|
||||
```
|
||||
3. Run Claude Code:
|
||||
```bash
|
||||
claude
|
||||
```
|
||||
4. Run `/setup` — Claude handles dependencies, authentication, container setup, service configuration, and adds `upstream` remote if not present
|
||||
|
||||
Forking is recommended because it gives users a remote to push their customizations to. Clone-only works for trying things out but provides no remote backup.
|
||||
|
||||
### Existing users migrating from clone
|
||||
|
||||
Users who previously ran `git clone https://github.com/nanocoai/nanoclaw.git` and have local customizations:
|
||||
|
||||
1. Fork `nanocoai/nanoclaw` on GitHub
|
||||
2. Reroute remotes:
|
||||
```bash
|
||||
git remote rename origin upstream
|
||||
git remote add origin https://github.com/<you>/nanoclaw.git
|
||||
git push --force origin main
|
||||
```
|
||||
The `--force` is needed because the fresh fork's main is at upstream's latest, but the user wants their (possibly behind) version. The fork was just created so there's nothing to lose.
|
||||
3. From this point, `origin` = their fork, `upstream` = nanocoai/nanoclaw
|
||||
|
||||
### Existing users migrating from the old skills engine
|
||||
|
||||
Users who previously applied skills via the `skills-engine/` system have skill code in their tree but no merge commits linking to skill branches. Git doesn't know these changes came from a skill, so merging a skill branch on top would conflict or duplicate.
|
||||
|
||||
**For new skills going forward:** just merge skill branches as normal. No issue.
|
||||
|
||||
**For existing old-engine skills**, two migration paths:
|
||||
|
||||
**Option A: Per-skill reapply (keep your fork)**
|
||||
1. For each old-engine skill: identify and revert the old changes, then merge the skill branch fresh
|
||||
2. Claude assists with identifying what to revert and resolving any conflicts
|
||||
3. Custom modifications (non-skill changes) are preserved
|
||||
|
||||
**Option B: Fresh start (cleanest)**
|
||||
1. Create a new fork from upstream
|
||||
2. Merge the skill branches you want
|
||||
3. Manually re-apply your custom (non-skill) changes
|
||||
4. Claude assists by diffing your old fork against the new one to identify custom changes
|
||||
|
||||
In both cases:
|
||||
- Delete the `.nanoclaw/` directory (no longer needed)
|
||||
- The `skills-engine/` code has been removed from upstream now that all skills are migrated (see "Migration")
|
||||
- `/update-skills` only tracks skills applied via branch merge — old-engine skills won't appear in update checks
|
||||
|
||||
## User Workflows
|
||||
|
||||
### Custom changes
|
||||
|
||||
Users make custom changes directly on their main branch. This is the standard fork workflow — their `main` IS their customized version.
|
||||
|
||||
```bash
|
||||
# Make changes
|
||||
vim src/config.ts
|
||||
git commit -am "change trigger word to @Bob"
|
||||
git push origin main
|
||||
```
|
||||
|
||||
Custom changes, skills, and core updates all coexist on their main branch. Git handles the three-way merging at each merge step because it can trace common ancestors through the merge history.
|
||||
|
||||
### Applying a skill
|
||||
|
||||
Run `/add-discord` in Claude Code (discovered via the marketplace plugin), or manually:
|
||||
|
||||
```bash
|
||||
git fetch upstream skill/discord
|
||||
git merge upstream/skill/discord
|
||||
# Follow setup instructions for configuration
|
||||
git push origin main
|
||||
```
|
||||
|
||||
If the user is behind upstream's main when they merge a skill branch, the merge might bring in some core changes too (since skill branches are merged-forward with main). This is generally fine — they get a compatible version of everything.
|
||||
|
||||
### Updating core
|
||||
|
||||
```bash
|
||||
git fetch upstream main
|
||||
git merge upstream/main
|
||||
git push origin main
|
||||
```
|
||||
|
||||
This is the same as the existing `/update-nanoclaw` skill's merge path.
|
||||
|
||||
### Updating skills
|
||||
|
||||
Run `/update-skills` or let `/update-nanoclaw` check after a core update. For each previously-merged skill branch that has new commits, Claude offers to merge the updates.
|
||||
|
||||
### Contributing back to upstream
|
||||
|
||||
Users who want to submit a PR to upstream:
|
||||
|
||||
```bash
|
||||
git fetch upstream main
|
||||
git checkout -b my-fix upstream/main
|
||||
# Make changes
|
||||
git push origin my-fix
|
||||
# Create PR from my-fix to nanocoai/nanoclaw:main
|
||||
```
|
||||
|
||||
Standard fork contribution workflow. Their custom changes stay on their main and don't leak into the PR.
|
||||
|
||||
## Contributing a Skill
|
||||
|
||||
The flow below is for **feature skills** (branch-based). For utility skills (self-contained tools) and container skills, the contributor opens a PR that adds files directly to `.claude/skills/<name>/` or `container/skills/<name>/` — no branch extraction needed. See [CONTRIBUTING.md](../CONTRIBUTING.md) for all skill types.
|
||||
|
||||
### Contributor flow (feature skills)
|
||||
|
||||
1. Fork `nanocoai/nanoclaw`
|
||||
2. Branch from `main`
|
||||
3. Make the code changes (new channel file, modified integration points, updated package.json, .env.example additions, etc.)
|
||||
4. Open a PR to `main`
|
||||
|
||||
The contributor opens a normal PR — they don't need to know about skill branches or marketplace repos. They just make code changes and submit.
|
||||
|
||||
### Maintainer flow
|
||||
|
||||
When a skill PR is reviewed and approved:
|
||||
|
||||
1. Create a `skill/<name>` branch from the PR's commits:
|
||||
```bash
|
||||
git fetch origin pull/<PR_NUMBER>/head:skill/<name>
|
||||
git push origin skill/<name>
|
||||
```
|
||||
2. Force-push to the contributor's PR branch, replacing it with a single commit that adds the contributor to `CONTRIBUTORS.md` (removing all code changes)
|
||||
3. Merge the slimmed PR into `main` (just the contributor addition)
|
||||
4. Add the skill's SKILL.md to the marketplace repo (`nanocoai/nanoclaw-skills`)
|
||||
|
||||
This way:
|
||||
- The contributor gets merge credit (their PR is merged)
|
||||
- The maintainer adds them to `CONTRIBUTORS.md` — no extra step for the contributor
|
||||
- The skill branch is created from their work
|
||||
- `main` stays clean (no skill code)
|
||||
- The contributor only had to do one thing: open a PR with code changes
|
||||
|
||||
**Note:** GitHub PRs from forks have "Allow edits from maintainers" checked by default, so the maintainer can push to the contributor's PR branch.
|
||||
|
||||
### Skill SKILL.md
|
||||
|
||||
The contributor can optionally provide a SKILL.md (either in the PR or separately). This goes into the marketplace repo and contains:
|
||||
|
||||
1. Frontmatter (name, description, triggers)
|
||||
2. Step 1: Merge the skill branch
|
||||
3. Steps 2-N: Interactive setup (create bot, get token, configure env vars, verify)
|
||||
|
||||
If the contributor doesn't provide a SKILL.md, the maintainer writes one based on the PR.
|
||||
|
||||
## Community Marketplaces
|
||||
|
||||
Anyone can maintain their own fork with skill branches and their own marketplace repo. This enables a community-driven skill ecosystem without requiring write access to the upstream repo.
|
||||
|
||||
### How it works
|
||||
|
||||
A community contributor:
|
||||
|
||||
1. Maintains a fork of NanoClaw (e.g., `alice/nanoclaw`)
|
||||
2. Creates `skill/*` branches on their fork with their custom skills
|
||||
3. Creates a marketplace repo (e.g., `alice/nanoclaw-skills`) with a `.claude-plugin/marketplace.json` and plugin structure
|
||||
|
||||
### Adding a community marketplace
|
||||
|
||||
If the community contributor is trusted, they can open a PR to add their marketplace to NanoClaw's `.claude/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"extraKnownMarketplaces": {
|
||||
"nanoclaw-skills": {
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "nanocoai/nanoclaw-skills"
|
||||
}
|
||||
},
|
||||
"alice-nanoclaw-skills": {
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "alice/nanoclaw-skills"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Once merged, all NanoClaw users automatically discover the community marketplace alongside the official one.
|
||||
|
||||
### Installing community skills
|
||||
|
||||
`/setup` and `/customize` ask users whether they want to enable community skills. If yes, Claude installs community marketplace plugins via `claude plugin install`:
|
||||
|
||||
```bash
|
||||
claude plugin install alice-skills@alice-nanoclaw-skills --scope project
|
||||
```
|
||||
|
||||
Community skills are hot-loaded and immediately available — no restart needed. Dependent skills are only offered after their prerequisites are met (e.g., community Telegram add-ons only after Telegram is installed).
|
||||
|
||||
Users can also browse and install community plugins manually via `/plugin`.
|
||||
|
||||
### Properties of this system
|
||||
|
||||
- **No gatekeeping required.** Anyone can create skills on their fork without permission. They only need approval to be listed in the auto-discovered marketplaces.
|
||||
- **Multiple marketplaces coexist.** Users see skills from all trusted marketplaces in `/plugin`.
|
||||
- **Community skills use the same merge pattern.** The SKILL.md just points to a different remote:
|
||||
```bash
|
||||
git remote add alice https://github.com/alice/nanoclaw.git
|
||||
git fetch alice skill/my-cool-feature
|
||||
git merge alice/skill/my-cool-feature
|
||||
```
|
||||
- **Users can also add marketplaces manually.** Even without being listed in settings.json, users can run `/plugin marketplace add alice/nanoclaw-skills` to discover skills from any source.
|
||||
- **CI is per-fork.** Each community maintainer runs their own CI to keep their skill branches merged-forward. They can use the same GitHub Action as the upstream repo.
|
||||
|
||||
## Flavors
|
||||
|
||||
A flavor is a curated fork of NanoClaw — a combination of skills, custom changes, and configuration tailored for a specific use case (e.g., "NanoClaw for Sales," "NanoClaw Minimal," "NanoClaw for Developers").
|
||||
|
||||
### Creating a flavor
|
||||
|
||||
1. Fork `nanocoai/nanoclaw`
|
||||
2. Merge in the skills you want
|
||||
3. Make custom changes (trigger word, prompts, integrations, etc.)
|
||||
4. Your fork's `main` IS the flavor
|
||||
|
||||
### Installing a flavor
|
||||
|
||||
During `/setup`, users are offered a choice of flavors before any configuration happens. The setup skill reads `flavors.yaml` from the repo (shipped with upstream, always up to date) and presents options:
|
||||
|
||||
AskUserQuestion: "Start with a flavor or default NanoClaw?"
|
||||
- Default NanoClaw
|
||||
- NanoClaw for Sales — Gmail + Slack + CRM (maintained by alice)
|
||||
- NanoClaw Minimal — Telegram-only, lightweight (maintained by bob)
|
||||
|
||||
If a flavor is chosen:
|
||||
|
||||
```bash
|
||||
git remote add <flavor-name> https://github.com/alice/nanoclaw.git
|
||||
git fetch <flavor-name> main
|
||||
git merge <flavor-name>/main
|
||||
```
|
||||
|
||||
Then setup continues normally (dependencies, auth, container, service).
|
||||
|
||||
**This choice is only offered on a fresh fork** — when the user's main matches or is close to upstream's main with no local commits. If `/setup` detects significant local changes (re-running setup on an existing install), it skips the flavor selection and goes straight to configuration.
|
||||
|
||||
After installation, the user's local clone has three remotes:
|
||||
- `origin` — their fork (push customizations here)
|
||||
- `upstream` — `nanocoai/nanoclaw` (core updates)
|
||||
- `<flavor-name>` — the flavor fork (flavor updates)
|
||||
|
||||
### Updating a flavor
|
||||
|
||||
```bash
|
||||
git fetch <flavor-name> main
|
||||
git merge <flavor-name>/main
|
||||
```
|
||||
|
||||
The flavor maintainer keeps their fork updated (merging upstream, updating skills). Users pull flavor updates the same way they pull core updates.
|
||||
|
||||
### Flavors registry
|
||||
|
||||
`flavors.yaml` lives in the upstream repo:
|
||||
|
||||
```yaml
|
||||
flavors:
|
||||
- name: NanoClaw for Sales
|
||||
repo: alice/nanoclaw
|
||||
description: Gmail + Slack + CRM integration, daily pipeline summaries
|
||||
maintainer: alice
|
||||
|
||||
- name: NanoClaw Minimal
|
||||
repo: bob/nanoclaw
|
||||
description: Telegram-only, no container overhead
|
||||
maintainer: bob
|
||||
```
|
||||
|
||||
Anyone can PR to add their flavor. The file is available locally when `/setup` runs since it's part of the cloned repo.
|
||||
|
||||
### Discoverability
|
||||
|
||||
- **During setup** — flavor selection is offered as part of the initial setup flow
|
||||
- **`/browse-flavors` skill** — reads `flavors.yaml` and presents options at any time
|
||||
- **GitHub topics** — flavor forks can tag themselves with `nanoclaw-flavor` for searchability
|
||||
- **Discord / website** — community-curated lists
|
||||
|
||||
## Migration
|
||||
|
||||
Migration from the old skills engine to branches is complete. All feature skills now live on `skill/*` branches, and the skills engine has been removed.
|
||||
|
||||
### Skill branches
|
||||
|
||||
| Branch | Base | Description |
|
||||
|--------|------|-------------|
|
||||
| `skill/whatsapp` | `main` | WhatsApp channel |
|
||||
| `skill/telegram` | `main` | Telegram channel |
|
||||
| `skill/slack` | `main` | Slack channel |
|
||||
| `skill/discord` | `main` | Discord channel |
|
||||
| `skill/gmail` | `main` | Gmail channel |
|
||||
| `skill/voice-transcription` | `skill/whatsapp` | OpenAI Whisper voice transcription |
|
||||
| `skill/image-vision` | `skill/whatsapp` | Image attachment processing |
|
||||
| `skill/pdf-reader` | `skill/whatsapp` | PDF attachment reading |
|
||||
| `skill/local-whisper` | `skill/voice-transcription` | Local whisper.cpp transcription |
|
||||
| `skill/ollama-tool` | `main` | Ollama MCP server for local models |
|
||||
| `skill/apple-container` | `main` | Apple Container runtime |
|
||||
| `skill/reactions` | `main` | WhatsApp emoji reactions |
|
||||
|
||||
### What was removed
|
||||
|
||||
- `skills-engine/` directory (entire engine)
|
||||
- `scripts/apply-skill.ts`, `scripts/uninstall-skill.ts`, `scripts/rebase.ts`
|
||||
- `scripts/fix-skill-drift.ts`, `scripts/validate-all-skills.ts`
|
||||
- `.github/workflows/skill-drift.yml`, `.github/workflows/skill-pr.yml`
|
||||
- All `add/`, `modify/`, `tests/`, and `manifest.yaml` from skill directories
|
||||
- `.nanoclaw/` state directory
|
||||
|
||||
Operational skills (`setup`, `debug`, `update-nanoclaw`, `customize`, `update-skills`) remain on main in `.claude/skills/`.
|
||||
|
||||
## What Changes
|
||||
|
||||
### README Quick Start
|
||||
|
||||
Before:
|
||||
```bash
|
||||
git clone https://github.com/nanocoai/NanoClaw.git
|
||||
cd NanoClaw
|
||||
claude
|
||||
```
|
||||
|
||||
After:
|
||||
```
|
||||
1. Fork nanocoai/nanoclaw on GitHub
|
||||
2. git clone https://github.com/<you>/nanoclaw.git
|
||||
3. cd nanoclaw
|
||||
4. claude
|
||||
5. /setup
|
||||
```
|
||||
|
||||
### Setup skill (`/setup`)
|
||||
|
||||
Updates to the setup flow:
|
||||
|
||||
- Check if `upstream` remote exists; if not, add it: `git remote add upstream https://github.com/nanocoai/nanoclaw.git`
|
||||
- Check if `origin` points to the user's fork (not nanocoai). If it points to nanocoai, guide them through the fork migration.
|
||||
- **Install marketplace plugin:** `claude plugin install nanoclaw-skills@nanoclaw-skills --scope project` — makes all feature skills available (hot-loaded, no restart)
|
||||
- **Ask which channels to add:** present channel options (Discord, Telegram, Slack, WhatsApp, Gmail), run corresponding `/add-*` skills for selected channels
|
||||
- **Offer dependent skills:** after a channel is set up, offer relevant add-ons (e.g., Agent Swarm after Telegram, voice transcription after WhatsApp)
|
||||
- **Optionally enable community marketplaces:** ask if the user wants community skills, install those marketplace plugins too
|
||||
|
||||
### `.claude/settings.json`
|
||||
|
||||
Marketplace configuration so the official marketplace is auto-registered:
|
||||
|
||||
```json
|
||||
{
|
||||
"extraKnownMarketplaces": {
|
||||
"nanoclaw-skills": {
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "nanocoai/nanoclaw-skills"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Skills directory on main
|
||||
|
||||
The `.claude/skills/` directory on `main` retains only operational skills (setup, debug, update-nanoclaw, customize, update-skills). Feature skills (add-discord, add-telegram, etc.) live in the marketplace repo, installed via `claude plugin install` during `/setup` or `/customize`.
|
||||
|
||||
### Skills engine removal
|
||||
|
||||
The following can be removed:
|
||||
|
||||
- `skills-engine/` — entire directory (apply, merge, replay, state, backup, etc.)
|
||||
- `scripts/apply-skill.ts`
|
||||
- `scripts/uninstall-skill.ts`
|
||||
- `scripts/fix-skill-drift.ts`
|
||||
- `scripts/validate-all-skills.ts`
|
||||
- `.nanoclaw/` — state directory
|
||||
- `add/` and `modify/` subdirectories from all skill directories
|
||||
- Feature skill SKILL.md files from `.claude/skills/` on main (they now live in the marketplace)
|
||||
|
||||
Operational skills (`setup`, `debug`, `update-nanoclaw`, `customize`, `update-skills`) remain on main in `.claude/skills/`.
|
||||
|
||||
### New infrastructure
|
||||
|
||||
- **Marketplace repo** (`nanocoai/nanoclaw-skills`) — single Claude Code plugin bundling SKILL.md files for all feature skills
|
||||
- **CI GitHub Action** — merge-forward `main` into all `skill/*` branches on every push to `main`, using Claude (Haiku) for conflict resolution
|
||||
- **`/update-skills` skill** — checks for and applies skill branch updates using git history
|
||||
- **`CONTRIBUTORS.md`** — tracks skill contributors
|
||||
|
||||
### Update skill (`/update-nanoclaw`)
|
||||
|
||||
The update skill gets simpler with the branch-based approach. The old skills engine required replaying all applied skills after merging core updates — that entire step disappears. Skill changes are already in the user's git history, so `git merge upstream/main` just works.
|
||||
|
||||
**What stays the same:**
|
||||
- Preflight (clean working tree, upstream remote)
|
||||
- Backup branch + tag
|
||||
- Preview (git log, git diff, file buckets)
|
||||
- Merge/cherry-pick/rebase options
|
||||
- Conflict preview (dry-run merge)
|
||||
- Conflict resolution
|
||||
- Build + test validation
|
||||
- Rollback instructions
|
||||
|
||||
**What's removed:**
|
||||
- Skill replay step (was needed by the old skills engine to re-apply skills after core update)
|
||||
- Re-running structured operations (npm deps, env vars — these are part of git history now)
|
||||
|
||||
**What's added:**
|
||||
- Optional step at the end: "Check for skill updates?" which runs the `/update-skills` logic
|
||||
- This checks whether any previously-merged skill branches have new commits (bug fixes, improvements to the skill itself — not just merge-forwards from main)
|
||||
|
||||
**Why users don't need to re-merge skills after a core update:**
|
||||
When the user merged a skill branch, those changes became part of their git history. When they later merge `upstream/main`, git performs a normal three-way merge — the skill changes in their tree are untouched, and only core changes are brought in. The merge-forward CI ensures skill branches stay compatible with latest main, but that's for new users applying the skill fresh. Existing users who already merged the skill don't need to do anything.
|
||||
|
||||
Users only need to re-merge a skill branch if the skill itself was updated (not just merged-forward with main). The `/update-skills` check detects this.
|
||||
|
||||
## Discord Announcement
|
||||
|
||||
### For existing users
|
||||
|
||||
> **Skills are now git branches**
|
||||
>
|
||||
> We've simplified how skills work in NanoClaw. Instead of a custom skills engine, skills are now git branches that you merge in.
|
||||
>
|
||||
> **What this means for you:**
|
||||
> - Applying a skill: `git fetch upstream skill/discord && git merge upstream/skill/discord`
|
||||
> - Updating core: `git fetch upstream main && git merge upstream/main`
|
||||
> - Checking for skill updates: `/update-skills`
|
||||
> - No more `.nanoclaw/` state directory or skills engine
|
||||
>
|
||||
> **We now recommend forking instead of cloning.** This gives you a remote to push your customizations to.
|
||||
>
|
||||
> **If you currently have a clone with local changes**, migrate to a fork:
|
||||
> 1. Fork `nanocoai/nanoclaw` on GitHub
|
||||
> 2. Run:
|
||||
> ```
|
||||
> git remote rename origin upstream
|
||||
> git remote add origin https://github.com/<you>/nanoclaw.git
|
||||
> git push --force origin main
|
||||
> ```
|
||||
> This works even if you're way behind — just push your current state.
|
||||
>
|
||||
> **If you previously applied skills via the old system**, your code changes are already in your working tree — nothing to redo. You can delete the `.nanoclaw/` directory. Future skills and updates use the branch-based approach.
|
||||
>
|
||||
> **Discovering skills:** Skills are now available through Claude Code's plugin marketplace. Run `/plugin` in Claude Code to browse and install available skills.
|
||||
|
||||
### For skill contributors
|
||||
|
||||
> **Contributing skills**
|
||||
>
|
||||
> To contribute a skill:
|
||||
> 1. Fork `nanocoai/nanoclaw`
|
||||
> 2. Branch from `main` and make your code changes
|
||||
> 3. Open a regular PR
|
||||
>
|
||||
> That's it. We'll create a `skill/<name>` branch from your PR, add you to CONTRIBUTORS.md, and add the SKILL.md to the marketplace. CI automatically keeps skill branches merged-forward with `main` using Claude to resolve any conflicts.
|
||||
>
|
||||
> **Want to run your own skill marketplace?** Maintain skill branches on your fork and create a marketplace repo. Open a PR to add it to NanoClaw's auto-discovered marketplaces — or users can add it manually via `/plugin marketplace add`.
|
||||
@@ -0,0 +1,152 @@
|
||||
# The skills model
|
||||
|
||||
How NanoClaw stays customizable without breaking its forks. This is the full version; [customizing.md](customizing.md) is the short one, and [skill-guidelines.md](skill-guidelines.md) is the authoritative checklist for writing a skill.
|
||||
|
||||
## The problem
|
||||
|
||||
People fork NanoClaw and change the code. When we ship updates, their changes collide with ours and `git merge` turns into a fight. The more someone customized, the worse it gets. We can't grow the core without breaking everyone downstream.
|
||||
|
||||
## The bet
|
||||
|
||||
Every customization is a skill: not an edit buried in the core, but a skill that adds the change on top.
|
||||
|
||||
The core stays small and stable. Everything else composes on top as skills. Adding your 1st skill and your 500th skill is the same amount of work.
|
||||
|
||||
This works for any fork: a personal install with three tweaks, a company build with fifty.
|
||||
|
||||
## A fork is a recipe of skills
|
||||
|
||||
You don't track your changes as a pile of edits. You track them as skills.
|
||||
|
||||
- Each customization = one small skill.
|
||||
- One "recipe" skill lists all your skills and how they fit together: the order, and any dependencies between them.
|
||||
|
||||
So a fork is defined by its recipe. Most upgrades don't need to run it (see "Upgrading"), but it's what lets you rebuild the fork from scratch on clean upstream, and it's how you hand your whole fork to someone else. It replaces every "what did I change" artifact you'd otherwise keep (a migration guide, a manifest, a pile of notes) with one runnable thing.
|
||||
|
||||
The recipe is the one fork-specific thing. It lives in your fork, never upstream. (A recipe is itself a skill: a SKILL.md listing the fork's skills in apply order.)
|
||||
|
||||
The exception is a **published recipe**: a composition worth sharing whole can be contributed upstream as one reviewable bundle under `.claude/skills/recipes/<name>/` — the recipe SKILL.md on top, its component skills inside it under `skills/<component>/`, each a complete skill in its own right (SKILL.md, REMOVE.md, a `files.txt` manifest, and a generated `files/` mirror) held to the same guidelines as any standalone skill. The recipe's own stack tests ride along the same way. Fork-private recipes remain the default; publishing is for the rare composition that is itself the product, like the PR Factory.
|
||||
|
||||
## What's in a skill
|
||||
|
||||
A skill carries everything it needs:
|
||||
|
||||
- **Its code**: the files it adds (see "Where a skill's files live").
|
||||
- **Apply and remove.** Apply installs it; remove uninstalls it. Uninstall isn't a separate problem; it ships with the skill. (Remove is required exactly when apply leaves anything behind. A pure instruction-only skill that changes nothing needs none.)
|
||||
- **Its tests**: see "A test for every integration point." The tests *are* the verification. If they pass against the composed project, the skill applied correctly and works; there is no separate "verify" step.
|
||||
- **Its recipe entry**: how it composes with the others.
|
||||
|
||||
Apply must be safe to re-run. Upgrades re-run skills, so a skill that half-applies twice is a bug.
|
||||
|
||||
## Two kinds of skills
|
||||
|
||||
- **Capability skills** add something new: a channel, a provider, a tool, a dashboard.
|
||||
- **Patch skills** make small tweaks or bug fixes to existing behavior, instead of adding a capability.
|
||||
|
||||
Patch skills follow the same rules: a test for every edit, and code pushed into independent files wherever possible instead of inline. To keep the overhead down, bundle several small patches into a single patch skill rather than making one skill per one-line fix.
|
||||
|
||||
One honest exception: a bug fix that genuinely changes an existing line can't always be moved into a new file. That single line is the one place an upgrade can still hard-conflict. If upstream touched the same line, the fix has to be re-derived against the new code. That's fine when it's small and tested; just don't pretend it's free.
|
||||
|
||||
(Packaging is a separate axis: some skills fetch code from a registry branch, some ship files in their own folder, some are pure instructions.)
|
||||
|
||||
## What makes a good skill
|
||||
|
||||
A good skill mostly just *adds* things:
|
||||
|
||||
- Adds new files.
|
||||
- Adds a line to an existing file (an import, an entry, a line in `.env`).
|
||||
- Adds a dependency.
|
||||
- Changes a value in a JSON file like `package.json`.
|
||||
|
||||
These never really break.
|
||||
|
||||
The one risky move is when a skill has to *reach into* existing code and wire something in at a specific spot. That's the only part that breaks when we change the code later. Keep these rare, and keep them to a line or two that just *calls* code living in the skill's own files, not big chunks of logic inline.
|
||||
|
||||
Rule of thumb: aim for skills that are almost all "adds." Not 100%; some reach-ins are fine. But a skill full of reach-ins is a smell, and a sign that spot in the core should become a proper hook.
|
||||
|
||||
## Where a skill's files live
|
||||
|
||||
The files a skill adds live in the skill's own folder, and the skill copies them into the project when it runs. The skill is self-contained.
|
||||
|
||||
The exception is skills that plug into a registry: channels and providers. Their code is larger, multi-file, and has to stay in sync with the core as it changes over time. That code lives on a long-lived **registry branch** (`channels`, `providers`) that we forward-merge against main, and the skill fetches it from there (`git show origin/channels:path > path`). A frozen copy in a skill folder would go stale.
|
||||
|
||||
This fetch is **additive, never a merge**. The skill copies in the files it needs; it does *not* `git merge` the branch. Merging a registry branch into a customized install is exactly the conflict fight this model exists to avoid. A skill's **tests live on the branch alongside its code** and are fetched the same way; a channel's adapter travels with its registration test. A provider is the multi-point case: its code spans the host *and* container trees plus a Dockerfile edit, so it fetches files into both trees and ships a registration test per tree. See the provider archetype in [skill-guidelines.md](skill-guidelines.md).
|
||||
|
||||
Either way the skill brings its own code, from its folder or from its branch.
|
||||
|
||||
## A test for every integration point
|
||||
|
||||
The tests a skill *must* ship are the ones that prove it integrates with the core and keeps working as the core changes. That's the whole point. Tests of a skill's own internal logic, or of its behavior against an external service, are fine but optional: the creator's call, because they don't guard against upstream changes. A pure-add skill that touches nothing existing needs no required integration test at all.
|
||||
|
||||
The places that break on upgrade are the **integration points**: wherever a skill reaches into the existing system. That's not just the obvious code edit. An appended import, a config entry, a Dockerfile change, a mount, an installed dependency, and a direct read of the core's data all count. Each gets a guard that goes **red if it breaks or goes missing**:
|
||||
|
||||
- **A behavior or structural test of the wiring.** Prefer behavior when the seam is queryable at runtime: a channel's registration test imports the real barrel and asserts the registry contains it. Fall back to a structural test only for wiring with no invocable seam.
|
||||
- **The build / typecheck.** Always on. It catches the drift a runtime test can't: a renamed symbol, a moved module, a changed signature.
|
||||
- **Coverage of how an added file consumes the core.** When a skill's own file reaches into core APIs or data, a test must exercise that consumption against the *real* core. That's the leg that catches core drift.
|
||||
|
||||
Why points and not whole skills: a skill can have several, and each is a separate way to break. The count is an honest signal: a skill's integration points are exactly its upgrade risk. Pure-add skills have zero and stay cheap.
|
||||
|
||||
This is what makes upgrades cheap to fix: when we move something in the core, the integration-point tests are exactly what fail, and that failing list *is* the set of skills to update.
|
||||
|
||||
**Tests travel with the skill.** They're files kept with the skill, in its folder or on its branch, and applying the skill copies them into the project's test tree. An integration-point test has to run against the *composed* system, so it only means anything once the skill is applied.
|
||||
|
||||
**The recipe tests the stack.** A single skill's tests prove that skill works alone. The recipe carries tests that run the skills *together*, in order. That's where you catch two skills that collide.
|
||||
|
||||
The full testing doctrine (how to pick the test type per point, the archetypes, the dependency cases) is in [skill-guidelines.md](skill-guidelines.md).
|
||||
|
||||
## How you actually work
|
||||
|
||||
You don't have to write a skill before you touch anything. Edit the code directly, get it working, then turn those edits into skills afterward; a coding agent does that conversion. Good authoring guidelines and a good recipe make skillifying-after-the-fact close to trivial.
|
||||
|
||||
The point isn't to slow you down at edit time. It's that nothing counts as part of your fork until it's a skill, because that's the only form that survives an upgrade.
|
||||
|
||||
## Upgrading
|
||||
|
||||
**Every update goes through `/update-nanoclaw`, never a raw `git pull`.** You don't know what an update contains until it lands; it might carry a breaking change with a migration. So the command inspects what's coming and runs the proper process: back up, pull the changes in, apply migrations, run tests, fix what broke, and flag when a fresh rebuild is needed instead.
|
||||
|
||||
Two different moves, two different rules. Your **fork pulls trunk**: that's a normal pull, run by the update command, and it's safe precisely because your changes live beside the core as skills rather than inside it. A **skill never merges**: it installs by fetching files and copying them in. If a skill's instructions say `git merge`, it isn't built to this model.
|
||||
|
||||
The update takes one of two paths:
|
||||
|
||||
**Normal upgrade: pull and fix what breaks.** Most of the time it pulls the latest upstream, resolves the occasional small conflict, runs the tests, and fixes whatever they flag. This stays cheap *because* the changes are small self-contained skills with tests: conflicts are rare, and when something does break, the failing test points at the exact skill and the fix is local.
|
||||
|
||||
**Rebuild from the recipe: the rare path.** Take fresh upstream and apply every skill from scratch. The command flags this when you've fallen far behind across many breaking changes (a clean rebuild beats catching up step by step). It's also how you hand your entire fork to someone else.
|
||||
|
||||
Around both:
|
||||
|
||||
- **The update skill updates itself first.** The first thing it does is fetch the latest version of the upgrade process. Otherwise you're upgrading with stale instructions.
|
||||
- **Snapshot first, restore on failure.** The upgrade sets a rollback point before it starts: today a git backup branch and tag; the model calls for a full project snapshot (code, database, data, files) so anything that fails rolls back and retries. Until that snapshot lands, a migration that touches data makes its own data backup. Nothing in the upgrade needs its own undo logic.
|
||||
- **Broken skills don't block you.** If a core change broke a skill, its test tells you, but the skill is usually still usable, and an agent fixes it at apply time. Skills are fixed lazily, when applied, not ahead of time for every core version.
|
||||
|
||||
## Migrations
|
||||
|
||||
Migrations are core, not an afterthought. Every breaking change ships with its migration, packaged together. A "migration" is broad: upgrading dependencies, a database change, a data backfill, moving files to new locations, whatever the change requires.
|
||||
|
||||
Migrations are **forward-only**. They don't need reverse scripts; the rollback point in front of the upgrade is the undo. If one fails, restore and retry.
|
||||
|
||||
A **startup tripwire** keeps installs on the supported path. Every sanctioned update path (install, update, migrate) stamps a marker with the version it reached; at startup the host checks that marker against the running code. If it's missing or doesn't match (typically because someone pulled by hand), the host stops, loudly, with the exact command to fix it, instead of silently breaking.
|
||||
|
||||
The tripwire doesn't reason about *which* changes are breaking; it just enforces that the path was used. (DB schema migrations already run automatically at startup, so they aren't its concern; it guards everything else a raw `git pull` leaves undone.) To override, you stamp the marker yourself: an explicit "I know what I'm doing," not a deletion. If you have your **own** upgrade flow (a deploy script, a CI job), make stamping the last step after it succeeds: `pnpm exec tsx scripts/upgrade-state.ts set`. See [upgrade-recovery.md](upgrade-recovery.md).
|
||||
|
||||
## The maintainer's side of the deal
|
||||
|
||||
This is a two-sided contract. Users keep their changes as skills. In return, the maintainer keeps the core stable and owns the breakage.
|
||||
|
||||
As maintainer:
|
||||
|
||||
1. **Keep the core small and stable.** Resist hardwiring features into the core. Push them to skills too.
|
||||
2. **Before shipping a core change, run the skills against it.** That tells you what you broke before users find out.
|
||||
3. **When you break a skill, you fix it; the users don't.** If a refactor moves something, update the affected skills or ship a migration. Don't make every user rediscover the same fix.
|
||||
4. **Ship the migration with the breaking change.** Packaged together: code, DB, files. Not a separate "good luck" note.
|
||||
5. **Watch for hotspots.** When lots of skills reach into the same spot in the core, that's the signal to add a proper hook there, so those reach-ins become clean adds.
|
||||
6. **Test against real forks.** Every core change and migration runs against a fleet of real, skill-built forks before shipping. Real proof on real installs.
|
||||
|
||||
## The public registry
|
||||
|
||||
Skills will be shared and composed; that's the whole point. A skill runs real code when it applies (copies files, installs dependencies, edits the Dockerfile). So a public registry of skills is a trust surface.
|
||||
|
||||
The rule: **every skill is reviewed and approved before it goes into the public registry, and every new version is re-reviewed.** Approving once and trusting forever is how supply chains get poisoned. Automated checks (linting against the guidelines, plus a harness that applies the skill on fresh upstream, runs its tests, removes it, and applies it twice) will clear the mechanical part so human review can focus on intent and safety. First-party skills are trusted by where they come from; the gate is for the public registry.
|
||||
|
||||
## The promise
|
||||
|
||||
Build your changes as skills following this, and we won't break you. It's a promise we can only make for skills: changes edited directly into the core are beyond what we can protect.
|
||||
@@ -0,0 +1,51 @@
|
||||
# Recovering from the upgrade tripwire
|
||||
|
||||
If NanoClaw refuses to start with a message like *"update did not go through the supported path"*, this page explains what happened and how to clear it.
|
||||
|
||||
## What happened
|
||||
|
||||
NanoClaw records the version it reached each time you upgrade through a supported path — `/setup`, `/update-nanoclaw`, or `/migrate-nanoclaw`. That record lives in `data/upgrade-state.json`.
|
||||
|
||||
At startup the host checks that record against the running code. If it's **missing** or its version **doesn't match** the code, the host stops. This almost always means the code was updated by a raw `git pull` instead of the supported flow — so migrations, dependency installs, or container rebuilds that the flow would have run may not have happened.
|
||||
|
||||
## If you just ran a supported upgrade
|
||||
|
||||
If you reached this point by running `/update-nanoclaw`, `/migrate-nanoclaw`, or `/setup` and it **completed successfully**, this is expected the first time an existing install meets the tripwire (your previous version predated it). Clear it by stamping the current version:
|
||||
|
||||
```bash
|
||||
pnpm exec tsx scripts/upgrade-state.ts set
|
||||
```
|
||||
|
||||
Then restart NanoClaw. It won't trip again.
|
||||
|
||||
**Coding agents:** if the upgrade you just ran reported success, run the command above automatically and restart. Only stop to ask a human if the upgrade itself failed or you can't confirm it finished.
|
||||
|
||||
## If you ran `git pull` yourself
|
||||
|
||||
Don't just clear the tripwire — that skips the work the supported flow does. Instead, run the supported upgrade so backups, migrations, and rebuilds happen:
|
||||
|
||||
```
|
||||
/update-nanoclaw
|
||||
```
|
||||
|
||||
Once it finishes it stamps the marker for you, and the next start is clean.
|
||||
|
||||
## If you have your own upgrade flow
|
||||
|
||||
If you've built your own way to upgrade — a custom skill, a deploy script, a CI job, a service that pulls and restarts — it won't stamp the marker, so the host will trip on the next start. Add the stamp as the **last step** of that flow, after the upgrade succeeds and before the restart:
|
||||
|
||||
```bash
|
||||
pnpm exec tsx scripts/upgrade-state.ts set
|
||||
```
|
||||
|
||||
That's the same thing `/setup`, `/update-nanoclaw`, and `/migrate-nanoclaw` do at the end. Do it only when the upgrade actually completed — the marker is your assertion that this install reached the current version through a path you trust.
|
||||
|
||||
## The override
|
||||
|
||||
`pnpm exec tsx scripts/upgrade-state.ts set` is the override: it declares "this install is good at the current version." Use it when you know the install is actually in a good state (e.g. you completed the steps manually). It's safe to re-run.
|
||||
|
||||
To inspect the current marker:
|
||||
|
||||
```bash
|
||||
pnpm exec tsx scripts/upgrade-state.ts get
|
||||
```
|
||||
+38
@@ -25,6 +25,44 @@ set -euo pipefail
|
||||
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# ─── --uninstall: short-circuit before any setup work ──────────────────
|
||||
# Never install dependencies just to uninstall. With the TS toolchain
|
||||
# present, hand straight off to setup:auto (the flow lives in
|
||||
# setup/uninstall/); without it, print manual cleanup guidance. Runs
|
||||
# before diagnostics.sh is sourced so a pure uninstall doesn't emit
|
||||
# setup_launched, and before all pre-flights/bootstrap.
|
||||
for arg in "$@"; do
|
||||
if [ "$arg" = "--uninstall" ]; then
|
||||
# exec tsx directly rather than `pnpm run -- …`: pnpm passes the `--`
|
||||
# separator through to the script, where the flag parser treats
|
||||
# everything after it as positional args and the flags get dropped.
|
||||
# Gate on node (tsx's shebang interpreter) — pnpm isn't used here.
|
||||
if command -v node >/dev/null 2>&1 && [ -x "$PROJECT_ROOT/node_modules/.bin/tsx" ]; then
|
||||
exec "$PROJECT_ROOT/node_modules/.bin/tsx" "$PROJECT_ROOT/setup/auto.ts" "$@"
|
||||
fi
|
||||
export NANOCLAW_PROJECT_ROOT="$PROJECT_ROOT"
|
||||
# shellcheck source=setup/lib/install-slug.sh
|
||||
source "$PROJECT_ROOT/setup/lib/install-slug.sh"
|
||||
UNINSTALL_RUNTIME="${CONTAINER_RUNTIME:-docker}"
|
||||
echo "Can't run the uninstaller: dependencies are missing (node_modules/)."
|
||||
echo "Either re-run 'bash nanoclaw.sh' once to restore them, or clean up manually:"
|
||||
echo ""
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
echo " launchctl unload ~/Library/LaunchAgents/$(launchd_label).plist"
|
||||
echo " rm -f ~/Library/LaunchAgents/$(launchd_label).plist"
|
||||
else
|
||||
echo " systemctl --user disable --now $(systemd_unit).service"
|
||||
echo " rm -f ~/.config/systemd/user/$(systemd_unit).service && systemctl --user daemon-reload"
|
||||
fi
|
||||
echo " $UNINSTALL_RUNTIME ps -aq --filter label=nanoclaw-install=$(_nanoclaw_install_slug) | xargs -r $UNINSTALL_RUNTIME rm -f"
|
||||
echo " $UNINSTALL_RUNTIME rmi $(container_image_base):latest"
|
||||
echo " rm -f ~/.local/bin/ncl # only if it points at this folder"
|
||||
echo ""
|
||||
echo "Then back up $PROJECT_ROOT/.env if you need the keys, and delete the folder."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
LOGS_DIR="$PROJECT_ROOT/logs"
|
||||
STEPS_DIR="$LOGS_DIR/setup-steps"
|
||||
PROGRESS_LOG="$LOGS_DIR/setup.log"
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "nanoclaw",
|
||||
"version": "2.0.76",
|
||||
"version": "2.1.11",
|
||||
"description": "Personal Claude assistant. Lightweight, secure, customizable.",
|
||||
"type": "module",
|
||||
"packageManager": "pnpm@10.33.0",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="90" height="20" role="img" aria-label="181k tokens, 91% of context window">
|
||||
<title>181k tokens, 91% of context window</title>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="90" height="20" role="img" aria-label="190k tokens, 95% of context window">
|
||||
<title>190k tokens, 95% of context window</title>
|
||||
<linearGradient id="s" x2="0" y2="100%">
|
||||
<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
|
||||
<stop offset="1" stop-opacity=".1"/>
|
||||
@@ -15,8 +15,8 @@
|
||||
<g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
|
||||
<text aria-hidden="true" x="26" y="15" fill="#010101" fill-opacity=".3">tokens</text>
|
||||
<text x="26" y="14">tokens</text>
|
||||
<text aria-hidden="true" x="71" y="15" fill="#010101" fill-opacity=".3">181k</text>
|
||||
<text x="71" y="14">181k</text>
|
||||
<text aria-hidden="true" x="71" y="15" fill="#010101" fill-opacity=".3">190k</text>
|
||||
<text x="71" y="14">190k</text>
|
||||
</g>
|
||||
</g>
|
||||
</a>
|
||||
|
||||
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* scripts/upgrade-state.ts — read or stamp the upgrade marker.
|
||||
*
|
||||
* Usage:
|
||||
* pnpm exec tsx scripts/upgrade-state.ts get
|
||||
* pnpm exec tsx scripts/upgrade-state.ts set [version] [via]
|
||||
*
|
||||
* `set` with no version stamps the current package.json version. The
|
||||
* sanctioned upgrade paths (setup / update / migrate) call `set` on
|
||||
* success; running it by hand is also the documented way to clear the
|
||||
* startup tripwire — see docs/upgrade-recovery.md.
|
||||
*/
|
||||
import { getCodeVersion, markerPath, readUpgradeState, writeUpgradeState } from '../src/upgrade-state.js';
|
||||
|
||||
const [, , cmd, versionArg, viaArg] = process.argv; // skip argv[0] (node executable) and argv[1] (script path); the rest are the CLI args
|
||||
|
||||
if (cmd === 'get') { // `get`: print the stored marker as JSON, or the literal 'none' when readUpgradeState() returns a falsy value
|
||||
const state = readUpgradeState();
|
||||
console.log(state ? JSON.stringify(state) : 'none');
|
||||
} else if (cmd === 'set') { // `set`: stamp the marker; `||` means a missing or empty argument falls back to the default
|
||||
const state = writeUpgradeState({ version: versionArg || getCodeVersion(), via: viaArg || 'manual' });
|
||||
console.log(`Stamped ${markerPath()}: ${JSON.stringify(state)}`);
|
||||
} else { // missing or unrecognized subcommand: usage goes to stderr and the process exits with status 2
|
||||
console.error('Usage: pnpm exec tsx scripts/upgrade-state.ts get | set [version] [via]');
|
||||
|
||||
process.exit(2);
|
||||
}
|
||||
@@ -48,6 +48,8 @@ import {
|
||||
} from './lib/setup-config-parse.js';
|
||||
import { runAdvancedScreen } from './lib/setup-config-screen.js';
|
||||
import { runWindowedStep } from './lib/windowed-runner.js';
|
||||
import { runUninstallFlow } from './uninstall/flow.js';
|
||||
import { detectExistingInstall } from './uninstall/scan.js';
|
||||
import { detectRegisteredGroups, detectExistingDisplayName } from './environment.js';
|
||||
import { pollHealth } from './onecli.js';
|
||||
import { getLaunchdLabel, getSystemdUnit } from '../src/install-slug.js';
|
||||
@@ -88,6 +90,17 @@ async function main(): Promise<void> {
|
||||
let configValues = { ...readFromEnv(), ...flagResult.values };
|
||||
applyToEnv(configValues);
|
||||
|
||||
// --uninstall routes to the uninstall flow before any setup side effects —
|
||||
// in particular before initProgressionLog(), so an uninstall never resets
|
||||
// logs/setup.log on its way to (possibly) deleting logs/ entirely.
|
||||
if (configValues.uninstall === true) {
|
||||
await runUninstallFlow({
|
||||
dryRun: configValues.dryRun === true,
|
||||
yes: configValues.yes === true,
|
||||
invokedFrom: 'flag',
|
||||
});
|
||||
}
|
||||
|
||||
printIntro();
|
||||
initProgressionLog();
|
||||
phEmit('auto_started');
|
||||
@@ -121,6 +134,37 @@ async function main(): Promise<void> {
|
||||
.filter(Boolean),
|
||||
);
|
||||
|
||||
// Offer removal when setup lands on an existing install. Skipped on every
|
||||
// resume path — both the fail() retry and the sg-docker re-exec pass
|
||||
// NANOCLAW_SKIP (and the latter sets NANOCLAW_REEXEC_SG) — so the prompt
|
||||
// appears at most once per fresh run.
|
||||
const isResume = process.env.NANOCLAW_REEXEC_SG === '1' || skip.size > 0;
|
||||
if (!isResume && detectExistingInstall(process.cwd())) {
|
||||
const action = ensureAnswer(
|
||||
await brightSelect<'keep' | 'uninstall'>({
|
||||
message: 'NanoClaw is already installed in this folder. What would you like to do?',
|
||||
options: [
|
||||
{
|
||||
value: 'keep',
|
||||
label: 'Keep it & continue setup',
|
||||
hint: 'recommended — re-running setup is safe',
|
||||
},
|
||||
{
|
||||
value: 'uninstall',
|
||||
label: 'Uninstall NanoClaw & exit',
|
||||
hint: 'removes service, data, and agent files — asks before each step',
|
||||
},
|
||||
],
|
||||
initialValue: 'keep',
|
||||
}),
|
||||
) as 'keep' | 'uninstall';
|
||||
setupLog.userInput('existing_install', action);
|
||||
phEmit('existing_install_detected', { action });
|
||||
if (action === 'uninstall') {
|
||||
await runUninstallFlow({ dryRun: false, yes: false, invokedFrom: 'setup-detection' });
|
||||
}
|
||||
}
|
||||
|
||||
if (!skip.has('environment')) {
|
||||
const res = await runQuietStep('environment', {
|
||||
running: 'Checking your system…',
|
||||
|
||||
+54
-47
@@ -11,9 +11,17 @@
|
||||
* 1. Build a handoff prompt from the caller's context: channel, current
|
||||
* step, completed steps, collected values (secrets redacted), relevant
|
||||
* files to read.
|
||||
* 2. Spawn `claude --append-system-prompt "<context>"
|
||||
* --permission-mode acceptEdits` with `stdio: 'inherit'` so Claude owns
|
||||
* the terminal.
|
||||
* 2. Spawn `claude "<prompt>" --permission-mode auto` with
|
||||
* `stdio: 'inherit'` so Claude owns the terminal. The positional prompt
|
||||
* is auto-submitted as the first user message, so Claude starts
|
||||
* orienting immediately instead of sitting at an empty prompt — and the
|
||||
* context stays visible in the transcript and survives `--resume`,
|
||||
* which an --append-system-prompt would not.
|
||||
* 2a. All handoffs in one setup run share a single session: the first
|
||||
* spawn pins a generated UUID via `--session-id`, later spawns pass
|
||||
* `--resume <uuid>` so Claude keeps the context of earlier handoffs.
|
||||
* (stdio is inherited, so we can't *read* the session id Claude picks —
|
||||
* pinning our own is the only way to find the session again.)
|
||||
* 3. When Claude exits (user types /exit, Ctrl-D, or closes the session),
|
||||
* control returns to the setup driver. The driver can then re-offer the
|
||||
* same step (e.g., "How did that go?" select).
|
||||
@@ -23,6 +31,7 @@
|
||||
* attempting to parse it as a real answer.
|
||||
*/
|
||||
import { execSync, spawn } from 'child_process';
|
||||
import { randomUUID } from 'crypto';
|
||||
import path from 'path';
|
||||
|
||||
import * as p from '@clack/prompts';
|
||||
@@ -61,8 +70,8 @@ export interface HandoffContext {
|
||||
}
|
||||
|
||||
/**
|
||||
* Spawn interactive Claude with context pre-loaded as a system-prompt
|
||||
* append. Returns when Claude exits.
|
||||
* Spawn interactive Claude with the handoff context as an auto-submitted
|
||||
* first prompt. Returns when Claude exits.
|
||||
*
|
||||
* Silently no-ops (returns `false`) if `claude` isn't on PATH — setup runs
|
||||
* where the binary is guaranteed to exist (we install it in the auth step),
|
||||
@@ -78,8 +87,6 @@ export async function offerClaudeHandoff(ctx: HandoffContext): Promise<boolean>
|
||||
return false;
|
||||
}
|
||||
|
||||
const systemPrompt = buildSystemPrompt(ctx);
|
||||
|
||||
note(
|
||||
[
|
||||
"I'm handing you off to Claude in interactive mode.",
|
||||
@@ -90,18 +97,39 @@ export async function offerClaudeHandoff(ctx: HandoffContext): Promise<boolean>
|
||||
'Handing off to Claude',
|
||||
);
|
||||
|
||||
return spawnInteractiveClaude(buildHandoffPrompt(ctx));
|
||||
}
|
||||
|
||||
// One session shared by every interactive handoff in this setup-driver
|
||||
// process. We pin the id ourselves (--session-id) on the first spawn because
|
||||
// stdio is inherited and Claude's own id is never visible to us; subsequent
|
||||
// spawns --resume it so Claude remembers earlier handoffs. Separate from
|
||||
// claude-assist's non-interactive session — the two formats don't mix.
|
||||
const handoffSessionId = randomUUID();
|
||||
let handoffSessionStarted = false;
|
||||
|
||||
/**
|
||||
* Spawn interactive Claude with the handoff context auto-submitted as the
|
||||
* first user message. Resolves when Claude exits and control returns to
|
||||
* the setup driver.
|
||||
*/
|
||||
function spawnInteractiveClaude(prompt: string): Promise<boolean> {
|
||||
const sessionArgs = handoffSessionStarted
|
||||
? ['--resume', handoffSessionId]
|
||||
: ['--session-id', handoffSessionId];
|
||||
return new Promise<boolean>((resolve) => {
|
||||
const child = spawn(
|
||||
'claude',
|
||||
[
|
||||
'--append-system-prompt',
|
||||
systemPrompt,
|
||||
prompt,
|
||||
'--permission-mode',
|
||||
'acceptEdits',
|
||||
'auto',
|
||||
...sessionArgs,
|
||||
],
|
||||
{ stdio: 'inherit' },
|
||||
);
|
||||
child.on('close', () => {
|
||||
handoffSessionStarted = true;
|
||||
p.log.success(brandBody("Back from Claude. Let's continue."));
|
||||
resolve(true);
|
||||
});
|
||||
@@ -164,20 +192,20 @@ function isClaudeUsable(): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
function buildSystemPrompt(ctx: HandoffContext): string {
|
||||
function buildHandoffPrompt(ctx: HandoffContext): string {
|
||||
const lines: string[] = [
|
||||
`The user is running NanoClaw's interactive \`setup:auto\` flow to wire the ${ctx.channel} channel.`,
|
||||
`They got stuck at the step: "${ctx.step}" (${ctx.stepDescription}) and asked for help.`,
|
||||
`I'm running NanoClaw's interactive \`setup:auto\` flow to wire the ${ctx.channel} channel`,
|
||||
`and got stuck at the step: "${ctx.step}" (${ctx.stepDescription}).`,
|
||||
'',
|
||||
"Your job: help them complete this specific step and get back to setup.",
|
||||
"You can read files, run commands (with acceptEdits permissions), search the web,",
|
||||
"and explain concepts. Be concise. When they're ready to resume, tell them to type",
|
||||
"/exit and they'll return to the setup flow at the same step.",
|
||||
'Help me complete this specific step and get back to setup.',
|
||||
'You can read files, run commands, search the web,',
|
||||
"and explain concepts. Be concise. When I'm ready to resume, remind me to type",
|
||||
"/exit and I'll return to the setup flow at the same step.",
|
||||
'',
|
||||
];
|
||||
|
||||
if (ctx.completedSteps && ctx.completedSteps.length > 0) {
|
||||
lines.push('Steps they have already completed:');
|
||||
lines.push("Steps I've already completed:");
|
||||
for (const s of ctx.completedSteps) lines.push(` ✓ ${s}`);
|
||||
lines.push('');
|
||||
}
|
||||
@@ -243,8 +271,6 @@ async function offerFailureHandoff(
|
||||
);
|
||||
if (!want) return false;
|
||||
|
||||
const systemPrompt = buildFailureSystemPrompt(ctx, projectRoot);
|
||||
|
||||
note(
|
||||
[
|
||||
"Launching Claude to help debug this failure.",
|
||||
@@ -255,29 +281,10 @@ async function offerFailureHandoff(
|
||||
'Handing off to Claude',
|
||||
);
|
||||
|
||||
return new Promise<boolean>((resolve) => {
|
||||
const child = spawn(
|
||||
'claude',
|
||||
[
|
||||
'--append-system-prompt',
|
||||
systemPrompt,
|
||||
'--permission-mode',
|
||||
'acceptEdits',
|
||||
],
|
||||
{ stdio: 'inherit' },
|
||||
);
|
||||
child.on('close', () => {
|
||||
p.log.success(brandBody("Back from Claude. Let's continue."));
|
||||
resolve(true);
|
||||
});
|
||||
child.on('error', () => {
|
||||
p.log.error("Couldn't launch Claude. Continuing without handoff.");
|
||||
resolve(false);
|
||||
});
|
||||
});
|
||||
return spawnInteractiveClaude(buildFailurePrompt(ctx, projectRoot));
|
||||
}
|
||||
|
||||
function buildFailureSystemPrompt(ctx: AssistContext, projectRoot: string): string {
|
||||
function buildFailurePrompt(ctx: AssistContext, projectRoot: string): string {
|
||||
const stepRefs = STEP_FILES[ctx.stepName] ?? [];
|
||||
const references = [
|
||||
...BIG_PICTURE_FILES,
|
||||
@@ -289,20 +296,20 @@ function buildFailureSystemPrompt(ctx: AssistContext, projectRoot: string): stri
|
||||
].filter((v, i, a) => a.indexOf(v) === i);
|
||||
|
||||
const lines: string[] = [
|
||||
"The user is running NanoClaw's interactive setup flow and hit a failure.",
|
||||
"I'm running NanoClaw's interactive setup flow and hit a failure.",
|
||||
'',
|
||||
`Failed step: ${ctx.stepName}`,
|
||||
`Error: ${ctx.msg}`,
|
||||
];
|
||||
|
||||
if (ctx.hint) lines.push(`Hint: ${ctx.hint}`);
|
||||
if (ctx.hint) lines.push(`Hint shown to me: ${ctx.hint}`);
|
||||
|
||||
lines.push(
|
||||
'',
|
||||
'Your job: help them diagnose and fix this issue. Read the referenced files',
|
||||
'and logs to understand what went wrong, then help them fix it. You can read',
|
||||
'files, run commands, check logs, and explain what happened. Be concise.',
|
||||
"When they're ready to resume setup, tell them to type /exit.",
|
||||
'Help me diagnose and fix this issue. Read the referenced files and logs',
|
||||
'to understand what went wrong, then help me fix it. You can read files,',
|
||||
'run commands, check logs, and explain what happened. Be concise.',
|
||||
"When I'm ready to resume setup, remind me to type /exit.",
|
||||
'',
|
||||
'Relevant files (read as needed with the Read tool):',
|
||||
);
|
||||
|
||||
@@ -16,7 +16,13 @@ const INSTALL_ID_PATH = path.join('data', 'install-id');
|
||||
|
||||
let cached: string | null = null;
|
||||
|
||||
export function installId(): string {
|
||||
/**
|
||||
* `persist: false` reads an existing id but never creates `data/install-id`
|
||||
* — required by the uninstall path, which must not mutate the filesystem
|
||||
* before (or instead of) removing it. Events in one process still join:
|
||||
* the generated id is cached.
|
||||
*/
|
||||
export function installId(persist = true): string {
|
||||
if (cached) return cached;
|
||||
try {
|
||||
const existing = fs.readFileSync(INSTALL_ID_PATH, 'utf-8').trim();
|
||||
@@ -28,11 +34,13 @@ export function installId(): string {
|
||||
// fall through to create
|
||||
}
|
||||
const id = randomUUID().toLowerCase();
|
||||
try {
|
||||
fs.mkdirSync(path.dirname(INSTALL_ID_PATH), { recursive: true });
|
||||
fs.writeFileSync(INSTALL_ID_PATH, id);
|
||||
} catch {
|
||||
// best-effort; still return the id so the event fires
|
||||
if (persist) {
|
||||
try {
|
||||
fs.mkdirSync(path.dirname(INSTALL_ID_PATH), { recursive: true });
|
||||
fs.writeFileSync(INSTALL_ID_PATH, id);
|
||||
} catch {
|
||||
// best-effort; still return the id so the event fires
|
||||
}
|
||||
}
|
||||
cached = id;
|
||||
return id;
|
||||
@@ -41,6 +49,7 @@ export function installId(): string {
|
||||
export function emit(
|
||||
event: string,
|
||||
props: Record<string, string | number | boolean | undefined> = {},
|
||||
opts: { persistId?: boolean } = {},
|
||||
): void {
|
||||
if (process.env.NANOCLAW_NO_DIAGNOSTICS === '1') return;
|
||||
|
||||
@@ -53,7 +62,7 @@ export function emit(
|
||||
const body = JSON.stringify({
|
||||
api_key: POSTHOG_KEY,
|
||||
event,
|
||||
distinct_id: installId(),
|
||||
distinct_id: installId(opts.persistId !== false),
|
||||
properties: cleaned,
|
||||
});
|
||||
|
||||
|
||||
@@ -132,6 +132,32 @@ export const CONFIG: Entry[] = [
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
},
|
||||
|
||||
// Uninstall route — handled in auto.ts before any setup work begins.
|
||||
{
|
||||
key: 'uninstall',
|
||||
label: 'Uninstall',
|
||||
help: 'Remove this NanoClaw copy (service, containers, data, vault agents). Asks per group.',
|
||||
surface: 'flag',
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
},
|
||||
{
|
||||
key: 'dryRun',
|
||||
label: 'Uninstall dry run',
|
||||
help: 'With --uninstall: preview what would be removed without changing anything.',
|
||||
surface: 'flag',
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
},
|
||||
{
|
||||
key: 'yes',
|
||||
label: 'Uninstall without prompts',
|
||||
help: 'With --uninstall: delete everything found without asking (orphan vault agents are still kept).',
|
||||
surface: 'flag',
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
},
|
||||
];
|
||||
|
||||
// ─── name derivation ───────────────────────────────────────────────────
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user