Compare commits

...

3 Commits

Author SHA1 Message Date
gavrielc 561a0b6217 merge: catch up with upstream main
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 15:27:59 +03:00
gavrielc ccb4523a54 chore: remove direct pino/pino-pretty dependency
Pino was replaced with a built-in logger on main. For branches
with baileys (WhatsApp), pino resolves as a transitive dependency
of @whiskeysockets/baileys.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 22:39:43 +03:00
gavrielc e87d15db96 feat: channel-aware text formatting for WhatsApp, Telegram, Slack, Signal
Adds src/text-styles.ts with two zero-dependency functions:

- parseTextStyles(text, channel) — converts Claude Markdown to each
  channel's native syntax before delivery
- parseSignalStyles(text) — strips Markdown markers and returns
  plain text + SignalTextStyle[] ranges for signal-cli

Wires parseTextStyles into the outbound pipeline via formatOutbound
in router.ts and both sendMessage paths in index.ts.

Includes 73 tests in src/formatting.test.ts.

Co-Authored-By: Ken Bolton <ken@bscientific.com>
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-25 21:55:16 +02:00
4 changed files with 602 additions and 4 deletions
+257
View File
@@ -11,6 +11,7 @@ import {
formatOutbound,
stripInternalTags,
} from './router.js';
import { parseTextStyles, parseSignalStyles } from './text-styles.js';
import { NewMessage } from './types.js';
function makeMsg(overrides: Partial<NewMessage> = {}): NewMessage {
@@ -292,3 +293,259 @@ describe('trigger gating (requiresTrigger interaction)', () => {
expect(shouldProcess(false, false, undefined, msgs)).toBe(true);
});
});
// --- parseTextStyles ---
// discord and signal are the two channels parseTextStyles returns verbatim:
// the input string must come back unchanged, markers and all.
describe('parseTextStyles — passthrough channels', () => {
it('passes text through unchanged on discord', () => {
const md = '**bold** and *italic* and [link](https://example.com)';
expect(parseTextStyles(md, 'discord')).toBe(md);
});
it('passes text through unchanged on signal (signal uses parseSignalStyles)', () => {
const md = '**bold** and *italic* and [link](https://example.com)';
expect(parseTextStyles(md, 'signal')).toBe(md);
});
});
// Markdown double-star bold must come out as single-star bold on every
// marker-substitution channel (whatsapp, telegram, slack).
describe('parseTextStyles — bold', () => {
it('converts **bold** to *bold* on whatsapp', () => {
expect(parseTextStyles('**hello**', 'whatsapp')).toBe('*hello*');
});
it('converts **bold** to *bold* on telegram', () => {
expect(parseTextStyles('say **this** now', 'telegram')).toBe(
'say *this* now',
);
});
it('converts **bold** to *bold* on slack', () => {
expect(parseTextStyles('**hello**', 'slack')).toBe('*hello*');
});
// Stars surrounded by spaces are literal (e.g. multiplication) and must
// not be treated as emphasis markers.
it('does not convert a lone * as bold', () => {
expect(parseTextStyles('a * b * c', 'whatsapp')).toBe('a * b * c');
});
});
// Markdown single-star italic must come out as underscore italic.
describe('parseTextStyles — italic', () => {
it('converts *italic* to _italic_ on whatsapp', () => {
expect(parseTextStyles('say *this* now', 'whatsapp')).toBe(
'say _this_ now',
);
});
it('converts *italic* to _italic_ on telegram', () => {
expect(parseTextStyles('*italic*', 'telegram')).toBe('_italic_');
});
// Guards the interaction between the two rules: the single stars produced
// by the bold conversion must not be converted again into underscores.
it('bold-before-italic: **bold** *italic* → *bold* _italic_', () => {
expect(parseTextStyles('**bold** *italic*', 'whatsapp')).toBe(
'*bold* _italic_',
);
});
});
// ATX headings of any tested level lose their `#` marker and are rendered as
// single-star bold.
describe('parseTextStyles — headings', () => {
it('converts # heading on whatsapp', () => {
expect(parseTextStyles('# Top', 'whatsapp')).toBe('*Top*');
});
it('converts ## heading on telegram', () => {
expect(parseTextStyles('## Hello World', 'telegram')).toBe('*Hello World*');
});
it('converts ### heading on telegram', () => {
expect(parseTextStyles('### Section', 'telegram')).toBe('*Section*');
});
// A `##` in the middle of a line is ordinary text.
it('only converts headings at line start', () => {
const input = 'not a ## heading in middle';
expect(parseTextStyles(input, 'whatsapp')).toBe(input);
});
});
// Markdown links are flattened to "text (url)" on whatsapp/telegram and
// rewritten to the <url|text> form on slack.
describe('parseTextStyles — links', () => {
it('converts [text](url) to text (url) on whatsapp', () => {
expect(parseTextStyles('[Link](https://example.com)', 'whatsapp')).toBe(
'Link (https://example.com)',
);
});
it('converts [text](url) to text (url) on telegram', () => {
expect(parseTextStyles('[Link](https://example.com)', 'telegram')).toBe(
'Link (https://example.com)',
);
});
it('converts [text](url) to <url|text> on slack', () => {
expect(parseTextStyles('[Click here](https://example.com)', 'slack')).toBe(
'<https://example.com|Click here>',
);
});
});
// Only the rule characters are removed; the newlines on either side stay, so
// the rule line is left behind as an empty line.
describe('parseTextStyles — horizontal rules', () => {
it('strips --- on telegram', () => {
expect(parseTextStyles('above\n---\nbelow', 'telegram')).toBe(
'above\n\nbelow',
);
});
it('strips *** on whatsapp', () => {
expect(parseTextStyles('above\n***\nbelow', 'whatsapp')).toBe(
'above\n\nbelow',
);
});
});
// Fenced and inline code must be emitted byte-for-byte, backticks included,
// while Markdown around it is still converted.
describe('parseTextStyles — code block protection', () => {
it('does not transform **bold** inside fenced code block', () => {
const input = '```\n**not bold**\n```';
expect(parseTextStyles(input, 'whatsapp')).toBe(input);
});
it('does not transform *italic* inside inline code', () => {
const input = 'use `*star*` literally';
expect(parseTextStyles(input, 'telegram')).toBe(input);
});
// Mixed input: converted text on both sides of an untouched inline span.
it('transforms text outside code blocks but not inside', () => {
const input = '**bold** and `*code*` and *italic*';
expect(parseTextStyles(input, 'whatsapp')).toBe(
'*bold* and `*code*` and _italic_',
);
});
// Mixed input: converted lines before and after an untouched fenced block.
it('transforms text outside fenced block but not inside', () => {
const input = '**bold**\n```\n**raw**\n```\n*italic*';
expect(parseTextStyles(input, 'telegram')).toBe(
'*bold*\n```\n**raw**\n```\n_italic_',
);
});
});
// --- parseSignalStyles ---
// One marker type per test: the markers must be removed from `text` and a
// single range covering the remaining characters must be reported.
// `start` and `length` are offsets into the stripped text, not the input.
describe('parseSignalStyles — basic styles', () => {
it('extracts BOLD from **text**', () => {
const { text, textStyle } = parseSignalStyles('**hello**');
expect(text).toBe('hello');
expect(textStyle).toEqual([{ style: 'BOLD', start: 0, length: 5 }]);
});
it('extracts ITALIC from *text*', () => {
const { text, textStyle } = parseSignalStyles('*hello*');
expect(text).toBe('hello');
expect(textStyle).toEqual([{ style: 'ITALIC', start: 0, length: 5 }]);
});
it('extracts ITALIC from _text_', () => {
const { text, textStyle } = parseSignalStyles('_hello_');
expect(text).toBe('hello');
expect(textStyle).toEqual([{ style: 'ITALIC', start: 0, length: 5 }]);
});
it('extracts STRIKETHROUGH from ~~text~~', () => {
const { text, textStyle } = parseSignalStyles('~~hello~~');
expect(text).toBe('hello');
expect(textStyle).toEqual([
{ style: 'STRIKETHROUGH', start: 0, length: 5 },
]);
});
it('extracts MONOSPACE from `inline code`', () => {
const { text, textStyle } = parseSignalStyles('`code`');
expect(text).toBe('code');
expect(textStyle).toEqual([{ style: 'MONOSPACE', start: 0, length: 4 }]);
});
// The `## ` prefix is dropped and the whole heading line becomes BOLD.
it('extracts BOLD from ## heading and strips marker', () => {
const { text, textStyle } = parseSignalStyles('## Hello World');
expect(text).toBe('Hello World');
expect(textStyle).toEqual([{ style: 'BOLD', start: 0, length: 11 }]);
});
it('no styles for plain text', () => {
const { text, textStyle } = parseSignalStyles('just plain text');
expect(text).toBe('just plain text');
expect(textStyle).toHaveLength(0);
});
});
// Styled spans embedded in surrounding text: offsets must account for the
// markers that were removed earlier in the string.
describe('parseSignalStyles — mixed content', () => {
// 'say ' is 4 characters, so the bold span starts at offset 4.
it('correctly offsets styles in mixed text', () => {
const { text, textStyle } = parseSignalStyles('say **hi** now');
expect(text).toBe('say hi now');
expect(textStyle).toEqual([{ style: 'BOLD', start: 4, length: 2 }]);
});
// 'bold and ' is 9 characters in the stripped text, so italic starts at 9.
it('handles multiple styles with correct offsets', () => {
const { text, textStyle } = parseSignalStyles('**bold** and *italic*');
expect(text).toBe('bold and italic');
expect(textStyle[0]).toEqual({ style: 'BOLD', start: 0, length: 4 });
expect(textStyle[1]).toEqual({ style: 'ITALIC', start: 9, length: 6 });
});
it('strips link markers, no style applied', () => {
const { text, textStyle } = parseSignalStyles(
'[Click here](https://example.com)',
);
expect(text).toBe('Click here (https://example.com)');
expect(textStyle).toHaveLength(0);
});
// Unlike parseTextStyles, the rule line is removed together with its
// trailing newline, so no empty line is left behind.
it('strips horizontal rules', () => {
const { text, textStyle } = parseSignalStyles('above\n---\nbelow');
expect(text).toBe('above\nbelow');
expect(textStyle).toHaveLength(0);
});
});
// Fenced blocks: the fence lines are dropped, the body is copied without
// interpreting any markers inside it, and the body is reported as MONOSPACE.
describe('parseSignalStyles — code block protection', () => {
// '**not bold**' is 12 characters and keeps its literal stars.
it('protects fenced code block content with MONOSPACE', () => {
const input = '```\n**not bold**\n```';
const { text, textStyle } = parseSignalStyles(input);
expect(text).toBe('**not bold**');
expect(textStyle).toEqual([{ style: 'MONOSPACE', start: 0, length: 12 }]);
});
// Only checks that both ranges exist; exact offsets are not asserted here.
it('styles outside block are still processed', () => {
const input = '**bold**\n```\nraw code\n```';
const { text, textStyle } = parseSignalStyles(input);
expect(text).toContain('bold');
expect(text).toContain('raw code');
const boldStyle = textStyle.find((s) => s.style === 'BOLD');
const codeStyle = textStyle.find((s) => s.style === 'MONOSPACE');
expect(boldStyle).toBeDefined();
expect(codeStyle).toBeDefined();
});
});
// Underscores inside identifiers are preceded by a word character and must
// be left as literal text rather than opening an italic span.
describe('parseSignalStyles — snake_case guard', () => {
it('does not italicise underscores in snake_case', () => {
const { text, textStyle } = parseSignalStyles('use snake_case_here');
expect(text).toBe('use snake_case_here');
expect(textStyle).toHaveLength(0);
});
});
// formatOutbound with the optional channel argument: channel formatting is
// applied only when a channel is passed, and only after <internal> tags have
// been stripped.
describe('formatOutbound — channel-aware', () => {
it('applies parseTextStyles when channel is provided', () => {
expect(formatOutbound('**bold**', 'whatsapp')).toBe('*bold*');
});
// Backward compatibility: the one-argument call leaves Markdown untouched.
it('returns plain stripped text when no channel provided', () => {
expect(formatOutbound('**bold**')).toBe('**bold**');
});
it('strips internal tags then applies channel formatting', () => {
expect(
formatOutbound('<internal>thinking</internal>**done**', 'telegram'),
).toBe('*done*');
});
it('signal channel is passthrough — raw markdown preserved for parseSignalStyles', () => {
expect(formatOutbound('**bold**', 'signal')).toBe('**bold**');
});
});
+5 -2
View File
@@ -49,6 +49,7 @@ import { GroupQueue } from './group-queue.js';
import { resolveGroupFolderPath } from './group-folder.js';
import { startIpcWatcher } from './ipc.js';
import { findChannel, formatMessages, formatOutbound } from './router.js';
import { ChannelType } from './text-styles.js';
import {
restoreRemoteControl,
startRemoteControl,
@@ -686,14 +687,16 @@ async function main(): Promise<void> {
logger.warn({ jid }, 'No channel owns JID, cannot send message');
return;
}
const text = formatOutbound(rawText);
const text = formatOutbound(rawText, channel.name as ChannelType);
if (text) await channel.sendMessage(jid, text);
},
});
startIpcWatcher({
sendMessage: (jid, text) => {
sendMessage: (jid, rawText) => {
const channel = findChannel(channels, jid);
if (!channel) throw new Error(`No channel for JID: ${jid}`);
const text = formatOutbound(rawText, channel.name as ChannelType);
if (!text) return Promise.resolve();
return channel.sendMessage(jid, text);
},
registeredGroups: () => registeredGroups,
+3 -2
View File
@@ -1,5 +1,6 @@
import { Channel, NewMessage } from './types.js';
import { formatLocalTime } from './timezone.js';
import { parseTextStyles, ChannelType } from './text-styles.js';
export function escapeXml(s: string): string {
if (!s) return '';
@@ -28,10 +29,10 @@ export function stripInternalTags(text: string): string {
return text.replace(/<internal>[\s\S]*?<\/internal>/g, '').trim();
}
export function formatOutbound(rawText: string): string {
export function formatOutbound(rawText: string, channel?: ChannelType): string {
const text = stripInternalTags(rawText);
if (!text) return '';
return text;
return channel ? parseTextStyles(text, channel) : text;
}
export function routeOutbound(
+337
View File
@@ -0,0 +1,337 @@
/**
* parseTextStyles — convert Claude's Markdown output to channel-native formatting.
*
* Claude outputs standard Markdown. Each channel has its own text style syntax:
* - Signal: passthrough (SignalChannel handles rich text styles natively
* via the signal-cli JSON-RPC textStyle param — see parseSignalStyles)
* - WhatsApp / Telegram: *bold*, _italic_, no headings, plain links
* - Slack: *bold*, _italic_, <url|text> links
* - Discord: passthrough (already Markdown)
*
* Code blocks (fenced and inline) are NEVER transformed by marker substitution.
*/
/**
 * Channel identifiers accepted by `parseTextStyles`.
 *
 * `'signal'` and `'discord'` are returned unchanged by `parseTextStyles`;
 * `'whatsapp'`, `'telegram'` and `'slack'` get marker substitution.
 */
export type ChannelType =
| 'signal'
| 'whatsapp'
| 'telegram'
| 'slack'
| 'discord';
/**
 * Rewrite Markdown markers in `text` into the syntax used by `channel`.
 *
 * Empty input and the passthrough channels (`discord`, `signal`) are returned
 * as-is. For every other channel the text is cut into code and non-code
 * pieces; code pieces are copied verbatim and only the non-code pieces are
 * run through the marker substitution.
 *
 * @param text Markdown source.
 * @param channel Destination channel.
 * @returns The text with channel-native markers.
 */
export function parseTextStyles(text: string, channel: ChannelType): string {
  // Discord already speaks Markdown; Signal is handled by parseSignalStyles().
  const isPassthrough = channel === 'discord' || channel === 'signal';
  if (!text || isPassthrough) return text;

  let result = '';
  for (const segment of splitProtectedRegions(text)) {
    result += segment.protected
      ? segment.content
      : transformSegment(segment.content, channel);
  }
  return result;
}
// ---------------------------------------------------------------------------
// Signal rich-text formatting
// ---------------------------------------------------------------------------
/**
 * One styled range within a Signal message body.
 *
 * `parseSignalStyles` returns an array of these alongside the stripped text.
 * Note that `parseSignalStyles` in this file only ever emits BOLD, ITALIC,
 * STRIKETHROUGH and MONOSPACE; SPOILER is part of the type but is never
 * produced by it.
 */
export interface SignalTextStyle {
/** One of Signal's supported text styles. */
style: 'BOLD' | 'ITALIC' | 'STRIKETHROUGH' | 'MONOSPACE' | 'SPOILER';
/** Start position in the final message string, in UTF-16 code units. */
start: number;
/** Length of the styled range, in UTF-16 code units. */
length: number;
}
/**
 * Parse Claude's Markdown into a plain string + Signal textStyle ranges.
 *
 * The returned `text` has all markdown markers stripped. The `textStyle`
 * array uses UTF-16 code-unit offsets (JavaScript's native string indexing),
 * matching what signal-cli's JSON-RPC `send.textStyle` param expects.
 *
 * Supported patterns:
 *   **bold**          → BOLD
 *   *italic*          → ITALIC
 *   _italic_          → ITALIC
 *   ~~strike~~        → STRIKETHROUGH
 *   `inline code`     → MONOSPACE
 *   ```code block```  → MONOSPACE
 *   ## Heading        → BOLD (markers stripped)
 *   [text](url)       → "text (url)" (no style)
 *   ---               → removed (together with its trailing newline)
 *
 * The input is scanned once, left to right. At each position the rules are
 * tried in the order they appear in the loop body and the first one that
 * matches consumes its whole span; the content of a matched span is copied
 * verbatim (styles do not nest). Anything that matches no rule is copied
 * through unchanged.
 *
 * @param rawText Markdown source.
 * @returns `text` — the marker-free message body; `textStyle` — the styled
 *   ranges, in the order they were encountered. Zero-length ranges are
 *   never emitted.
 */
export function parseSignalStyles(rawText: string): {
  text: string;
  textStyle: SignalTextStyle[];
} {
  const textStyle: SignalTextStyle[] = [];
  let out = '';
  let i = 0;
  const s = rawText;
  const n = s.length;
  // Sticky (`y`) so the rule can be tested in place at `lastIndex = i`
  // instead of copying the remainder of the input at every line start.
  // Without the `m` flag, `$` only matches the end of the input.
  const hrPattern = /(-{3,}|\*{3,}|_{3,}) *(\n|$)/y;

  /** Record a style over out[startOut, endOut); empty ranges are skipped. */
  function addStyle(
    style: SignalTextStyle['style'],
    startOut: number,
    endOut: number,
  ): void {
    const length = endOut - startOut;
    if (length > 0) textStyle.push({ style, start: startOut, length });
  }

  while (i < n) {
    const atLineStart = i === 0 || s[i - 1] === '\n';

    // ── Fenced code block ```[lang]\n...\n``` ──────────────────────────
    if (s[i] === '`' && s[i + 1] === '`' && s[i + 2] === '`') {
      const langNl = s.indexOf('\n', i + 3);
      if (langNl !== -1) {
        // Find closing ``` on its own line
        const closeAt = s.indexOf('\n```', langNl);
        if (closeAt !== -1) {
          const content = s.slice(langNl + 1, closeAt);
          const startOut = out.length;
          out += content;
          addStyle('MONOSPACE', startOut, out.length);
          // Advance past \n``` and the rest of the closing-fence line.
          const afterClose = s.indexOf('\n', closeAt + 4);
          if (afterClose !== -1) {
            // Both newlines around the closing fence are consumed from the
            // input; put one back (outside the MONOSPACE range) when more
            // text follows so it does not get glued onto the last code line.
            if (afterClose + 1 < n) out += '\n';
            i = afterClose + 1;
          } else {
            i = n;
          }
          continue;
        }
      }
      // Malformed fence — copy literally
      out += s[i];
      i++;
      continue;
    }

    // ── Inline code `text` ────────────────────────────────────────────
    if (s[i] === '`') {
      const end = s.indexOf('`', i + 1);
      const nl = s.indexOf('\n', i + 1);
      // Inline code must close on the same line.
      if (end !== -1 && (nl === -1 || end < nl)) {
        const content = s.slice(i + 1, end);
        const startOut = out.length;
        out += content;
        addStyle('MONOSPACE', startOut, out.length);
        i = end + 1;
        continue;
      }
    }

    // ── Horizontal rule --- / *** / ___ ──────────────────────────────
    // Must run before the emphasis rules: a `***` rule line would otherwise
    // be taken as the opening of a bold span whenever a `**` appears later
    // in the message.
    if (atLineStart) {
      hrPattern.lastIndex = i;
      const hrMatch = hrPattern.exec(s);
      if (hrMatch) {
        i += hrMatch[0].length;
        continue;
      }
    }

    // ── Bold **text** ─────────────────────────────────────────────────
    if (s[i] === '*' && s[i + 1] === '*' && s[i + 2] && s[i + 2] !== ' ') {
      const end = s.indexOf('**', i + 2);
      if (end !== -1 && s[end - 1] !== ' ') {
        const content = s.slice(i + 2, end);
        const startOut = out.length;
        out += content;
        addStyle('BOLD', startOut, out.length);
        i = end + 2;
        continue;
      }
    }

    // ── Strikethrough ~~text~~ ────────────────────────────────────────
    if (s[i] === '~' && s[i + 1] === '~' && s[i + 2] && s[i + 2] !== ' ') {
      const end = s.indexOf('~~', i + 2);
      if (end !== -1) {
        const content = s.slice(i + 2, end);
        const startOut = out.length;
        out += content;
        addStyle('STRIKETHROUGH', startOut, out.length);
        i = end + 2;
        continue;
      }
    }

    // ── Italic *text* (single star, not part of **) ─────────────────
    if (
      s[i] === '*' &&
      s[i + 1] !== '*' &&
      s[i + 1] !== ' ' &&
      s[i + 1] !== undefined
    ) {
      const end = findClosingStar(s, i + 1);
      if (end !== -1) {
        const content = s.slice(i + 1, end);
        const startOut = out.length;
        out += content;
        addStyle('ITALIC', startOut, out.length);
        i = end + 1;
        continue;
      }
    }

    // ── Italic _text_ (only at word boundaries) ──────────────────────
    if (s[i] === '_' && s[i + 1] !== '_' && s[i + 1] !== ' ' && s[i + 1]) {
      // Guard against snake_case: only treat as italic when preceded by a
      // non-word character (or start of string).
      const prevChar = i > 0 ? s[i - 1] : '';
      if (!/\w/.test(prevChar)) {
        const end = findClosingUnderscore(s, i + 1);
        if (end !== -1) {
          const content = s.slice(i + 1, end);
          const startOut = out.length;
          out += content;
          addStyle('ITALIC', startOut, out.length);
          i = end + 1;
          continue;
        }
      }
    }

    // ── ATX Heading ## text → text (as BOLD) ─────────────────────────
    if (atLineStart && s[i] === '#') {
      let j = i;
      while (j < n && s[j] === '#') j++;
      // Require a space after the #-run; "#tag" is not a heading.
      if (j < n && s[j] === ' ') {
        const lineEnd = s.indexOf('\n', j + 1);
        const headingText =
          lineEnd !== -1 ? s.slice(j + 1, lineEnd) : s.slice(j + 1);
        const startOut = out.length;
        out += headingText;
        addStyle('BOLD', startOut, out.length);
        if (lineEnd !== -1) {
          // The line break is kept but stays outside the BOLD range.
          out += '\n';
          i = lineEnd + 1;
        } else {
          i = n;
        }
        continue;
      }
    }

    // ── Links [text](url) → text (url) ───────────────────────────────
    if (s[i] === '[') {
      const closeBracket = s.indexOf(']', i + 1);
      if (closeBracket !== -1 && s[closeBracket + 1] === '(') {
        const closeParen = s.indexOf(')', closeBracket + 2);
        if (closeParen !== -1) {
          const linkText = s.slice(i + 1, closeBracket);
          const url = s.slice(closeBracket + 2, closeParen);
          out += `${linkText} (${url})`;
          i = closeParen + 1;
          continue;
        }
      }
    }

    // ── Default: copy character, preserving surrogate pairs ───────────
    const code = s.charCodeAt(i);
    if (code >= 0xd800 && code <= 0xdbff && i + 1 < n) {
      out += s[i] + s[i + 1];
      i += 2;
    } else {
      out += s[i];
      i++;
    }
  }

  return { text: out, textStyle };
}

// ---------------------------------------------------------------------------
// Helpers for parseSignalStyles
// ---------------------------------------------------------------------------

/**
 * Find the position of a closing single `*` that isn't part of `**`.
 *
 * Scans from `from` to the end of the current line. A `*` qualifies when it is
 * not followed by another `*` and not preceded by a space.
 *
 * @returns The index of the closing `*`, or -1 if the line ends first.
 */
function findClosingStar(s: string, from: number): number {
  for (let i = from; i < s.length; i++) {
    if (s[i] === '\n') return -1; // italics don't span lines
    if (s[i] === '*' && s[i + 1] !== '*' && s[i - 1] !== ' ') return i;
  }
  return -1;
}

/**
 * Find the closing `_` that isn't part of `__` and is at a word boundary.
 *
 * Scans from `from` to the end of the current line. A `_` qualifies when the
 * next character is neither `_` nor a word character (end of input counts as
 * a boundary).
 *
 * @returns The index of the closing `_`, or -1 if the line ends first.
 */
function findClosingUnderscore(s: string, from: number): number {
  for (let i = from; i < s.length; i++) {
    if (s[i] === '\n') return -1;
    if (s[i] === '_' && s[i + 1] !== '_' && !/\w/.test(s[i + 1] ?? '')) {
      return i;
    }
  }
  return -1;
}
// ---------------------------------------------------------------------------
// Marker-substitution helpers (WhatsApp / Telegram / Slack)
// ---------------------------------------------------------------------------
/** One contiguous piece of the input produced by `splitProtectedRegions`. */
interface Segment {
/** The exact source text of this piece (code pieces include their backticks). */
content: string;
/** True for fenced/inline code, which must be emitted without transformation. */
protected: boolean;
}
/**
 * Cut `text` into an ordered list of code and non-code pieces.
 *
 * Code pieces are fenced blocks (```...```) and single-line inline spans
 * (`...`); they are marked `protected`. Everything between them is returned
 * as unprotected pieces. Concatenating the `content` of all pieces yields the
 * original text. An input with no pieces at all (the empty string) yields a
 * single empty unprotected piece.
 */
function splitProtectedRegions(text: string): Segment[] {
  const codePattern = /```[\s\S]*?```|`[^`\n]+`/g;
  const pieces: Segment[] = [];
  let cursor = 0;

  for (const hit of text.matchAll(codePattern)) {
    const start = hit.index ?? 0;
    // Plain text sitting between the previous code span and this one.
    if (start > cursor) {
      pieces.push({ content: text.slice(cursor, start), protected: false });
    }
    pieces.push({ content: hit[0], protected: true });
    cursor = start + hit[0].length;
  }

  // Trailing plain text after the last code span.
  if (cursor < text.length) {
    pieces.push({ content: text.slice(cursor), protected: false });
  }

  if (pieces.length === 0) {
    return [{ content: text, protected: false }];
  }
  return pieces;
}
/**
 * Apply marker-substitution transformations to a non-code segment.
 *
 * Steps, in order: italic, bold, headings, links, horizontal rules.
 *
 * @param text A piece of Markdown that contains no code spans.
 * @param channel Target channel; only affects the link format
 *   (`<url|text>` for slack, `text (url)` otherwise).
 * @returns The segment with channel-native markers.
 */
function transformSegment(text: string, channel: ChannelType): string {
  let t = text;

  // Order matters: italic before bold.
  // The italic regex won't match **bold** (it requires the char after the opening *
  // to be a non-* non-space), so running italic first is safe. If we ran bold
  // first (**bold** → *bold*), the italic step would immediately re-convert *bold*
  // to _bold_, producing wrong output.

  // 1. Italic: *text* → _text_ (whatsapp/telegram/slack use _)
  t = t.replace(/(?<!\*)\*(?=[^\s*])([^*\n]+?)(?<=[^\s*])\*(?!\*)/g, '_$1_');

  // 2. Bold: **text** → *text* (whatsapp/telegram/slack use single *)
  t = t.replace(/\*\*(?=[^\s*])([^*]+?)(?<=[^\s*])\*\*/g, '*$1*');

  // 3. Headings: ## Title → *Title* (any level, line-start only)
  //    - `[ \t]+` rather than `\s+`: `\s` also matches a newline, which let a
  //      bare `#` line swallow the line break and bold the following line.
  //    - The whole heading becomes bold, so bold spans left inside the title
  //      by step 2 are unwrapped first; otherwise `## **Title**` would come
  //      out double-wrapped as `**Title**`. Italic spans are already `_..._`
  //      at this point and lone stars (`a * b`) do not match, so only step-2
  //      output is affected.
  t = t.replace(/^#{1,6}[ \t]+(.+)$/gm, (_line, title: string) => {
    const unwrapped = title.replace(
      /\*(?=[^\s*])([^*\n]+?)(?<=[^\s*])\*/g,
      '$1',
    );
    return `*${unwrapped}*`;
  });

  // 4. Links
  if (channel === 'slack') {
    t = t.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<$2|$1>');
  } else {
    t = t.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1 ($2)');
  }

  // 5. Horizontal rules: strip them (trailing spaces allowed, matching the
  //    rule pattern used by parseSignalStyles)
  t = t.replace(/^(-{3,}|\*{3,}|_{3,}) *$/gm, '');

  return t;
}