mirror of
https://github.com/qwibitai/nanoclaw.git
synced 2026-06-12 18:11:51 +08:00
fix(telegram): flatten Markdown horizontal rules in the sanitizer
Bare --- / *** / ___ HR lines confuse Telegram's legacy Markdown parser and (for ***/___) unbalance the delimiter count the sanitizer relies on, which causes the fallback to strip all formatting. Replace them with a plain Unicode divider (⎯⎯⎯) before the delimiter pass — same approach the bullet conversion already uses. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -65,4 +65,16 @@ describe('sanitizeTelegramLegacyMarkdown', () => {
|
||||
it('preserves indented list structure', () => {
|
||||
expect(sanitizeTelegramLegacyMarkdown(' - nested')).toBe(' • nested');
|
||||
});
|
||||
|
||||
it('flattens Markdown horizontal rules (---, ***, ___)', () => {
|
||||
const input = 'before\n---\n***\n___\nafter';
|
||||
expect(sanitizeTelegramLegacyMarkdown(input)).toBe(
|
||||
'before\n⎯⎯⎯\n⎯⎯⎯\n⎯⎯⎯\nafter',
|
||||
);
|
||||
});
|
||||
|
||||
it('leaves horizontal rules inside code blocks alone', () => {
|
||||
const input = '```\n---\n```';
|
||||
expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -28,6 +28,11 @@ export function sanitizeTelegramLegacyMarkdown(input: string): string {
|
||||
// as prose.
|
||||
text = text.replace(/^(\s*)[-+]\s+/gm, '$1• ');
|
||||
|
||||
// Flatten Markdown horizontal rules (bare --- / *** / ___ lines) to a
|
||||
// plain Unicode divider. The parser doesn't understand HR syntax and the
|
||||
// `*` / `_` characters would otherwise unbalance the delimiter counts below.
|
||||
text = text.replace(/^[ \t]*[-_*]{3,}[ \t]*$/gm, '⎯⎯⎯');
|
||||
|
||||
text = text.replace(/\*\*([^*\n]+?)\*\*/g, '*$1*');
|
||||
text = text.replace(/__([^_\n]+?)__/g, '_$1_');
|
||||
|
||||
|
||||
Reference in New Issue
Block a user