fix(telegram): flatten Markdown horizontal rules in the sanitizer

Bare --- / *** / ___ horizontal-rule lines confuse Telegram's legacy Markdown parser. The *** and ___ forms also unbalance the delimiter counts the sanitizer relies on, which makes the fallback strip all formatting. Replace each such line with a plain Unicode divider (⎯⎯⎯) before the delimiter pass, the same approach the bullet conversion already uses.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-12 18:11:51 +08:00 · 2026-04-23 01:50:21 +03:00
parent 22ed951f05
commit 68352351e4
2 changed files with 17 additions and 0 deletions
@@ -65,4 +65,16 @@ describe('sanitizeTelegramLegacyMarkdown', () => {
  it('preserves indented list structure', () => {
    expect(sanitizeTelegramLegacyMarkdown('  - nested')).toBe('  • nested');
  });
+
+  it('flattens Markdown horizontal rules (---, ***, ___)', () => {
+    const input = 'before\n---\n***\n___\nafter';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(
+      'before\n⎯⎯⎯\n⎯⎯⎯\n⎯⎯⎯\nafter',
+    );
+  });
+
+  it('leaves horizontal rules inside code blocks alone', () => {
+    const input = '```\n---\n```';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
+  });
 });
@@ -28,6 +28,11 @@ export function sanitizeTelegramLegacyMarkdown(input: string): string {
  // as prose.
  text = text.replace(/^(\s*)[-+]\s+/gm, '$1• ');

+  // Flatten Markdown horizontal rules (bare --- / *** / ___ lines) to a
+  // plain Unicode divider. The parser doesn't understand HR syntax and the
+  // `*` / `_` characters would otherwise unbalance the delimiter counts below.
+  text = text.replace(/^[ \t]*[-_*]{3,}[ \t]*$/gm, '⎯⎯⎯');
+
  text = text.replace(/\*\*([^*\n]+?)\*\*/g, '*$1*');
  text = text.replace(/__([^_\n]+?)__/g, '_$1_');