From f5359d4656772bca0c3b10eda259cbe8ca6654d0 Mon Sep 17 00:00:00 2001 From: dakkar <dakkar@thenautilus.net> Date: Sat, 9 Mar 2024 09:39:48 +0000 Subject: [PATCH] normalise emoji text also, tests --- src/internal/parser.ts | 2 +- test/parser.ts | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/internal/parser.ts b/src/internal/parser.ts index ef3264d..a31109b 100644 --- a/src/internal/parser.ts +++ b/src/internal/parser.ts @@ -629,7 +629,7 @@ export const language = P.createLanguage({ P.regexp(/[\p{Letter}\p{Number}\p{Mark}_+-]+/iu), mark, P.alt([P.lineEnd, side]), - ], 2).map(name => M.EMOJI_CODE(name as string)); + ], 2).map(name => M.EMOJI_CODE((name as string).normalize('NFC'))); }, link: r => { diff --git a/test/parser.ts b/test/parser.ts index d3000df..1bad6c8 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -338,6 +338,30 @@ hoge`; const output = [EMOJI_CODE('abc')]; assert.deepStrictEqual(mfm.parse(input), output); }); + + test('non-ASCII', () => { + const input = ':taneŝima_ĝojas:, :मार्जारः:, :鹅:, :taneŝima_malsanas:, :แมว:, and :लक्षणा:'; + const output = [ + EMOJI_CODE('taneŝima_ĝojas'), + TEXT(', '), + EMOJI_CODE('मार्जारः'), + TEXT(', '), + EMOJI_CODE('鹅'), + TEXT(', '), + EMOJI_CODE('taneŝima_malsanas'), + TEXT(', '), + EMOJI_CODE('แมว'), + TEXT(', and '), + EMOJI_CODE('लक्षणा'), + ]; + assert.deepStrictEqual(mfm.parse(input), output); + }); + + test('non-ASCII normalization', () => { + const input = ":fo\u{0308}o:"; + const output = [EMOJI_CODE("f\u{00F6}o")]; + assert.deepStrictEqual(mfm.parse(input), output); + }); }); describe('unicode emoji', () => { -- GitLab