From 93895b8a01535ee70ccc4d5e3189862a3cae61a2 Mon Sep 17 00:00:00 2001 From: marihachi <marihachi0620@gmail.com> Date: Thu, 6 Jan 2022 01:05:37 +0900 Subject: [PATCH] Introduce nesting limit (#87) * support fnDepthLimit * fix parse option * rename fnDepthLimit to nestLimit * support limit nesting: big, bold, small, italic, strike * improve mention * fix hashtag * support limit nesting: hashtag * support limit nesting: url * refine link label * refactor * fix link * change default value of nestLimit * fix link label * add test * restore and modify test --- etc/mfm-js.api.md | 1 + src/api.ts | 8 ++- src/internal/parser.pegjs | 147 ++++++++++++++++++++++---------------- test/parser.ts | 144 ++++++++++++++++++++++++++++++++++++- 4 files changed, 235 insertions(+), 65 deletions(-) diff --git a/etc/mfm-js.api.md b/etc/mfm-js.api.md index d4e14c3..ea4f238 100644 --- a/etc/mfm-js.api.md +++ b/etc/mfm-js.api.md @@ -233,6 +233,7 @@ export type NodeType<T extends MfmNode['type']> = T extends 'quote' ? MfmQuote : // @public (undocumented) export function parse(input: string, opts?: Partial<{ fnNameList: string[]; + nestLimit: number; }>): MfmNode[]; // Warning: (ae-forgotten-export) The symbol "MfmPlainNode" needs to be exported by the entry point index.d.ts diff --git a/src/api.ts b/src/api.ts index 4124061..eaf2b3b 100644 --- a/src/api.ts +++ b/src/api.ts @@ -8,8 +8,12 @@ const parser: peg.Parser = require('./internal/parser'); /** * Generates a MfmNode tree from the MFM string. 
*/ -export function parse(input: string, opts: Partial<{ fnNameList: string[]; }> = {}): MfmNode[] { - const nodes = parser.parse(input, { startRule: 'fullParser', fnNameList: opts.fnNameList }); +export function parse(input: string, opts: Partial<{ fnNameList: string[]; nestLimit: number; }> = {}): MfmNode[] { + const nodes = parser.parse(input, { + startRule: 'fullParser', + fnNameList: opts.fnNameList, + nestLimit: opts.nestLimit + }); return nodes; } diff --git a/src/internal/parser.pegjs b/src/internal/parser.pegjs index 76cc315..715e02c 100644 --- a/src/internal/parser.pegjs +++ b/src/internal/parser.pegjs @@ -64,6 +64,28 @@ } return options.fnNameList.includes(name); } + + // nesting control: enterNest() refuses to go deeper than nestLimit; leaveNest()/fallbackNest() undo the increment on success/failure. An explicit nestLimit of 0 is honoured; only null/undefined falls back to 20. + + const nestLimit = options.nestLimit != null ? options.nestLimit : 20; + let depth = 0; + function enterNest() { + if (depth + 1 > nestLimit) { + return false; + } + depth++; + return true; + } + + function leaveNest() { + depth--; + return true; + } + + function fallbackNest() { + depth--; + return false; + } } // @@ -230,19 +252,22 @@ unicodeEmoji // inline: big big - = "***" content:(!"***" @inline)+ "***" + = "***" content:bigContent "***" { return FN('tada', { }, mergeText(content)); } +bigContent + = &{ return enterNest(); } @(@(!"***" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + // inline: bold bold - = "**" content:(!"**" @inline)+ "**" + = "**" content:boldContent "**" { return BOLD(mergeText(content)); } - / "<b>" content:(!"</b>" @inline)+ "</b>" + / "<b>" content:boldTagContent "</b>" { return BOLD(mergeText(content)); } @@ -252,25 +277,31 @@ bold return BOLD(parsedContent); } +boldContent + = &{ return enterNest(); } @(@(!"**" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + +boldTagContent + = &{ return enterNest(); } @(@(!"</b>" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + // inline: small small - = "<small>" content:(!"</small>" @inline)+ "</small>" + = "<small>" content:smallContent "</small>" { return 
SMALL(mergeText(content)); } +smallContent + = &{ return enterNest(); } @(@(!"</small>" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + // inline: italic italic - = italicTag - / italicAlt - -italicTag - = "<i>" content:(!"</i>" @inline)+ "</i>" + = "<i>" content:italicContent "</i>" { return ITALIC(mergeText(content)); } + / italicAlt italicAlt = "*" content:$(!"*" ([a-z0-9]i / _))+ "*" &(EOF / LF / _ / ![a-z0-9]i) @@ -284,18 +315,27 @@ italicAlt return ITALIC(parsedContent); } +italicContent + = &{ return enterNest(); } @(@(!"</i>" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + // inline: strike strike - = "~~" content:(!("~" / LF) @inline)+ "~~" + = "~~" content:strikeContent "~~" { return STRIKE(mergeText(content)); } - / "<s>" content:(!("</s>" / LF) @inline)+ "</s>" + / "<s>" content:strikeTagContent "</s>" { return STRIKE(mergeText(content)); } +strikeContent + = &{ return enterNest(); } @(@(!("~" / LF) @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + +strikeTagContent + = &{ return enterNest(); } @(@(!("</s>" / LF) @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + // inline: inlineCode inlineCode @@ -321,83 +361,59 @@ mention } mentionName - = !"-" mentionNamePart+ // first char is not "-". + = [a-z0-9_]i (&("-"+ [a-z0-9_]i) . / [a-z0-9_]i)* { + // NOTE: first char and last char are not "-". return text(); } -mentionNamePart - = "-" &mentionNamePart // last char is not "-". - / [a-z0-9_]i - mentionHost - = ![.-] mentionHostPart+ // first char is neither "." nor "-". + = [a-z0-9_]i (&([.-]i+ [a-z0-9_]i) . / [a-z0-9_]i)* { + // NOTE: first char and last char are neither "." nor "-". return text(); } -mentionHostPart - = [.-] &mentionHostPart // last char is neither "." nor "-". - / [a-z0-9_]i - // inline: hashtag hashtag - = "#" !("\uFE0F"? "\u20E3") content:hashtagContent + = "#" !("\uFE0F"? 
"\u20E3") !(invalidHashtagContent !hashtagContentPart) content:$hashtagContentPart+ { return HASHTAG(content); } -hashtagContent - = !(invalidHashtagContent !hashtagContentPart) hashtagContentPart+ { return text(); } - invalidHashtagContent = [0-9]+ hashtagContentPart - = hashtagBracketPair - / hashtagChar + = "(" hashPairInner ")" + / "[" hashPairInner "]" + / "「" hashPairInner "」" + / ![ 　\t.,!?'"#:\/\[\]【】()「」<>] CHAR -hashtagBracketPair - = "(" hashtagContent* ")" - / "[" hashtagContent* "]" - / "「" hashtagContent* "」" - -hashtagChar - = ![ 　\t.,!?'"#:\/\[\]【】()「」<>] CHAR +hashPairInner + = &{ return enterNest(); } @(@hashtagContentPart* &{ return leaveNest(); } / &{ return fallbackNest(); }) // inline: URL url - = "<" url:altUrlFormat ">" + = "<" url:$("http" "s"? "://" (!(">" / _) CHAR)+) ">" { return N_URL(url, true); } - / url:urlFormat -{ - return N_URL(url); -} - -urlFormat - = "http" "s"? "://" urlContentPart+ + / "http" "s"? "://" (&([.,]+ urlContentPart) . / urlContentPart)+ { - return text(); + // NOTE: last char is neither "." nor ",". + return N_URL(text()); } urlContentPart - = urlBracketPair - / [.,] &urlContentPart // last char is neither "." nor ",". + = "(" urlPairInner ")" + / "[" urlPairInner "]" / [a-z0-9_/:%#@$&?!~=+-]i -urlBracketPair - = "(" urlContentPart* ")" - / "[" urlContentPart* "]" - -altUrlFormat - = "http" "s"? 
"://" (!(">" / _) CHAR)+ -{ - return text(); -} +urlPairInner + = &{ return enterNest(); } @(@(urlContentPart / [.,])* &{ return leaveNest(); } / &{ return fallbackNest(); }) // inline: link @@ -408,23 +424,34 @@ link } linkLabel - = linkLabelPart+ + = (!"]" @linkLabelPart)+ linkLabelPart - = url { return text(); /* text node */ } - / link { return text(); /* text node */ } - / mention { return text(); /* text node */ } - / !"]" @inline + = emojiCode + / unicodeEmoji + / big + / bold + / small + / italic + / strike + / inlineCode + / mathInline + / hashtag + / fn + / inlineText // inline: fn fn - = "$[" name:$([a-z0-9_]i)+ &{ return ensureFnName(name); } args:fnArgs? _ content:(!("]") @inline)+ "]" + = "$[" name:$([a-z0-9_]i)+ &{ return ensureFnName(name); } args:fnArgs? _ content:fnContent "]" { args = args || {}; return FN(name, args, mergeText(content)); } +fnContent + = &{ return enterNest(); } @(@(!"]" @inline)+ &{ return leaveNest(); } / &{ return fallbackNest(); }) + fnArgs = "." head:fnArg tails:("," @fnArg)* { diff --git a/test/parser.ts b/test/parser.ts index 11d00e2..bcc5757 100644 --- a/test/parser.ts +++ b/test/parser.ts @@ -984,14 +984,16 @@ hoge`; assert.deepStrictEqual(mfm.parse(input), output); }); - it('do not yield link node even if label is recognisable as a link', () => { + it('cannot nest a link in a link label', () => { const input = 'official instance: [[https://misskey.io/@ai](https://misskey.io/@ai)](https://misskey.io/@ai).'; const output = [ TEXT('official instance: '), LINK(false, 'https://misskey.io/@ai', [ - TEXT('[https://misskey.io/@ai](https://misskey.io/@ai)') + TEXT('[https://misskey.io/@ai') ]), - TEXT('.') + TEXT(']('), + N_URL('https://misskey.io/@ai'), + TEXT(').'), ]; assert.deepStrictEqual(mfm.parse(input), output); }); @@ -1081,6 +1083,142 @@ hoge`; }); }); + describe('nesting limit', () => { + it('big', () => { + const input = '<b><b>***abc***</b></b>'; + const output = [ + BOLD([ + BOLD([ + TEXT('**'), + ITALIC([ + 
TEXT('abc'), + ]), + TEXT('**'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + describe('bold', () => { + it('basic', () => { + const input = '<i><i>**abc**</i></i>'; + const output = [ + ITALIC([ + ITALIC([ + TEXT('*'), + ITALIC([ + TEXT('abc'), + ]), + TEXT('*'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + it('tag', () => { + const input = '<i><i><b>abc</b></i></i>'; + const output = [ + ITALIC([ + ITALIC([ + TEXT('<b>abc</b>'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + }); + + it('small', () => { + const input = '<i><i><small>abc</small></i></i>'; + const output = [ + ITALIC([ + ITALIC([ + TEXT('<small>abc</small>'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + it('italic', () => { + const input = '<b><b><i>abc</i></b></b>'; + const output = [ + BOLD([ + BOLD([ + TEXT('<i>abc</i>'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + describe('strike', () => { + it('basic', () => { + const input = '<b><b>~~abc~~</b></b>'; + const output = [ + BOLD([ + BOLD([ + TEXT('~~abc~~'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + it('tag', () => { + const input = '<b><b><s>abc</s></b></b>'; + const output = [ + BOLD([ + BOLD([ + TEXT('<s>abc</s>'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + }); + + it('hashtag', () => { + const input = '<b><b>#abc(xyz)</b></b>'; + const output = [ + BOLD([ + BOLD([ + HASHTAG('abc'), + TEXT('(xyz)'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + it('url', () => { + const input = '<b><b>https://example.com/abc(xyz)</b></b>'; + const output = [ + BOLD([ + BOLD([ + N_URL('https://example.com/abc'), + TEXT('(xyz)'), + ]), + ]), + ]; + 
assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + + it('fn', () => { + const input = '<b><b>$[a b]</b></b>'; + const output = [ + BOLD([ + BOLD([ + TEXT('$[a b]'), + ]), + ]), + ]; + assert.deepStrictEqual(mfm.parse(input, { nestLimit: 2 }), output); + }); + }); + it('composite', () => { const input = `before -- GitLab