From 0706cd17179632ab529256ef16c8234fa78d5816 Mon Sep 17 00:00:00 2001
From: st0012
Date: Sat, 28 Feb 2026 23:29:47 +0000
Subject: [PATCH] Fix blockquote parsing to match GFM behavior

Two changes to the BlockQuoteRaw rule in the PEG grammar:

1. Stop lazy continuation from consuming block-level elements by adding negative lookaheads for headings, list markers, and code fences.

2. End blockquotes at unquoted blank lines. Previously, all blank lines were consumed and the parser continued matching subsequent > lines as part of the same blockquote. Now only blank lines prefixed with > continue the blockquote, matching GFM where an unquoted blank line separates two distinct blockquotes.
---
 lib/rdoc/markdown.kpeg | 4 +-
 lib/rdoc/markdown.rb | 126 +++++++++++++++++++++++++-------
 test/rdoc/rdoc_markdown_test.rb | 67 ++++++++++++++++-
 3 files changed, 168 insertions(+), 29 deletions(-)

diff --git a/lib/rdoc/markdown.kpeg b/lib/rdoc/markdown.kpeg index d95a88a823..5bb479b4e0 100644 --- a/lib/rdoc/markdown.kpeg +++ b/lib/rdoc/markdown.kpeg @@ -617,8 +617,8 @@ BlockQuote = BlockQuoteRaw:a BlockQuoteRaw = @StartList:a (( ">" " "? Line:l { a << l } ) - ( !">" !@BlankLine Line:c { a << c } )* - ( @BlankLine:n { a << n } )* + ( !">" !@BlankLine !AtxStart !Bullet !Enumerator !Ticks3 Line:c { a << c } )* + ( ">" @BlankLine:n { a << n } )* )+ { inner_parse a.join } diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb index e4d0ae9ff6..790672d95b 100644 --- a/lib/rdoc/markdown.rb +++ b/lib/rdoc/markdown.rb @@ -1656,7 +1656,7 @@ def _BlockQuote return _tmp end - # BlockQuoteRaw = @StartList:a (">" " "? Line:l { a << l } (!">" !@BlankLine Line:c { a << c })* (@BlankLine:n { a << n })*)+ { inner_parse a.join } + # BlockQuoteRaw = @StartList:a (">" " "? 
Line:l { a << l } (!">" !@BlankLine !AtxStart !Bullet !Enumerator !Ticks3 Line:c { a << c })* (">" @BlankLine:n { a << n })*)+ { inner_parse a.join } def _BlockQuoteRaw _save = self.pos @@ -1718,6 +1718,38 @@ def _BlockQuoteRaw self.pos = _save5 break end + _save8 = self.pos + _tmp = apply(:_AtxStart) + _tmp = _tmp ? nil : true + self.pos = _save8 + unless _tmp + self.pos = _save5 + break + end + _save9 = self.pos + _tmp = apply(:_Bullet) + _tmp = _tmp ? nil : true + self.pos = _save9 + unless _tmp + self.pos = _save5 + break + end + _save10 = self.pos + _tmp = apply(:_Enumerator) + _tmp = _tmp ? nil : true + self.pos = _save10 + unless _tmp + self.pos = _save5 + break + end + _save11 = self.pos + _tmp = apply(:_Ticks3) + _tmp = _tmp ? nil : true + self.pos = _save11 + unless _tmp + self.pos = _save5 + break + end _tmp = apply(:_Line) c = @result unless _tmp @@ -1741,18 +1773,23 @@ def _BlockQuoteRaw end while true - _save9 = self.pos + _save13 = self.pos while true # sequence + _tmp = match_string(">") + unless _tmp + self.pos = _save13 + break + end _tmp = _BlankLine() n = @result unless _tmp - self.pos = _save9 + self.pos = _save13 break end @result = begin; a << n ; end _tmp = true unless _tmp - self.pos = _save9 + self.pos = _save13 end break end # end sequence @@ -1769,65 +1806,97 @@ def _BlockQuoteRaw if _tmp while true - _save10 = self.pos + _save14 = self.pos while true # sequence _tmp = match_string(">") unless _tmp - self.pos = _save10 + self.pos = _save14 break end - _save11 = self.pos + _save15 = self.pos _tmp = match_string(" ") unless _tmp _tmp = true - self.pos = _save11 + self.pos = _save15 end unless _tmp - self.pos = _save10 + self.pos = _save14 break end _tmp = apply(:_Line) l = @result unless _tmp - self.pos = _save10 + self.pos = _save14 break end @result = begin; a << l ; end _tmp = true unless _tmp - self.pos = _save10 + self.pos = _save14 break end while true - _save13 = self.pos + _save17 = self.pos while true # sequence - _save14 = 
self.pos + _save18 = self.pos _tmp = match_string(">") _tmp = _tmp ? nil : true - self.pos = _save14 + self.pos = _save18 unless _tmp - self.pos = _save13 + self.pos = _save17 break end - _save15 = self.pos + _save19 = self.pos _tmp = _BlankLine() _tmp = _tmp ? nil : true - self.pos = _save15 + self.pos = _save19 + unless _tmp + self.pos = _save17 + break + end + _save20 = self.pos + _tmp = apply(:_AtxStart) + _tmp = _tmp ? nil : true + self.pos = _save20 + unless _tmp + self.pos = _save17 + break + end + _save21 = self.pos + _tmp = apply(:_Bullet) + _tmp = _tmp ? nil : true + self.pos = _save21 + unless _tmp + self.pos = _save17 + break + end + _save22 = self.pos + _tmp = apply(:_Enumerator) + _tmp = _tmp ? nil : true + self.pos = _save22 + unless _tmp + self.pos = _save17 + break + end + _save23 = self.pos + _tmp = apply(:_Ticks3) + _tmp = _tmp ? nil : true + self.pos = _save23 unless _tmp - self.pos = _save13 + self.pos = _save17 break end _tmp = apply(:_Line) c = @result unless _tmp - self.pos = _save13 + self.pos = _save17 break end @result = begin; a << c ; end _tmp = true unless _tmp - self.pos = _save13 + self.pos = _save17 end break end # end sequence @@ -1836,23 +1905,28 @@ def _BlockQuoteRaw end _tmp = true unless _tmp - self.pos = _save10 + self.pos = _save14 break end while true - _save17 = self.pos + _save25 = self.pos while true # sequence + _tmp = match_string(">") + unless _tmp + self.pos = _save25 + break + end _tmp = _BlankLine() n = @result unless _tmp - self.pos = _save17 + self.pos = _save25 break end @result = begin; a << n ; end _tmp = true unless _tmp - self.pos = _save17 + self.pos = _save25 end break end # end sequence @@ -1861,7 +1935,7 @@ def _BlockQuoteRaw end _tmp = true unless _tmp - self.pos = _save10 + self.pos = _save14 end break end # end sequence @@ -16457,7 +16531,7 @@ def _DefinitionListDefinition Rules[:_SetextHeading2] = rule_info("SetextHeading2", "&(@RawLine SetextBottom2) @StartList:a (!@Endline Inline:b { a << b })+ @Sp 
@Newline SetextBottom2 { RDoc::Markup::Heading.new(2, a.join) }") Rules[:_Heading] = rule_info("Heading", "(SetextHeading | AtxHeading)") Rules[:_BlockQuote] = rule_info("BlockQuote", "BlockQuoteRaw:a { RDoc::Markup::BlockQuote.new(*a) }") - Rules[:_BlockQuoteRaw] = rule_info("BlockQuoteRaw", "@StartList:a (\">\" \" \"? Line:l { a << l } (!\">\" !@BlankLine Line:c { a << c })* (@BlankLine:n { a << n })*)+ { inner_parse a.join }") + Rules[:_BlockQuoteRaw] = rule_info("BlockQuoteRaw", "@StartList:a (\">\" \" \"? Line:l { a << l } (!\">\" !@BlankLine !AtxStart !Bullet !Enumerator !Ticks3 Line:c { a << c })* (\">\" @BlankLine:n { a << n })*)+ { inner_parse a.join }") Rules[:_NonblankIndentedLine] = rule_info("NonblankIndentedLine", "!@BlankLine IndentedLine") Rules[:_VerbatimChunk] = rule_info("VerbatimChunk", "@BlankLine*:a NonblankIndentedLine+:b { a.concat b }") Rules[:_Verbatim] = rule_info("Verbatim", "VerbatimChunk+:a { RDoc::Markup::Verbatim.new(*a.flatten) }") diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb index 608974d7c9..fe7b01c5ee 100644 --- a/test/rdoc/rdoc_markdown_test.rb +++ b/test/rdoc/rdoc_markdown_test.rb @@ -120,12 +120,77 @@ def test_parse_block_quote_separate expected = doc( block( - para("this is\na block quote"), + para("this is\na block quote")), + block( para("that continues"))) assert_equal expected, doc end + def test_parse_block_quote_no_lazy_continuation_for_list + doc = parse <<-BLOCK_QUOTE +> foo +- bar + BLOCK_QUOTE + + expected = + doc( + block( + para("foo")), + list(:BULLET, + item(nil, para("bar")))) + + assert_equal expected, doc + end + + def test_parse_block_quote_no_lazy_continuation_for_ordered_list + doc = parse <<-BLOCK_QUOTE +> foo +1. 
bar + BLOCK_QUOTE + + expected = + doc( + block( + para("foo")), + list(:NUMBER, + item(nil, para("bar")))) + + assert_equal expected, doc + end + + def test_parse_block_quote_no_lazy_continuation_for_heading + doc = parse <<-BLOCK_QUOTE +> foo +# bar + BLOCK_QUOTE + + expected = + doc( + block( + para("foo")), + head(1, "bar")) + + assert_equal expected, doc + end + + def test_parse_block_quote_no_lazy_continuation_for_code_fence + doc = parse <<~BLOCK_QUOTE + > foo + ``` + code + ``` + BLOCK_QUOTE + + expected = + doc( + block( + para("foo")), + verb("code\n")) + + assert_equal expected, doc + end + def test_parse_char_entity doc = parse 'π &nn;'