From bb57a5f0921ab7e46f44128c875720617452564a Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 20 Jun 2025 21:55:30 +1000 Subject: [PATCH] Parse tables without blank line before GitHub doesn't mention it in their spec but it does work, e.g.: text |a| |---| |b| Parses as a paragraph with "text", and a table. --- CHANGELOG.md | 2 + .../gfm/tables/internal/TableBlockParser.java | 6 +- .../commonmark/ext/gfm/tables/TablesTest.java | 58 +++++++- .../commonmark/internal/BlockStartImpl.java | 13 ++ .../commonmark/internal/DocumentParser.java | 41 +++--- .../commonmark/internal/HeadingParser.java | 2 +- .../LinkReferenceDefinitionParser.java | 19 +++ .../commonmark/internal/ParagraphParser.java | 4 + .../commonmark/parser/block/BlockStart.java | 41 ++++++ .../parser/block/MatchedBlockParser.java | 3 +- .../test/BlockParserFactoryTest.java | 127 ++++++++++++++++++ .../java/org/commonmark/test/ParserTest.java | 41 ------ 12 files changed, 286 insertions(+), 71 deletions(-) create mode 100644 commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 0808edb72..7ae5ffed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ with the exception that 0.x versions can break between minor versions. ### Added - More documentation with examples for `Node` classes ### Changed +- GitHub tables: Tables are now parsed even if there's no blank line before the + table heading, matching GitHub's behavior. ### Fixed - `MarkdownRenderer`: Fix precedence for `nodeRendererFactory`: Factories passed to the builder can now override rendering for core node types. diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java index 0dfdd3159..57af128d8 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java @@ -274,17 +274,17 @@ public static class Factory extends AbstractBlockParserFactory { @Override public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { List paragraphLines = matchedBlockParser.getParagraphLines().getLines(); - if (paragraphLines.size() == 1 && Characters.find('|', paragraphLines.get(0).getContent(), 0) != -1) { + if (paragraphLines.size() >= 1 && Characters.find('|', paragraphLines.get(paragraphLines.size() - 1).getContent(), 0) != -1) { SourceLine line = state.getLine(); SourceLine separatorLine = line.substring(state.getIndex(), line.getContent().length()); List columns = parseSeparator(separatorLine.getContent()); if (columns != null && !columns.isEmpty()) { - SourceLine paragraph = paragraphLines.get(0); + SourceLine paragraph = paragraphLines.get(paragraphLines.size() - 1); List headerCells = split(paragraph); if (columns.size() >= headerCells.size()) { return BlockStart.of(new TableBlockParser(columns, paragraph)) .atIndex(state.getIndex()) - .replaceActiveBlockParser(); + .replaceParagraphLines(1); } } } diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java index 3135b4d8e..3f4b37d54 100644 --- a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java +++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java @@ -1,8 +1,7 @@ package org.commonmark.ext.gfm.tables; import org.commonmark.Extension; -import org.commonmark.node.Node; -import org.commonmark.node.SourceSpan; +import org.commonmark.node.*; import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.AttributeProvider; @@ -79,11 +78,6 @@ public void separatorNeedsPipes() { assertRendering("Abc|Def\n|--- ---", "

Abc|Def\n|--- ---

\n"); } - @Test - public void headerMustBeOneLine() { - assertRendering("No\nAbc|Def\n---|---", "

No\nAbc|Def\n---|---

\n"); - } - @Test public void oneHeadNoBody() { assertRendering("Abc|Def\n---|---", "\n" + @@ -702,6 +696,26 @@ public void danglingPipe() { "

|

\n"); } + @Test + public void interruptsParagraph() { + assertRendering("text\n" + + "|a |\n" + + "|---|\n" + + "|b |", "

text

\n" + + "
\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
a
b
\n"); + } + @Test public void attributeProviderIsApplied() { AttributeProviderFactory factory = new AttributeProviderFactory() { @@ -835,6 +849,36 @@ public void sourceSpans() { assertThat(bodyRow3Cell2.getSourceSpans()).isEqualTo(List.of()); } + @Test + public void sourceSpansWhenInterrupting() { + var parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + var document = parser.parse("a\n" + + "bc\n" + + "|de|\n" + + "|---|\n" + + "|fg|"); + + var paragraph = (Paragraph) document.getFirstChild(); + var text = (Text) paragraph.getFirstChild(); + assertThat(text.getLiteral()).isEqualTo("a"); + assertThat(text.getNext()).isInstanceOf(SoftLineBreak.class); + var text2 = (Text) text.getNext().getNext(); + assertThat(text2.getLiteral()).isEqualTo("bc"); + + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(0, 0, 0, 1), + SourceSpan.of(1, 0, 2, 2))); + + var table = (TableBlock) document.getLastChild(); + assertThat(table.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(2, 0, 5, 4), + SourceSpan.of(3, 0, 10, 5), + SourceSpan.of(4, 0, 16, 4))); + } + @Override protected String render(String source) { return RENDERER.render(PARSER.parse(source)); diff --git a/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java b/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java index c7e967d46..516f944b2 100644 --- a/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java @@ -9,6 +9,7 @@ public class BlockStartImpl extends BlockStart { private int newIndex = -1; private int newColumn = -1; private boolean replaceActiveBlockParser = false; + private int replaceParagraphLines = 0; public BlockStartImpl(BlockParser... blockParsers) { this.blockParsers = blockParsers; @@ -30,6 +31,10 @@ public boolean isReplaceActiveBlockParser() { return replaceActiveBlockParser; } + int getReplaceParagraphLines() { + return replaceParagraphLines; + } + @Override public BlockStart atIndex(int newIndex) { this.newIndex = newIndex; @@ -48,4 +53,12 @@ public BlockStart replaceActiveBlockParser() { return this; } + @Override + public BlockStart replaceParagraphLines(int lines) { + if (!(lines >= 1)) { + throw new IllegalArgumentException("Lines must be >= 1"); + } + this.replaceParagraphLines = lines; + return this; + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index 6059cc51c..d935f8d27 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -270,9 +270,15 @@ private void parseLine(String ln, int inputIndex) { } List replacedSourceSpans = null; - if (blockStart.isReplaceActiveBlockParser()) { - Block replacedBlock = prepareActiveBlockParserForReplacement(); - replacedSourceSpans = replacedBlock.getSourceSpans(); + if (blockStart.getReplaceParagraphLines() >= 1 || blockStart.isReplaceActiveBlockParser()) { + var activeBlockParser = getActiveBlockParser(); + if (activeBlockParser instanceof ParagraphParser) { + var paragraphParser = (ParagraphParser) activeBlockParser; + var lines = blockStart.isReplaceActiveBlockParser() ? Integer.MAX_VALUE : blockStart.getReplaceParagraphLines(); + replacedSourceSpans = replaceParagraphLines(lines, paragraphParser); + } else if (blockStart.isReplaceActiveBlockParser()) { + replacedSourceSpans = prepareActiveBlockParserForReplacement(activeBlockParser); + } } for (BlockParser newBlockParser : blockStart.getBlockParsers()) { @@ -498,24 +504,23 @@ private OpenBlockParser deactivateBlockParser() { return openBlockParsers.remove(openBlockParsers.size() - 1); } - private Block prepareActiveBlockParserForReplacement() { - // Note that we don't want to parse inlines, as it's getting replaced. - BlockParser old = deactivateBlockParser().blockParser; + private List replaceParagraphLines(int lines, ParagraphParser paragraphParser) { + // Remove lines from paragraph as the new block is using them. + // If all lines are used, this also unlinks the Paragraph block. + var sourceSpans = paragraphParser.removeLines(lines); + // Close the paragraph block parser, which will finalize it. + closeBlockParsers(1); + return sourceSpans; + } - if (old instanceof ParagraphParser) { - ParagraphParser paragraphParser = (ParagraphParser) old; - // Collect any link reference definitions. Note that replacing the active block parser is done after a - // block parser got the current paragraph content using MatchedBlockParser#getContentString. In case the - // paragraph started with link reference definitions, we parse and strip them before the block parser gets - // the content. We want to keep them. - // If no replacement happens, we collect the definitions as part of finalizing blocks. - addDefinitionsFrom(paragraphParser); - } + private List prepareActiveBlockParserForReplacement(BlockParser blockParser) { + // Note that we don't want to parse inlines here, as it's getting replaced. + deactivateBlockParser(); // Do this so that source positions are calculated, which we will carry over to the replacing block. - old.closeBlock(); - old.getBlock().unlink(); - return old.getBlock(); + blockParser.closeBlock(); + blockParser.getBlock().unlink(); + return blockParser.getBlock().getSourceSpans(); } private Document finalizeAndProcess() { diff --git a/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java b/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java index 3bc9ba5c4..05f070137 100644 --- a/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java @@ -60,7 +60,7 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar if (!paragraph.isEmpty()) { return BlockStart.of(new HeadingParser(setextHeadingLevel, paragraph)) .atIndex(line.getContent().length()) - .replaceActiveBlockParser(); + .replaceParagraphLines(paragraph.getLines().size()); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java index b58e669ef..637d3b111 100644 --- a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java @@ -10,6 +10,7 @@ import org.commonmark.parser.beta.Scanner; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -102,6 +103,14 @@ State getState() { return state; } + List removeLines(int lines) { + var removedSpans = Collections.unmodifiableList(new ArrayList<>( + sourceSpans.subList(Math.max(sourceSpans.size() - lines, 0), sourceSpans.size()))); + removeLast(lines, paragraphLines); + removeLast(lines, sourceSpans); + return removedSpans; + } + private boolean startDefinition(Scanner scanner) { // Finish any outstanding references now. We don't do this earlier because we need addSourceSpan to have been // called before we do it. @@ -269,6 +278,16 @@ private void finishReference() { title = null; } + private static void removeLast(int n, List list) { + if (n >= list.size()) { + list.clear(); + } else { + for (int i = 0; i < n; i++) { + list.remove(list.size() - 1); + } + } + } + enum State { // Looking for the start of a definition, i.e. `[` START_DEFINITION, diff --git a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java index 93a2dd593..27eb1e647 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java @@ -79,4 +79,8 @@ public void parseInlines(InlineParser inlineParser) { public SourceLines getParagraphLines() { return linkReferenceDefinitionParser.getParagraphLines(); } + + public List removeLines(int lines) { + return linkReferenceDefinitionParser.removeLines(lines); + } } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java index d9e7a2b49..c41f1caa3 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java @@ -10,18 +10,59 @@ public abstract class BlockStart { protected BlockStart() { } + /** + * Result for when there is no block start. + */ public static BlockStart none() { return null; } + /** + * Start block(s) with the specified parser(s). + */ public static BlockStart of(BlockParser... blockParsers) { return new BlockStartImpl(blockParsers); } + /** + * Continue parsing at the specified index. + * + * @param newIndex the new index, see {@link ParserState#getIndex()} + */ public abstract BlockStart atIndex(int newIndex); + /** + * Continue parsing at the specified column (for tab handling). + * + * @param newColumn the new column, see {@link ParserState#getColumn()} + */ public abstract BlockStart atColumn(int newColumn); + /** + * @deprecated use {@link #replaceParagraphLines(int)} instead; please raise an issue if that doesn't work for you + * for some reason. + */ + @Deprecated public abstract BlockStart replaceActiveBlockParser(); + /** + * Replace a number of lines from the current paragraph (as returned by + * {@link MatchedBlockParser#getParagraphLines()}) with the new block. + *

+ * This is useful for parsing blocks that start with normal paragraphs and only have special marker syntax in later + * lines, e.g. in this: + *

+     * Foo
+     * ===
+     * 
+ * The Foo line is initially parsed as a normal paragraph, then === is parsed as a heading + * marker, replacing the 1 paragraph line before. The end result is a single Heading block. + *

+ * Note that source spans from the replaced lines are automatically added to the new block. + * + * @param lines the number of lines to replace (at least 1); use {@link Integer#MAX_VALUE} to replace the whole + * paragraph + */ + public abstract BlockStart replaceParagraphLines(int lines); + } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java index 1f2bcfb2a..c4619d8c2 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java @@ -12,7 +12,8 @@ public interface MatchedBlockParser { BlockParser getMatchedBlockParser(); /** - * Returns the current paragraph lines if the matched block is a paragraph. + * Returns the current paragraph lines if the matched block is a paragraph. If you want to use some or all of the + * lines for starting a new block instead, use {@link BlockStart#replaceParagraphLines(int)}. * * @return paragraph content or an empty list */ diff --git a/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java b/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java new file mode 100644 index 000000000..b733d7970 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java @@ -0,0 +1,127 @@ +package org.commonmark.test; + +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; +import org.commonmark.parser.InlineParser; +import org.commonmark.parser.Parser; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.block.*; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class BlockParserFactoryTest { + + @Test + public void customBlockParserFactory() { + var parser = Parser.builder().customBlockParserFactory(new DashBlockParser.Factory()).build(); + + // The dashes would normally be a ThematicBreak + var doc = parser.parse("hey\n\n---\n"); + + assertThat(doc.getFirstChild()).isInstanceOf(Paragraph.class); + assertThat(((Text) doc.getFirstChild().getFirstChild()).getLiteral()).isEqualTo("hey"); + assertThat(doc.getLastChild()).isInstanceOf(DashBlock.class); + } + + @Test + public void replaceActiveBlockParser() { + var parser = Parser.builder() + .customBlockParserFactory(new StarHeadingBlockParser.Factory()) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + + var doc = parser.parse("a\nbc\n***\n"); + + var heading = doc.getFirstChild(); + assertThat(heading).isInstanceOf(StarHeading.class); + assertThat(heading.getNext()).isNull(); + var a = heading.getFirstChild(); + assertThat(a).isInstanceOf(Text.class); + assertThat(((Text) a).getLiteral()).isEqualTo("a"); + var bc = a.getNext().getNext(); + assertThat(bc).isInstanceOf(Text.class); + assertThat(((Text) bc).getLiteral()).isEqualTo("bc"); + assertThat(bc.getNext()).isNull(); + + assertThat(heading.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(0, 0, 0, 1), + SourceSpan.of(1, 0, 2, 2), + SourceSpan.of(2, 0, 5, 3))); + assertThat(a.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 1))); + assertThat(bc.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 2, 2))); + } + + private static class DashBlock extends CustomBlock { + } + + private static class DashBlockParser extends AbstractBlockParser { + + private DashBlock dash = new DashBlock(); + + @Override + public Block getBlock() { + return dash; + } + + @Override + public BlockContinue tryContinue(ParserState parserState) { + return BlockContinue.none(); + } + + static class Factory extends AbstractBlockParserFactory { + + @Override + public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { + if (state.getLine().getContent().equals("---")) { + return BlockStart.of(new DashBlockParser()); + } + return BlockStart.none(); + } + } + } + + private static class StarHeading extends CustomBlock { + } + + private static class StarHeadingBlockParser extends AbstractBlockParser { + + private final SourceLines content; + private final StarHeading heading = new StarHeading(); + + StarHeadingBlockParser(SourceLines content) { + this.content = content; + } + + @Override + public Block getBlock() { + return heading; + } + + @Override + public BlockContinue tryContinue(ParserState parserState) { + return BlockContinue.none(); + } + + @Override + public void parseInlines(InlineParser inlineParser) { + inlineParser.parse(content, heading); + } + + static class Factory extends AbstractBlockParserFactory { + + @Override + public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { + var lines = matchedBlockParser.getParagraphLines(); + if (state.getLine().getContent().toString().startsWith("***")) { + return BlockStart.of(new StarHeadingBlockParser(lines)) + .replaceActiveBlockParser(); + } else { + return BlockStart.none(); + } + } + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/ParserTest.java b/commonmark/src/test/java/org/commonmark/test/ParserTest.java index 3b9ef09e9..c119b5e2d 100644 --- a/commonmark/src/test/java/org/commonmark/test/ParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/ParserTest.java @@ -42,18 +42,6 @@ public void ioReaderTest() throws IOException { assertThat(renderer.render(document1)).isEqualTo(renderer.render(document2)); } - @Test - public void customBlockParserFactory() { - Parser parser = Parser.builder().customBlockParserFactory(new DashBlockParserFactory()).build(); - - // The dashes would normally be a ThematicBreak - Node document = parser.parse("hey\n\n---\n"); - - assertThat(document.getFirstChild()).isInstanceOf(Paragraph.class); - assertThat(((Text) document.getFirstChild().getFirstChild()).getLiteral()).isEqualTo("hey"); - assertThat(document.getLastChild()).isInstanceOf(DashBlock.class); - } - @Test public void enabledBlockTypes() { String given = "# heading 1\n\nnot a heading"; @@ -154,33 +142,4 @@ private String firstText(Node n) { } return ((Text) n).getLiteral(); } - - private static class DashBlock extends CustomBlock { - } - - private static class DashBlockParser extends AbstractBlockParser { - - private DashBlock dash = new DashBlock(); - - @Override - public Block getBlock() { - return dash; - } - - @Override - public BlockContinue tryContinue(ParserState parserState) { - return BlockContinue.none(); - } - } - - private static class DashBlockParserFactory extends AbstractBlockParserFactory { - - @Override - public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { - if (state.getLine().getContent().equals("---")) { - return BlockStart.of(new DashBlockParser()); - } - return BlockStart.none(); - } - } }