From ea731d203b9f9a80d061cfca70d92a6bdacb2fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89amonn=20McManus?= Date: Mon, 27 Apr 2026 15:26:08 -0700 Subject: [PATCH] Add support for Markdown tables. This is very basic, and the main intent is just to avoid mangling tables. We essentially preserve the formatting of any table we find, without attempting to adjust it in any way. PiperOrigin-RevId: 906574344 --- core/pom.xml | 15 ++++--- .../java/javadoc/JavadocFormatter.java | 2 + .../java/javadoc/JavadocWriter.java | 16 ++++++++ .../java/javadoc/MarkdownPositions.java | 18 +++++++- .../googlejavaformat/java/javadoc/Token.java | 14 +++++++ .../java/JavadocFormattingTest.java | 41 +++++++++---------- 6 files changed, 78 insertions(+), 28 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index d3235cb18..273e314fb 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -39,11 +39,6 @@ com.google.guava guava - - org.commonmark - commonmark - 0.28.0 - @@ -66,6 +61,16 @@ auto-service-annotations true + + org.commonmark + commonmark + 0.28.0 + + + org.commonmark + commonmark-ext-gfm-tables + 0.28.0 + diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java index dbc2be841..f801a4d75 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java @@ -44,6 +44,7 @@ import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import 
com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak; @@ -137,6 +138,7 @@ private static String render(List input, int blockIndent, boolean classic case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak(); case Literal t -> output.writeLiteral(t); case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t); + case MarkdownTable t -> output.writeMarkdownTable(t); case ListItemCloseTag unused -> {} case OptionalLineBreak unused -> {} case ParagraphCloseTag unused -> {} diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java index 182762a7f..53f6f1a29 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java @@ -35,6 +35,7 @@ import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag; import com.google.googlejavaformat.java.javadoc.Token.Literal; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.PreCloseTag; @@ -44,6 +45,7 @@ import com.google.googlejavaformat.java.javadoc.Token.StartOfLineToken; import com.google.googlejavaformat.java.javadoc.Token.TableCloseTag; import com.google.googlejavaformat.java.javadoc.Token.TableOpenTag; +import java.util.List; /** * Stateful object that accepts "requests" and "writes," producing formatted Javadoc. 
@@ -354,6 +356,20 @@ void writeMarkdownFencedCodeBlock(MarkdownFencedCodeBlock token) { requestBlankLine(); } + void writeMarkdownTable(MarkdownTable token) { + if (wroteAnythingSignificant && !atStartOfLine) { + requestBlankLine(); + } + flushWhitespace(); + List lines = token.value().lines().toList(); + output.append(lines.get(0)); + for (String line : lines.subList(1, lines.size())) { + writeNewline(AutoIndent.NO_AUTO_INDENT); + output.append(line); + } + requestBlankLine(); + } + @Override public String toString() { return output.toString(); diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java index d5fdde5d9..0bbb56fee 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java @@ -28,11 +28,14 @@ import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag; import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.commonmark.ext.gfm.tables.TableBlock; +import org.commonmark.ext.gfm.tables.TablesExtension; import org.commonmark.node.BulletList; import org.commonmark.node.Code; import org.commonmark.node.FencedCodeBlock; @@ -93,6 +96,10 @@ void visit(Node node) { case OrderedList orderedList -> addSpan(orderedList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN); case ListItem listItem -> alreadyVisitedChildren = visitListItem(listItem); case FencedCodeBlock fencedCodeBlock -> 
visitFencedCodeBlock(fencedCodeBlock); + case TableBlock tableBlock -> { + visitTableBlock(tableBlock); + alreadyVisitedChildren = true; + } case Code code -> visitCodeSpan(code); // TODO: others default -> {} @@ -143,6 +150,12 @@ private void visitFencedCodeBlock(FencedCodeBlock fencedCodeBlock) { positionToToken.get(start).addLast(token); } + private void visitTableBlock(TableBlock tableBlock) { + int start = startPosition(tableBlock); + int end = endPosition(tableBlock); + positionToToken.get(start).addLast(new MarkdownTable(input.substring(start, end))); + } + private void visitCodeSpan(Code code) { int start = startPosition(code); int end = endPosition(code); @@ -200,7 +213,10 @@ public String toString() { } private static final Parser PARSER = - Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); + Parser.builder() + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .extensions(ImmutableList.of(TablesExtension.create())) + .build(); private static final HeaderOpenTag HEADER_OPEN_TOKEN = new HeaderOpenTag(""); private static final HeaderCloseTag HEADER_CLOSE_TOKEN = new HeaderCloseTag(""); diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java index 42b73b069..caa35d3fa 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java @@ -140,6 +140,20 @@ record MarkdownCodeSpanEnd(String value) implements Token {} record MarkdownFencedCodeBlock(String value, String start, String end, String literal) implements Token {} + /** + * A Markdown table, like: + * + * {@snippet : + * | foo | bar | + * | --- | --- | + * | baz | qux | + * } + * + * @param value the full text of the table as it appeared in the input, including the delimiters + * and the literal content. 
+ */ + record MarkdownTable(String value) implements Token {} + /** * Whitespace that is not in a {@code
} or {@code } section. Whitespace includes
    * leading newlines, asterisks, and tabs and spaces. In the output, it is translated to newlines
diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
index 02af3d137..5f2f12526 100644
--- a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
+++ b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
@@ -1516,15 +1516,6 @@ class Test {}
     doFormatTest(input, expected);
   }
 
-  private void doFormatTest(String input, String expected) {
-    try {
-      String actual = formatter.formatSource(input);
-      assertThat(actual).isEqualTo(expected);
-    } catch (FormatterException e) {
-      throw new AssertionError(e);
-    }
-  }
-
   @Test
   public void windowsLineSeparator() throws FormatterException {
     String input =
@@ -2016,6 +2007,8 @@ public void markdownTables() {
     assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
     String input =
 """
+/// Table McTableface
+///
 /// | foo | bar |
 /// | --- | --- |
 /// | baz | qux |
@@ -2023,18 +2016,28 @@ public void markdownTables() {
 /// - |foo|bar|
 ///   |--:|:--|
 ///   |baz|qux|
+///
+/// - Another list.
+///
+///   | which | contains |
+///   | ----- | -------- |
+///   | a | table |
 class Test {}
 """;
-    // TODO: unmangle the tables
-    String expected =
-"""
-/// | foo | bar | | --- | --- | | baz | qux |
-/// - |foo|bar| |--:|:--| |baz|qux|
-class Test {}
-""";
+    // We don't currently try to align the column markers in the rows of the last table.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
+  private void doFormatTest(String input, String expected) {
+    try {
+      assertThat(formatter.formatSource(input)).isEqualTo(expected);
+    } catch (FormatterException formatFailure) {
+      // A formatter failure is a test failure; keep the original exception as the cause.
+      throw new AssertionError(formatFailure);
+    }
+  }
+
   // TODO: b/346668798 - Test the following Markdown constructs, and make the tests work as needed.
   // We can assume that the CommonMark parser correctly handles Markdown, so the question is whether
   // they are subsequently mishandled by our formatting logic. So for example the CommonMark parser
@@ -2074,10 +2077,4 @@ class Test {}
   //
   // - Autolinks
   //    should be preserved. https://spec.commonmark.org/0.31.2/#autolink
-  //
-  // - Tables
-  //   | foo | bar |
-  //   | --- | --- |
-  //   | baz | qux |
-  //   Probably we should just try not to mangle them. https://spec.commonmark.org/0.31.2/#tables
 }