diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java index bb39a0f22..649669030 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java @@ -40,6 +40,8 @@ import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag; import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag; import com.google.googlejavaformat.java.javadoc.Token.Literal; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; @@ -132,10 +134,12 @@ private static String render(List input, int blockIndent, boolean classic case Whitespace unused -> output.requestWhitespace(); case ForcedNewline unused -> output.writeLineBreakNoAutoIndent(); case Literal t -> output.writeLiteral(t); - case ParagraphCloseTag unused -> {} + case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t); case ListItemCloseTag unused -> {} case OptionalLineBreak unused -> {} - case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t); + case ParagraphCloseTag unused -> {} + case MarkdownCodeSpanStart unused -> {} + case MarkdownCodeSpanEnd unused -> {} } } throw new AssertionError(); diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java index f0a794b3f..634b55e06 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java @@ -44,6 +44,8 @@ import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag; import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag; import com.google.googlejavaformat.java.javadoc.Token.Literal; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak; @@ -67,6 +69,7 @@ final class JavadocLexer { /** Takes a Javadoc comment, including ∕✱✱ and ✱∕, and returns tokens, including ∕✱✱ and ✱∕. */ static ImmutableList lex(String input, boolean classicJavadoc) throws LexException { + input = normalizeLineEndings(input); MarkdownPositions markdownPositions; if (classicJavadoc) { /* @@ -81,7 +84,6 @@ static ImmutableList lex(String input, boolean classicJavadoc) throws Lex input = input.substring("///".length()); markdownPositions = MarkdownPositions.parse(input); } - input = normalizeLineEndings(input); return new JavadocLexer(new CharStream(input), markdownPositions, classicJavadoc) .generateTokens(); } @@ -116,6 +118,9 @@ enum NestingContext { /** {@code ...}. */ HTML_CODE_CONTEXT, + /** Markdown {@code `...`}. */ + MARKDOWN_CODE_CONTEXT, + /** {@code ...
}. */ TABLE, @@ -161,6 +166,21 @@ private ImmutableList generateTokens() throws LexException { // assumed that there are no other tokens (markdown or otherwise) in a non-empty text span // covered by a markdown token. for (Token markdownToken : markdownPositions.tokensAt(input.position())) { + // For `...`, we switch to MARKDOWN_CODE_CONTEXT for the duration of the span, and we + // change the start or end token to a Literal so it will get joined to adjacent Literal + // tokens. That prevents line breaks adjacent to the backticks in "foo`bar`baz", but still + // allows them at the spaces in "foo `bar` baz" or "foo` bar `baz". + switch (markdownToken) { + case MarkdownCodeSpanStart unused -> { + contextStack.push(NestingContext.MARKDOWN_CODE_CONTEXT); + markdownToken = new Literal(markdownToken.value()); + } + case MarkdownCodeSpanEnd unused -> { + contextStack.popUntil(NestingContext.MARKDOWN_CODE_CONTEXT); + markdownToken = new Literal(markdownToken.value()); + } + default -> {} + } tokens.add(markdownToken); if (!markdownToken.value().isEmpty()) { boolean consumed = input.tryConsume(markdownToken.value()); @@ -211,6 +231,17 @@ private Function consumeToken() throws LexException { return preserveExistingFormatting ? Literal::new : Whitespace::new; } + if (contextStack.contains(NestingContext.MARKDOWN_CODE_CONTEXT)) { + // Consume one or more characters. We know the first character isn't a newline or space + // because we've eliminated those possibilities, and it can't be the end of the `...` span + // either because that would have caused us to pop MARKDOWN_CODE_CONTEXT from the stack. The + // remaining characters being matched *could* be those things, so the regex stops at + // whitespace or a backtick. The *first* character could be a backtick, in constructs like + // `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span. + verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN)); + return Literal::new; + } + /* * TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended * as tags. But in the most likely case, in which that happens inside
{@code, we have no
@@ -369,7 +400,7 @@ private static ImmutableList joinAdjacentLiteralsAndAdjacentWhitespace(Li
        * it into a tag.
        */
 
-      if (accumulated.length() == 0) {
+      if (accumulated.isEmpty()) {
         output.add(tokens.next());
         continue;
       }
@@ -389,7 +420,7 @@ private static ImmutableList joinAdjacentLiteralsAndAdjacentWhitespace(Li
       output.add(new Literal(accumulated.toString()));
       accumulated.setLength(0);
 
-      if (seenWhitespace.length() > 0) {
+      if (!seenWhitespace.isEmpty()) {
         output.add(new Whitespace(seenWhitespace.toString()));
       }
 
@@ -629,6 +660,7 @@ private static boolean hasMultipleNewlines(String s) {
   private static final Pattern BR_PATTERN = openTagPattern("br");
   private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
   private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
+  private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*");
 
   /*
    * We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo

" (~400 diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java index 82504d805..7f66b32e8 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java @@ -25,12 +25,15 @@ import com.google.googlejavaformat.java.javadoc.Token.ListItemCloseTag; import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag; import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag; import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.commonmark.node.BulletList; +import org.commonmark.node.Code; import org.commonmark.node.FencedCodeBlock; import org.commonmark.node.Heading; import org.commonmark.node.ListItem; @@ -89,6 +92,7 @@ void visit(Node node) { case OrderedList orderedList -> addSpan(orderedList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN); case ListItem listItem -> alreadyVisitedChildren = visitListItem(listItem); case FencedCodeBlock fencedCodeBlock -> visitFencedCodeBlock(fencedCodeBlock); + case Code code -> visitCodeSpan(code); // TODO: others default -> {} } @@ -134,6 +138,22 @@ private void visitFencedCodeBlock(FencedCodeBlock fencedCodeBlock) { positionToToken.get(start).addLast(token); } + private void visitCodeSpan(Code code) { + int start = startPosition(code); + int end = endPosition(code); + int count; + for (count = 0; input.charAt(start + count) == '`'; count++) { + verify( + input.charAt(end - 1 - count) == '`', + "Mismatched backticks: %s", + input.substring(start, end)); + } + verify(count > 0, "Code span does not start with backticks: %s", input.substring(start, end)); + String backticks = "`".repeat(count); + positionToToken.get(start).addLast(new MarkdownCodeSpanStart(backticks)); + positionToToken.get(end - count).addFirst(new MarkdownCodeSpanEnd(backticks)); + } + /** * Visits the given node and the other nodes that are reachable from it via the {@link * Node#getNext()} references. Does nothing if {@code node} is null. diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java index 23bdd4b76..ffdf7916a 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java @@ -103,6 +103,21 @@ record HtmlComment(String value) implements Token {} record BrTag(String value) implements Token {} + /** + * A Markdown code span start, like the start of {@code `foo`} or {@code ``bar``}. + * + * @param value the start backtick string, one or more backtick characters. + */ + record MarkdownCodeSpanStart(String value) implements Token {} + + /** + * A Markdown code span end, like the end of {@code `foo`} or {@code ``bar``}. + * + * @param value the end backtick string, one or more backtick characters (the same number as the + * corresponding start backtick string). + */ + record MarkdownCodeSpanEnd(String value) implements Token {} + /** * A fenced code block, like: * diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java index 72f1d784f..1eff32a85 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java @@ -1927,18 +1927,30 @@ class Test {} public void markdownCodeSpans() { assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue(); String input = - """ - /// `

    ` should not trigger list handling. - class Test {} - """; - // TODO: the
      should not be recognized as a list, so `
        ` should be preserved. - // TODO: test that text with `...` is subject to line wrapping, including joining short lines. +""" +/// `
          ` should not trigger list handling. +/// +/// `This very long code line should eventually trigger line wrapping because newlines are allowed in code spans.` +/// +/// This other long line is carefully crafted to provoke a line break inside a double-backtick `` `
            ` `` code span. +/// +/// There should not be a line break immediately before or after a backtick in an example like this`and`that. +class Test {} +"""; String expected = - """ - /// `
              - /// ` should not trigger list handling. - class Test {} - """; +""" +/// `
                ` should not trigger list handling. +/// +/// `This very long code line should eventually trigger line wrapping because newlines are allowed +/// in code spans.` +/// +/// This other long line is carefully crafted to provoke a line break inside a double-backtick `` +/// `
                  ` `` code span. +/// +/// There should not be a line break immediately before or after a backtick in an example like +/// this`and`that. +class Test {} +"""; doFormatTest(input, expected); } @@ -2001,12 +2013,6 @@ class Test {} // We need to ensure that each > stays at the start of its line with appropriate indentation if // inside a list. https://spec.commonmark.org/0.31.2/#block-quotes // - // - Code spans - // `
                    ` should not trigger list handling. - // Text within `...` should still be subject to line wrapping, both splitting long lines and - // joining short lines. https://spec.commonmark.org/0.31.2/#code-spans - // - // // - Autolinks // should be preserved. https://spec.commonmark.org/0.31.2/#autolink }