Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.Literal;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
Expand Down Expand Up @@ -132,10 +134,12 @@ private static String render(List<Token> input, int blockIndent, boolean classic
case Whitespace unused -> output.requestWhitespace();
case ForcedNewline unused -> output.writeLineBreakNoAutoIndent();
case Literal t -> output.writeLiteral(t);
case ParagraphCloseTag unused -> {}
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
case ListItemCloseTag unused -> {}
case OptionalLineBreak unused -> {}
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
case ParagraphCloseTag unused -> {}
case MarkdownCodeSpanStart unused -> {}
case MarkdownCodeSpanEnd unused -> {}
}
}
throw new AssertionError();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.Literal;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
Expand All @@ -67,6 +69,7 @@
final class JavadocLexer {
/** Takes a Javadoc comment, including ∕✱✱ and ✱∕, and returns tokens, including ∕✱✱ and ✱∕. */
static ImmutableList<Token> lex(String input, boolean classicJavadoc) throws LexException {
input = normalizeLineEndings(input);
MarkdownPositions markdownPositions;
if (classicJavadoc) {
/*
Expand All @@ -81,7 +84,6 @@ static ImmutableList<Token> lex(String input, boolean classicJavadoc) throws Lex
input = input.substring("///".length());
markdownPositions = MarkdownPositions.parse(input);
}
input = normalizeLineEndings(input);
return new JavadocLexer(new CharStream(input), markdownPositions, classicJavadoc)
.generateTokens();
}
Expand Down Expand Up @@ -116,6 +118,9 @@ enum NestingContext {
/** {@code <code>...</code>}. */
HTML_CODE_CONTEXT,

/** Markdown {@code `...`}. */
MARKDOWN_CODE_CONTEXT,

/** {@code <table>...</table>}. */
TABLE,

Expand Down Expand Up @@ -161,6 +166,21 @@ private ImmutableList<Token> generateTokens() throws LexException {
// assumed that there are no other tokens (markdown or otherwise) in a non-empty text span
// covered by a markdown token.
for (Token markdownToken : markdownPositions.tokensAt(input.position())) {
// For `...`, we switch to MARKDOWN_CODE_CONTEXT for the duration of the span, and we
// change the start or end token to a Literal so it will get joined to adjacent Literal
// tokens. That prevents line breaks adjacent to the backticks in "foo`bar`baz", but still
// allows them at the spaces in "foo `bar` baz" or "foo` bar `baz".
switch (markdownToken) {
case MarkdownCodeSpanStart unused -> {
contextStack.push(NestingContext.MARKDOWN_CODE_CONTEXT);
markdownToken = new Literal(markdownToken.value());
}
case MarkdownCodeSpanEnd unused -> {
contextStack.popUntil(NestingContext.MARKDOWN_CODE_CONTEXT);
markdownToken = new Literal(markdownToken.value());
}
default -> {}
}
tokens.add(markdownToken);
if (!markdownToken.value().isEmpty()) {
boolean consumed = input.tryConsume(markdownToken.value());
Expand Down Expand Up @@ -211,6 +231,17 @@ private Function<String, Token> consumeToken() throws LexException {
return preserveExistingFormatting ? Literal::new : Whitespace::new;
}

if (contextStack.contains(NestingContext.MARKDOWN_CODE_CONTEXT)) {
// Consume one or more characters. We know the first character isn't a newline or space
// because we've eliminated those possibilities, and it can't be the end of the `...` span
// either because that would have caused us to pop MARKDOWN_CODE_CONTEXT from the stack. The
// remaining characters being matched *could* be those things, so the regex stops at
// whitespace or a backtick. The *first* character could be a backtick, in constructs like
// `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span.
verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN));
return Literal::new;
}

/*
* TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended
* as tags. But in the most likely case, in which that happens inside <pre>{@code, we have no
Expand Down Expand Up @@ -369,7 +400,7 @@ private static ImmutableList<Token> joinAdjacentLiteralsAndAdjacentWhitespace(Li
* it into a tag.
*/

if (accumulated.length() == 0) {
if (accumulated.isEmpty()) {
output.add(tokens.next());
continue;
}
Expand All @@ -389,7 +420,7 @@ private static ImmutableList<Token> joinAdjacentLiteralsAndAdjacentWhitespace(Li
output.add(new Literal(accumulated.toString()));
accumulated.setLength(0);

if (seenWhitespace.length() > 0) {
if (!seenWhitespace.isEmpty()) {
output.add(new Whitespace(seenWhitespace.toString()));
}

Expand Down Expand Up @@ -629,6 +660,7 @@ private static boolean hasMultipleNewlines(String s) {
private static final Pattern BR_PATTERN = openTagPattern("br");
private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
/*
 * Matches one "word" of text inside a Markdown code span: a single leading character of any kind
 * (the unescaped dot), followed by zero or more characters that are not a space, tab, newline, or
 * backtick. The leading character is deliberately unrestricted so that it may itself be a
 * backtick. The match stops before the next whitespace character or backtick.
 *
 * NOTE(review): this assumes `compile` is java.util.regex.Pattern.compile with no flags, in which
 * case the leading dot does not match a line terminator -- confirm against the static imports.
 */
private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*");

/*
* We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
import com.google.googlejavaformat.java.javadoc.Token.ListItemCloseTag;
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag;
import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.commonmark.node.BulletList;
import org.commonmark.node.Code;
import org.commonmark.node.FencedCodeBlock;
import org.commonmark.node.Heading;
import org.commonmark.node.ListItem;
Expand Down Expand Up @@ -89,6 +92,7 @@ void visit(Node node) {
case OrderedList orderedList -> addSpan(orderedList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN);
case ListItem listItem -> alreadyVisitedChildren = visitListItem(listItem);
case FencedCodeBlock fencedCodeBlock -> visitFencedCodeBlock(fencedCodeBlock);
case Code code -> visitCodeSpan(code);
// TODO: others
default -> {}
}
Expand Down Expand Up @@ -134,6 +138,22 @@ private void visitFencedCodeBlock(FencedCodeBlock fencedCodeBlock) {
positionToToken.get(start).addLast(token);
}

/**
 * Records the opening and closing backtick delimiters of a Markdown code span.
 *
 * <p>The delimiter length is found by counting the backticks at the start of the span. For each
 * leading backtick, the character at the mirrored position from the end of the span must also be
 * a backtick. A {@link MarkdownCodeSpanStart} is then appended to the tokens registered at the
 * span's start position, and a {@link MarkdownCodeSpanEnd} is prepended to the tokens registered
 * at the position where the closing delimiter begins.
 *
 * @param code the code span node whose delimiters should be recorded
 */
private void visitCodeSpan(Code code) {
  int spanStart = startPosition(code);
  int spanEnd = endPosition(code);

  // Walk inward from both ends at once: every opening backtick needs a closing counterpart.
  int delimiterLength = 0;
  while (input.charAt(spanStart + delimiterLength) == '`') {
    verify(
        input.charAt(spanEnd - 1 - delimiterLength) == '`',
        "Mismatched backticks: %s",
        input.substring(spanStart, spanEnd));
    delimiterLength++;
  }
  verify(
      delimiterLength > 0,
      "Code span does not start with backticks: %s",
      input.substring(spanStart, spanEnd));

  String delimiter = "`".repeat(delimiterLength);
  int closingDelimiterStart = spanEnd - delimiterLength;
  positionToToken.get(spanStart).addLast(new MarkdownCodeSpanStart(delimiter));
  positionToToken.get(closingDelimiterStart).addFirst(new MarkdownCodeSpanEnd(delimiter));
}

/**
* Visits the given node and the other nodes that are reachable from it via the {@link
* Node#getNext()} references. Does nothing if {@code node} is null.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,21 @@ record HtmlComment(String value) implements Token {}

record BrTag(String value) implements Token {}

/**
 * The opening delimiter of a Markdown code span, such as the leading backtick of {@code `foo`} or
 * the leading pair of backticks of {@code ``bar``}.
 *
 * <p>This token carries only the delimiter itself; the text inside the code span is not part of
 * it.
 *
 * @param value the opening backtick string, consisting of one or more backtick characters
 */
record MarkdownCodeSpanStart(String value) implements Token {}

/**
 * The closing delimiter of a Markdown code span, such as the trailing backtick of {@code `foo`}
 * or the trailing pair of backticks of {@code ``bar``}.
 *
 * <p>This token carries only the delimiter itself; the text inside the code span is not part of
 * it.
 *
 * @param value the closing backtick string, consisting of one or more backtick characters (the
 *     same number as in the corresponding opening backtick string)
 */
record MarkdownCodeSpanEnd(String value) implements Token {}

/**
* A fenced code block, like:
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1927,18 +1927,30 @@ class Test {}
public void markdownCodeSpans() {
assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
String input =
"""
/// `<ul>` should not trigger list handling.
class Test {}
""";
// TODO: the <ul> should not be recognized as a list, so `<ul>` should be preserved.
// TODO: test that text with `...` is subject to line wrapping, including joining short lines.
"""
/// `<ul>` should not trigger list handling.
///
/// `This very long code line should eventually trigger line wrapping because newlines are allowed in code spans.`
///
/// This other long line is carefully crafted to provoke a line break inside a double-backtick `` `<ul>` `` code span.
///
/// There should not be a line break immediately before or after a backtick in an example like this`and`that.
class Test {}
""";
String expected =
"""
/// `<ul>
/// ` should not trigger list handling.
class Test {}
""";
"""
/// `<ul>` should not trigger list handling.
///
/// `This very long code line should eventually trigger line wrapping because newlines are allowed
/// in code spans.`
///
/// This other long line is carefully crafted to provoke a line break inside a double-backtick ``
/// `<ul>` `` code span.
///
/// There should not be a line break immediately before or after a backtick in an example like
/// this`and`that.
class Test {}
""";
doFormatTest(input, expected);
}

Expand Down Expand Up @@ -2001,12 +2013,6 @@ class Test {}
// We need to ensure that each > stays at the start of its line with appropriate indentation if
// inside a list. https://spec.commonmark.org/0.31.2/#block-quotes
//
// - Code spans
// `<ul>` should not trigger list handling.
// Text within `...` should still be subject to line wrapping, both splitting long lines and
// joining short lines. https://spec.commonmark.org/0.31.2/#code-spans
//
//
// - Autolinks
// <http://example.com> should be preserved. https://spec.commonmark.org/0.31.2/#autolink
}
Loading