Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
Expand Down Expand Up @@ -133,6 +134,7 @@ private static String render(List<Token> input, int blockIndent, boolean classic
case BrTag t -> output.writeBr(standardizeBrToken(t));
case Whitespace unused -> output.requestWhitespace();
case ForcedNewline unused -> output.writeLineBreakNoAutoIndent();
case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak();
case Literal t -> output.writeLiteral(t);
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
case ListItemCloseTag unused -> {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.google.googlejavaformat.java.javadoc.Token.Literal;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart;
import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak;
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
Expand Down Expand Up @@ -238,9 +239,22 @@ private Function<String, Token> consumeToken() throws LexException {
// remaining characters being matched *could* be those things, so the regex stops at
// whitespace or a backtick. The *first* character could be a backtick, in constructs like
// `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span.
//
// Backslash has no special meaning inside `...` so this code precedes the backslash code.
verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN));
return Literal::new;
}
if (!classicJavadoc) {
// Markdown backslash handling. \ at end of line, optionally followed by whitespace, is a hard
// line break. \ elsewhere cancels any special meaning of the following character.
if (input.tryConsumeRegex(MARKDOWN_HARD_LINE_BREAK_PATTERN)) {
somethingSinceNewline = false;
return MarkdownHardLineBreak::new;
} else if (input.tryConsumeRegex(BACKSLASH_PLUS_CHARACTER_PATTERN)) {
somethingSinceNewline = true;
return Literal::new;
}
}

/*
* TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended
Expand Down Expand Up @@ -661,6 +675,8 @@ private static boolean hasMultipleNewlines(String s) {
// Matches the opening of an inline snippet tag: "{@snippet" followed by a word boundary.
private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b");
// Matches the opening of any inline tag: "{@" followed by zero or more word characters.
private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*");
// Matches one "word" inside a Markdown code span: any single first character (which may itself be
// a backtick), then everything up to the next space, tab, newline, or backtick.
private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*");
// Matches a Markdown hard line break: a backslash, then any number of spaces or tabs, then a
// newline. The trailing spaces/tabs and the newline are consumed as part of the match.
private static final Pattern MARKDOWN_HARD_LINE_BREAK_PATTERN = compile("\\\\[ \t]*\n");
// Matches a backslash plus the single character after it, so that the pair is lexed as one unit.
// NOTE(review): assuming `compile` is Pattern.compile with default flags, `.` does not match a
// line terminator, so a backslash immediately before a newline is never matched here.
private static final Pattern BACKSLASH_PLUS_CHARACTER_PATTERN = compile("\\\\.");

/*
* We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@
* are we inside?"
*/
final class JavadocWriter {

// A literal token holding a single backslash; writeMarkdownHardLineBreak() writes it immediately
// before the newline of a Markdown hard line break.
private static final Literal BACKSLASH_LITERAL = new Literal("\\");

private final int blockIndent;
private final boolean classicJavadoc;
private final StringBuilder output = new StringBuilder();
Expand Down Expand Up @@ -322,6 +325,11 @@ void writeLineBreakNoAutoIndent() {
writeNewline(NO_AUTO_INDENT);
}

/**
 * Writes a Markdown hard line break: a single backslash literal followed by a newline.
 *
 * <p>The output is always the canonical {@code \} + newline form; this method takes no token, so
 * nothing from the original input (such as trailing whitespace after the backslash) is carried
 * over.
 */
void writeMarkdownHardLineBreak() {
// The backslash is written like any other literal token, so it goes through writeLiteral.
writeLiteral(BACKSLASH_LITERAL);
// Then end the line. This uses the no-argument writeNewline(), not the NO_AUTO_INDENT variant
// that writeLineBreakNoAutoIndent() uses.
writeNewline();
}

/**
 * Writes a literal token to the output by delegating to {@code writeToken}.
 *
 * @param token the literal to write
 */
void writeLiteral(Literal token) {
writeToken(token);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ record Whitespace(String value) implements Token {}
*/
record ForcedNewline(String value) implements Token {}

/**
 * A Markdown hard line break ({@code \} at the end of a line).
 *
 * <p>The lexer produces this token only when it is not lexing classic Javadoc, for a backslash
 * that is followed by optional spaces or tabs and then a newline. The writer renders it as a
 * backslash followed by a newline.
 *
 * @param value presumably the input text matched for this token (backslash, any trailing spaces
 *     or tabs, and the newline) -- TODO confirm against the lexer's token construction
 */
record MarkdownHardLineBreak(String value) implements Token {}

/**
* Token that permits but does not force a line break. The way that we accomplish this is somewhat
* indirect: As far as {@link JavadocWriter} is concerned, this token is meaningless. But its mere
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1778,22 +1778,42 @@ class Test {}
@Test
public void markdownBackslashes() {
assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
// We write `╲` (a box drawing character) instead of `\\` here and then substitute. That makes
// the test case a bit easier to read and also means that we can see where the line wrapping
// should happen. (Having to write \\ instead of \ would make the source text lines wider than
// the strings they represent.)
@SuppressWarnings("MisleadingEscapedSpace")
String input =
"""
/// \\<br> is not a break.
/// \\&#42; is not an HTML entity.
/// foo\\
/// bar
class Test {}
""";
// TODO: the <br> should not cause a line break, and the end-of-line backslash should.
// I don't think anything changes if we do or do not respect the \& backslash.
"""
/// ╲<br> is not a break.
/// ╲&#42; is not an HTML entity.
/// Backslash does not escape the end of a `code span╲` so <br> is a real break,
/// but backslash does escape the *start* of a ╲`code span so <br> is also a real break.
/// hard╲
/// line╲\t\s
/// breaks
/// - foo ╲
/// bar
/// ╲@param not a param tag
/// ╲╲@param not a param tag either
class Test {}
"""
.replace('╲', '\\');
// I don't think anything changes if we do or do not respect the \& backslash so nothing here
// proves whether we do.
String expected =
"""
/// \\<br>
/// is not a break. \\&#42; is not an HTML entity. foo\\ bar
class Test {}
""";
"""
/// ╲<br> is not a break. ╲&#42; is not an HTML entity. Backslash does not escape the end of a `code
/// span╲` so <br>
/// is a real break, but backslash does escape the *start* of a ╲`code span so <br>
/// is also a real break. hard╲
/// line╲
/// breaks
/// - foo ╲
/// bar ╲@param not a param tag ╲╲@param not a param tag either
class Test {}
"""
.replace('╲', '\\');
doFormatTest(input, expected);
}

Expand Down Expand Up @@ -1976,14 +1996,6 @@ class Test {}
// <pre> handling elsewhere. On the other hand, if we don't handle Markdown code spans (`...`)
// correctly then we might incorrectly recognize HTML tags like `<ul>` inside them.
//
// - Backslashes
// - \<br> is not a break.
// - \&#42; is not an HTML entity.
// - \⏎ is a hard line break. https://spec.commonmark.org/0.31.2/#hard-line-break
// A hard line break can also be written as two or more spaces followed by a newline. I think
// that is ridiculous and it is absolutely fine to destroy those spaces. However the line
// break will show up in the CommonMark parse.
//
// - Thematic breaks: ---, ***, ___, which are all rendered as <hr> and should presumably have a
// line break before and after. https://spec.commonmark.org/0.31.2/#thematic-breaks
//
Expand Down
Loading