diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java index 649669030..dbc2be841 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java @@ -43,6 +43,7 @@ import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak; @@ -133,6 +134,7 @@ private static String render(List input, int blockIndent, boolean classic case BrTag t -> output.writeBr(standardizeBrToken(t)); case Whitespace unused -> output.requestWhitespace(); case ForcedNewline unused -> output.writeLineBreakNoAutoIndent(); + case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak(); case Literal t -> output.writeLiteral(t); case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t); case ListItemCloseTag unused -> {} diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java index 634b55e06..34e754261 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java @@ -46,6 +46,7 @@ import com.google.googlejavaformat.java.javadoc.Token.Literal; import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; import 
com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak; @@ -238,9 +239,22 @@ private Function consumeToken() throws LexException { // remaining characters being matched *could* be those things, so the regex stops at // whitespace or a backtick. The *first* character could be a backtick, in constructs like // `` `foo` ``, where the backticks adjacent to "foo" are part of the text of the code span. + // + // Backslash has no special meaning inside `...` so this code precedes the backslash code. verify(input.tryConsumeRegex(WORD_IN_CODE_SPAN_PATTERN)); return Literal::new; } + if (!classicJavadoc) { + // Markdown backslash handling. \ at end of line, optionally followed by whitespace, is a hard + // line break. \ elsewhere cancels any special meaning of the following character. 
+ if (input.tryConsumeRegex(MARKDOWN_HARD_LINE_BREAK_PATTERN)) { + somethingSinceNewline = false; + return MarkdownHardLineBreak::new; + } else if (input.tryConsumeRegex(BACKSLASH_PLUS_CHARACTER_PATTERN)) { + somethingSinceNewline = true; + return Literal::new; + } + } /* * TODO(cpovirk): Maybe try to detect things like "{@code\n@GwtCompatible}" that aren't intended @@ -661,6 +675,8 @@ private static boolean hasMultipleNewlines(String s) { private static final Pattern SNIPPET_TAG_OPEN_PATTERN = compile("[{]@snippet\\b"); private static final Pattern INLINE_TAG_OPEN_PATTERN = compile("[{]@\\w*"); private static final Pattern WORD_IN_CODE_SPAN_PATTERN = compile(".[^ \t\n`]*"); + private static final Pattern MARKDOWN_HARD_LINE_BREAK_PATTERN = compile("\\\\[ \t]*\n"); + private static final Pattern BACKSLASH_PLUS_CHARACTER_PATTERN = compile("\\\\."); /* * We exclude < so that we don't swallow following HTML tags. This lets us fix up "foo<p>" (~400 diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java index 078fd44b6..492bb0150 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java @@ -53,6 +53,9 @@ * are we inside?" */ final class JavadocWriter { + + private static final Literal BACKSLASH_LITERAL = new Literal("\\"); + private final int blockIndent; private final boolean classicJavadoc; private final StringBuilder output = new StringBuilder(); @@ -322,6 +325,11 @@ void writeLineBreakNoAutoIndent() { writeNewline(NO_AUTO_INDENT); } + void writeMarkdownHardLineBreak() { + writeLiteral(BACKSLASH_LITERAL); + writeNewline(); + } + void writeLiteral(Literal token) { writeToken(token); } diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java index ffdf7916a..134fbc8f6 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java @@ -152,6 +152,9 @@ record Whitespace(String value) implements Token {} */ record ForcedNewline(String value) implements Token {} + /** A Markdown hard line break ({@code \} at the end of a line). */ + record MarkdownHardLineBreak(String value) implements Token {} + /** * Token that permits but does not force a line break. The way that we accomplish this is somewhat * indirect: As far as {@link JavadocWriter} is concerned, this token is meaningless.
But its mere diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java index 1eff32a85..4b3fd3da6 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java @@ -1778,22 +1778,42 @@ class Test {} @Test public void markdownBackslashes() { assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue(); + // We write `╲` (a box drawing character) instead of `\\` here and then substitute. That makes + // the test case a bit easier to read and also means that we can see where the line wrapping + // should happen. (Having to write \\ instead of \ would make the source text lines wider than + // the strings they represent.) + @SuppressWarnings("MisleadingEscapedSpace") String input = - """ - /// \\<br> is not a break. - /// \\* is not an HTML entity. - /// foo\\ - /// bar - class Test {} - """; - // TODO: the <br> should not cause a line break, and the end-of-line backslash should. - // I don't think anything changes if we do or do not respect the \& backslash. +""" +/// ╲<br> is not a break. +/// ╲* is not an HTML entity. +/// Backslash does not escape the end of a `code span╲` so <br> is a real break, +/// but backslash does escape the *start* of a ╲`code span so <br> is also a real break. +/// hard╲ +/// line╲\t\s +/// breaks +/// - foo ╲ +/// bar +/// ╲@param not a param tag +/// ╲╲@param not a param tag either +class Test {} +""" + .replace('╲', '\\'); + // I don't think anything changes if we do or do not respect the \& backslash so nothing here + // proves whether we do. String expected = - """ - /// \\<br> - /// is not a break. \\* is not an HTML entity. foo\\ bar - class Test {} - """; +""" +/// ╲<br> is not a break. ╲* is not an HTML entity. Backslash does not escape the end of a `code +/// span╲` so <br> +/// is a real break, but backslash does escape the *start* of a ╲`code span so <br> +/// is also a real break. hard╲ +/// line╲ +/// breaks +/// - foo ╲ +/// bar ╲@param not a param tag ╲╲@param not a param tag either +class Test {} +""" + .replace('╲', '\\'); doFormatTest(input, expected); } @@ -1976,14 +1996,6 @@ class Test {} //

 handling elsewhere. On the other hand, if we don't handle Markdown code spans (`...`)
   // correctly then we might incorrectly recognize HTML tags like `
    ` inside them. // - // - Backslashes - // - \<br> is not a break. - // - \* is not an HTML entity. - // - \⏎ is a hard line break. https://spec.commonmark.org/0.31.2/#hard-line-break - // A hard line break can also be written as two or more spaces followed by a newline. I think - // that is ridiculous and it is absolutely fine to destroy those spaces. However the line - // break will show up in the CommonMark parse. - // // - Thematic breaks: ---, ***, ___, which are all rendered as <hr> and should presumably have a // line break before and after. https://spec.commonmark.org/0.31.2/#thematic-breaks //