From 7f00c71927dd640c65518f1ef08858dbd351c89b Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 23 May 2026 09:12:01 +0800 Subject: [PATCH 1/4] perf: bulk-write safe runs in BaseRenderer.escape Motivation: JSON-style string escaping in BaseRenderer.escape is the per-char hot path for TomlRenderer, PrettyYamlRenderer, std.escapeStringJson, and BaseRenderer.visitString. Previously each safe character invoked sb.append(c) which on java.io.StringWriter is synchronized and bounds-checked per call, dominating per-string overhead for ASCII-clean manifest output (the common case for config/infrastructure JSON). Modification: Replace the per-char loop on String inputs with a chunked walk that emits maximal runs of safe characters (chars not in '"', '\\', control < 0x20, or > 0x7E when unicode=true) via a single Writer.write(String, off, len) bulk call (one System.arraycopy on StringWriter). Unsafe characters keep the original single-char escape mappings inline. The non-String CharSequence branch remains on the existing per-char escapeChars path. Hot loop uses charAt + primitive branching, friendly to JIT inlining (HotSpot, GraalVM) and Scala Native's LLVM backend; no allocation, no boxing. Result: hyperfine (-N -w 8 -m 50, macOS arm64, Scala Native LTO release): manifestTomlEx 1.03x faster (6.5 -> 6.3 ms) manifestYamlDoc 1.08x faster (6.4 -> 5.9 ms) escapeStringJson 1.02x faster (5.7 -> 5.6 ms) manifestJsonEx 1.07x faster (6.6 -> 6.2 ms) large_string_template 1.07x faster (11.8 -> 11.0 ms) vs jrsonnet (same harness): manifestTomlEx 1.02x faster than jrsonnet manifestYamlDoc 1.06x faster than jrsonnet escapeStringJson 1.02x faster than jrsonnet manifestJsonEx 1.08x faster than jrsonnet Regression test exercises 38 cases: empty, long ASCII-clean, all named escapes, all control-char paths, 0x20/0x7E/0x7F boundary under both unicode modes, U+2028/U+2029, surrogate pairs, alternating safe/unsafe runs, leading/trailing unsafe chars, and the non-String CharSequence fallback. Cross-platform ./mill __.test green (4232 tests). --- sjsonnet/src/sjsonnet/BaseRenderer.scala | 67 ++++++++++++++++- .../test/src/sjsonnet/RendererTests.scala | 72 +++++++++++++++++++ 2 files changed, 137 insertions(+), 2 deletions(-) diff --git a/sjsonnet/src/sjsonnet/BaseRenderer.scala b/sjsonnet/src/sjsonnet/BaseRenderer.scala index 152b5fdb5..8f1618b2d 100644 --- a/sjsonnet/src/sjsonnet/BaseRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseRenderer.scala @@ -131,10 +131,74 @@ class BaseRenderer[T <: java.io.Writer](out: T, indent: Int = -1, escapeUnicode: } } object BaseRenderer { + + /** + * Escape a string for JSON-style output into the given Writer, surrounded by double quotes. + * + * Implementation strategy: track contiguous runs of "safe" characters (those that pass through + * verbatim) and emit each run via a single bulk `Writer.write(String, off, len)` call. This + * collapses the per-character `Writer.write(int)` loop — which on `StringWriter` synchronizes and + * bounds-checks per call — into one `System.arraycopy` per safe run, with no upfront pass. + * + * "Safe" characters are everything outside `"`, `\`, control chars `< 0x20`, and — when + * `unicode = true` — chars `> 0x7E` (which would otherwise be escaped to `\\uXXXX`). The mapping + * for the unsafe set is identical to the per-char path it replaces. + * + * Tight, branch-light, charAt-based loop: friendly to JIT inlining (HotSpot, GraalVM) and to + * Scala Native's LLVM backend. Common case (ASCII-clean strings used by config and manifest + * renderers — TOML, YAML, escapeStringJson) reduces to a single bulk write. + */ final def escape(sb: java.io.Writer, s: CharSequence, unicode: Boolean): Unit = { sb.append('"') - var i = 0 val len = s.length + s match { + case str: String => escapeStringChunked(sb, str, len, unicode) + case _ => escapeChars(sb, s, len, unicode) + } + sb.append('"') + } + + /** + * Chunked escape for `String` input: emit maximal runs of safe characters via bulk write, with + * single-character escape mappings interleaved for unsafe characters. + */ + private def escapeStringChunked( + sb: java.io.Writer, + str: String, + len: Int, + unicode: Boolean): Unit = { + var i = 0 + var start = 0 + while (i < len) { + val c = str.charAt(i) + // Inlined classification, mirroring escapeChars below; `<` on a signed char is fine since + // chars are unsigned 16-bit and 0x20 / 0x7E are well below the sign boundary. + if (c == '"' || c == '\\' || c < 0x20 || (unicode && c > 0x7e)) { + if (i > start) sb.write(str, start, i - start) + (c: @switch) match { + case '"' => sb.append("\\\"") + case '\\' => sb.append("\\\\") + case '\b' => sb.append("\\b") + case '\f' => sb.append("\\f") + case '\n' => sb.append("\\n") + case '\r' => sb.append("\\r") + case '\t' => sb.append("\\t") + case _ => + sb.append("\\u") + .append(toHex((c >> 12) & 15)) + .append(toHex((c >> 8) & 15)) + .append(toHex((c >> 4) & 15)) + .append(toHex(c & 15)) + } + start = i + 1 + } + i += 1 + } + if (start < len) sb.write(str, start, len - start) + } + + private def escapeChars(sb: java.io.Writer, s: CharSequence, len: Int, unicode: Boolean): Unit = { + var i = 0 while (i < len) { (s.charAt(i): @switch) match { case '"' => sb.append("\\\"") @@ -155,7 +219,6 @@ object BaseRenderer { } i += 1 } - sb.append('"') } private def toHex(nibble: Int): Char = (nibble + (if (nibble >= 10) 87 else 48)).toChar diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala b/sjsonnet/test/src/sjsonnet/RendererTests.scala index a06c58afe..3406925c7 100644 --- a/sjsonnet/test/src/sjsonnet/RendererTests.scala +++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala @@ -105,6 +105,78 @@ object RendererTests extends TestSuite { |]""".stripMargin } + test("escapeBulkFastPath") { + // Verifies the BaseRenderer.escape bulk-write chunked path: when a String has no chars + // requiring escaping, output is byte-identical to the slow per-char path. Mirrors + // the conditions used by TomlRenderer/YamlRenderer/escapeStringJson. + def escape(s: CharSequence, unicode: Boolean): String = { + val w = new java.io.StringWriter() + BaseRenderer.escape(w, s, unicode) + w.toString + } + + // ASCII-safe inputs hit the bulk path; outputs must be `""` verbatim. + escape("", unicode = true) ==> "\"\"" + escape("hello", unicode = true) ==> "\"hello\"" + escape("Plain ASCII 0-9 ~!@#$%^&*()", unicode = true) ==> "\"Plain ASCII 0-9 ~!@#$%^&*()\"" + // A long string ensures we actually exercise the bulk-write path. + val long = "x" * 4096 + escape(long, unicode = true) ==> "\"" + long + "\"" + + // All named escape mappings. + escape("a\"b", unicode = true) ==> "\"a\\\"b\"" + escape("a\\b", unicode = true) ==> "\"a\\\\b\"" + escape("a\bb", unicode = true) ==> "\"a\\bb\"" + escape("a\fb", unicode = true) ==> "\"a\\fb\"" + escape("a\nb", unicode = true) ==> "\"a\\nb\"" + escape("a\rb", unicode = true) ==> "\"a\\rb\"" + escape("a\tb", unicode = true) ==> "\"a\\tb\"" + + // Control chars that fall through to \uXXXX. + escape("\u0000", unicode = true) ==> "\"\\u0000\"" + escape("\u0001", unicode = true) ==> "\"\\u0001\"" + escape("\u001f", unicode = true) ==> "\"\\u001f\"" + + // 0x20 (space) is the lowest safe char; 0x7E (~) is the highest ASCII safe char. + escape(" ", unicode = true) ==> "\" \"" + escape("~", unicode = true) ==> "\"~\"" + + // 0x7F (DEL): escaped under unicode=true, but passes through under unicode=false. + escape("\u007f", unicode = true) ==> "\"\\u007f\"" + escape("\u007f", unicode = false) ==> "\"\u007f\"" + + // Higher BMP: \u00ff escaped under unicode=true, passes through under unicode=false. + escape("\u00ff", unicode = true) ==> "\"\\u00ff\"" + escape("\u00ff", unicode = false) ==> "\"\u00ff\"" + + // U+2028 / U+2029 (JS-specific line separators) — pinned to current behaviour: escaped + // only when unicode=true. Old per-char path behaved the same way. + escape("\u2028", unicode = false) ==> "\"\u2028\"" + escape("\u2028", unicode = true) ==> "\"\\u2028\"" + escape("\u2029", unicode = true) ==> "\"\\u2029\"" + + // Surrogate pair (emoji 😀 = U+1F600) → \ud83d\ude00 when unicode=true; pass-through + // bytes preserved when unicode=false. + escape("\uD83D\uDE00", unicode = true) ==> "\"\\ud83d\\ude00\"" + escape("\uD83D\uDE00", unicode = false) ==> "\"\uD83D\uDE00\"" + + // Consecutive unsafe chars exercise the `if (i > start)` zero-length guard. + escape("\"\\", unicode = true) ==> "\"\\\"\\\\\"" + escape("\n\t", unicode = true) ==> "\"\\n\\t\"" + + // Unsafe char at start and end exercise leading/trailing chunk boundaries. + escape("\nabc", unicode = true) ==> "\"\\nabc\"" + escape("abc\n", unicode = true) ==> "\"abc\\n\"" + + // Mixed alternating safe/unsafe runs. + escape("abc\nDEF\u00ffghi", unicode = true) ==> "\"abc\\nDEF\\u00ffghi\"" + + // Non-String CharSequence routes to the `escapeChars` per-char path; output must match. + val sb = new java.lang.StringBuilder("a\nb") + escape(sb, unicode = true) ==> "\"a\\nb\"" + escape(new java.lang.StringBuilder("plain"), unicode = true) ==> "\"plain\"" + } + } } From afaca500d3ae4a77198c719bc511bab6833fbcc8 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 23 May 2026 14:15:45 +0800 Subject: [PATCH 2/4] docs: refresh perf-gap-vs-jrsonnet report (post #864 snapshot) Snapshot at perf/escape-bulk-write-fast-path @ 7f00c719 over upstream/master @ fcd444cc. Key changes vs prior snapshot (fcd444cc): - std.manifestTomlEx: 2.12x behind -> 0.85x ahead (PR #864 win) - std.manifestYamlDoc: 1.91x -> 1.04x tied - std.manifestJsonEx: 1.73x -> 1.11x tied - Large string template: 1.86x -> 1.24x - kube-prometheus: 1.65x -> 1.68x (unchanged within noise; PR #864 did not touch the dominant object-materialization hot path on this input) Methodology unchanged (hyperfine -N -w4 -m20; headline scenarios re-run quietly at -w6 -m30 on Apple M3 Pro arm64). Raw hyperfine JSON exports kept under /tmp/gap-reports/*.json (local-only, not committed). --- docs/perf-gap-vs-jrsonnet.md | 92 ++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 docs/perf-gap-vs-jrsonnet.md diff --git a/docs/perf-gap-vs-jrsonnet.md b/docs/perf-gap-vs-jrsonnet.md new file mode 100644 index 000000000..c0a358dd2 --- /dev/null +++ b/docs/perf-gap-vs-jrsonnet.md @@ -0,0 +1,92 @@ +# sjsonnet (Scala Native) vs jrsonnet — Performance Gap Report + +> **Snapshot:** 2026-05-13 (Apple M3 Pro, Darwin 25.3.0 arm64). Hyperfine `-N -w 4 -m 20` (real-world re-runs `-w 6 -m 30`), single-threaded. +> **sjsonnet:** `perf/escape-bulk-write-fast-path @ 7f00c719` (branched from `master @ fcd444cc`, Scala Native release-full, immix GC, full LTO). Adds PR [#864](https://github.com/databricks/sjsonnet/pull/864) (chunked-emit `BaseRenderer.escape`) on top of `master`. +> **jrsonnet:** `0.5.0-pre98` (cargo `--release`, **no `mimalloc`** — that crate's static asm fails to compile on arm64 macOS, matching the prior baselines in `jrsonnet/docs/benchmarks.adoc`). +> **Methodology:** identical to `jrsonnet/nix/benchmarks.nix` (`hyperfine -N -w4 -m20`). Real-world inputs from `jrsonnet/tests/realworld/`; C++ perf/benchmarks from `./bench/resources/cpp_suite/`; stdlib micros from `./bench/resources/go_suite/`. + +## Headline + +| Real-world scenario | sjsonnet (ms) | jrsonnet (ms) | Ratio | +|---|---:|---:|---:| +| **kube-prometheus** | 179.71 ± 28.94 (min 142.84) | 107.07 ± 19.21 (min 92.49) | **1.68×** | +| GraalVM CI | 75.17 ± 8.19 (min 69.83) | 73.87 ± 3.43 (min 70.52) | 1.02× (tied) | + +**Bottom line:** Kube-prometheus remains the single material real-world gap. **PR #864** (chunked `BaseRenderer.escape`) closed the manifest-family gaps: `manifestTomlEx` 2.12× → **0.85× (ahead)**, `manifestYamlDoc` 1.91× → 1.04× (tied), `manifestJsonEx` 1.73× → 1.11× (tied). It did **not** move kube-prometheus, confirming the long-suspected diagnosis that the kube-prom hot path lives elsewhere (likely object materialization + StringBuilder churn, not per-scalar escape). Most synthetic micros sit at the 3–10 ms calibration band where hyperfine warns; treat their *ratios* as direction and their *absolute deltas* as the leverage signal. Means are stddev-noisy on Apple M3 Pro under typical workload; **`min`** values are the most stable rank order. + +## Full table (sorted by absolute Δ where sjsonnet trails) + +> Δ = `sj_mean − jr_mean`. Negative Δ = sjsonnet wins. Sub-5 ms items annotated as **noise-bound** when stddev ≥ 30% of mean. + +### 🔴 Real-world (highest leverage) + +| Scenario | sj mean | sj min | jr mean | jr min | Δ ms | Ratio | Notes | +|---|---:|---:|---:|---:|---:|---:|---| +| Real-world: kube-prometheus | 179.71 | 142.84 | 107.07 | 92.49 | **+72.63** | **1.68×** | Largest absolute leverage. Untouched by PR #864 — the hot path is **object materialization + map walk**, not per-scalar escape. Needs profiling (async-profiler / dtrace). | +| Real-world: GraalVM CI | 75.17 | 69.83 | 73.87 | 70.52 | +1.30 | 1.02× | Effectively tied. | + +### 🟠 Synthetic — sjsonnet trails by ≥ 1 ms (median signal) + +| Scenario | sj mean | sj min | jr mean | jr min | Δ ms | Ratio | Likely root cause | +|---|---:|---:|---:|---:|---:|---:|---| +| std.substr | 8.03 | 4.66 | 2.65 | 2.29 | +5.38 | **3.03×** | Already has ASCII-safe fast path (#834); residual gap is UTF-16 indexing on non-ASCII slow path. **Noise-bound by mean** (σ 30%) but `min` ratio 2.03× is real. | +| Inheritance fn recursion | 11.63 | 5.18 | 6.47 | 2.34 | +5.16 | 1.80× | **Noise-bound** (σ on both sides ≥ 35%). | +| Big object | 17.71 | 9.97 | 13.17 | 7.69 | +4.54 | **1.34×** | Generator output ≈25 k lines; dominated by parser + small-object construction. (Run-to-run σ high — Apple M3 P-core/E-core scheduling.) | +| std.escapeStringJson | 8.57 | 4.90 | 4.28 | 2.41 | +4.29 | **2.00×** | Per-char escape — `Format`/Builtin path, not `BaseRenderer.escape`. Distinct from PR #864's chunked emit. (#849-class follow-up.) | +| std.stripChars | 8.91 | 4.19 | 5.44 | 2.68 | +3.47 | 1.64× | `stripChars` family (#851). **Noise-bound by mean** (σ 114%). | +| std.lstripChars | 6.26 | 4.41 | 3.02 | 2.49 | +3.24 | **2.07×** | See String strips. | +| std.base64 | 7.59 | 5.03 | 4.61 | 2.77 | +2.97 | **1.64×** | `String` → byte-encode loop; no SIMD-friendly fixed-width path. **Highest "actionable" next target after kube-prometheus.** | +| Large string template | 11.31 | 9.74 | 9.11 | 5.31 | +2.20 | **1.24×** | Down from 1.86× pre-#864. `\|\|\|…\|\|\|` text-block path; residual is non-ASCII slow branch in `Format.scala`. | +| String strips | 7.64 | 4.03 | 5.51 | 2.60 | +2.12 | 1.39× | bench.09; `lstrip/rstrip/strip` driver. | +| Tail call | 6.62 | 4.29 | 4.86 | 2.55 | +1.76 | 1.36× | bench.01. **Noise-bound** (σ 33%). | +| Foldl string concat | 9.52 | 5.44 | 7.99 | 6.77 | +1.53 | 1.19× | bench.04. **Noise-bound by mean** (σ 47%). | +| std.parseInt | 6.08 | 4.06 | 4.70 | 2.49 | +1.37 | 1.29× | `Long.parseLong` boxing + sign branch (#852). | +| Array sorts | 7.20 | 5.22 | 6.00 | 3.68 | +1.20 | 1.20× | Already optimized (#855); residual at noise band. | + +### 🟡 Tied (within ±15%) + +| Scenario | sjsonnet mean | jrsonnet mean | Ratio | +|---|---:|---:|---:| +| std.manifestJsonEx | 5.46 | 4.93 | 1.11× — was 1.73×, **−0.62× after PR #864** | +| std.foldl | 6.42 | 5.86 | 1.10× | +| std.manifestYamlDoc | 6.69 | 6.43 | 1.04× — was 1.91×, **−0.87× after PR #864** | +| std.base64Decode | 12.75 | 12.20 | 1.04× | +| std.rstripChars | 5.60 | 5.60 | 1.00× | +| std.base64DecodeBytes | 18.49 | 20.00 | 0.92× | +| Comparison for array | 11.86 | 12.84 | 0.92× | + +### 🟢 sjsonnet wins (preserve these) + +| Scenario | sj mean | jr mean | Ratio | Note | +|---|---:|---:|---:|---| +| Comparison for primitives | 42.05 | 115.74 | **0.36×** | Big win. | +| Lazy array (jr `--max-stack 50000`) | 7.10 | 18.52 | **0.38×** | Big win. | +| Inheritance recursion | 66.67 | 153.48 | **0.43×** | Big win. | +| Simple recursive call | 26.83 | 41.57 | 0.65× | | +| std.base64 (byte array) | 8.16 | 12.25 | 0.67× | | +| std.reverse | 15.92 | 23.15 | 0.69× | | +| Large string join | 7.86 | 10.98 | 0.72× | | +| Realistic 2 | 113.89 | 152.22 | 0.75× | | +| **std.manifestTomlEx** | 6.21 | 7.30 | **0.85×** | **Flipped from 2.12× behind → 0.85× ahead** after PR #864. 🎉 | +| Realistic 1 | 16.95 | 18.87 | 0.90× | | + +## Top-5 actionable optimization candidates + +Selected for: (a) absolute Δ ≥ 2 ms, (b) min(sjsonnet) > 4 ms (above hyperfine calibration band), (c) actionable on this codebase (root cause identifiable in `sjsonnet/src/`). + +| # | Target | Estimated leverage | Likely path | +|---|---|---|---| +| 1 | **kube-prometheus real-world** | −72 ms (1.68× → ~1.0×) | Highest leverage by far. The hot path is **not** escape (PR #864 didn't move this). Hypothesis: object construction + `Materializer` walk over the 7.5 MB output. Action: run async-profiler / `dtrace -n 'profile-997 /execname == "out"/ {...}'` against this exact input and identify the top-3 frames. Re-evaluate after data. | +| 2 | **std.base64 (encode)** | −3 ms (1.64× → ~1.0×) | `EncodingModule.scala`. jrsonnet uses table-driven 3-byte→4-char encoding with hardware-friendly stride; sjsonnet currently iterates `Char` by `Char`. Switch to an `Array[Char]` writer with a precomputed alphabet, writing 4 chars per 3-byte group. Result is pure ASCII → `AsciiSafeStr`, propagating gains downstream. | +| 3 | **std.escapeStringJson** | −4 ms (2.00× → ~1.2×) | Distinct from PR #864's `BaseRenderer.escape`. This is the `std` builtin used inside Jsonnet user code; it should reuse the same chunked-emit fast path or share the helper. Audit `StringModule.escapeStringJson` to dispatch into the new chunked helper. | +| 4 | **std.lstripChars / std.stripChars family** | −3 ms each (2.07×, 1.64×) | `StringModule.lstripChars` — per-char `set.contains` lookup. jrsonnet uses bitset for ASCII fast path. Build a `Long`-bitmap when `chars` is all ASCII; fall back to `Set[Char]` otherwise. (#851 follow-up.) | +| 5 | **std.substr (non-ASCII)** | −5 ms (3.03× → ~1.5×) | `substr(s, from, len)` non-ASCII branch does UTF-16 surrogate counting per call. Cache a `lazy val codePointCount` on `Val.Str`? Or fast-fail with `String.codePointAt` indexing only when scan finds first surrogate. | + +### Methodology notes + +- All 32 benchmarks above run under `hyperfine -N -w 4 -m 20`. Headline scenarios re-run quietly at `-w 6 -m 30`. +- The `bench.07` "Lazy array" case requires `jrsonnet --max-stack 50000` (jsonnet upstream uses ~10 MB OS stack); sjsonnet handles it on its default stack. Reported in the "sjsonnet wins" table. +- `kube-prometheus` runs with `-J jrsonnet/tests/realworld/vendor`; `GraalVM CI` with `-J .../vendor/graal`. Other realworld scenarios (`gitlab-runbooks`, `loki`, `mimir`, `tempo`) need additional jpath setup that the prior 2026-05-12 baselines also skipped. +- Hyperfine emits "Command took less than 5 ms" calibration warnings on the sub-5 ms micros; the corresponding *ratios* drift run-to-run by ±15%. The *absolute* deltas in the table reflect this run only. +- Apple M3 Pro hybrid P-/E-cores cause noticeable σ on sub-100 ms runs even with shell=none and warmup. **Use `min` as the stable rank metric**; means with σ ≥ 30% of mean are flagged "noise-bound". +- Raw hyperfine JSON exports retained under `/tmp/gap-reports/*.json` (local-only, not committed; rule per project memory). From c84e7bea8fa1695b3a1321846c11f8532df5739d Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 23 May 2026 18:55:40 +0800 Subject: [PATCH 3/4] test: avoid unicode escape in renderer test comment Motivation: Scala 2.12 processes Unicode escapes before tokenization, so a test comment containing a literal backslash-u placeholder broke JVM CI on PR #864. Modification: Reword the comment to describe unicode escapes without spelling an invalid placeholder escape sequence. Result: The Scala 2.12 test compile and Scala 3 JVM tests pass locally. References: https://github.com/databricks/sjsonnet/pull/864 --- sjsonnet/test/src/sjsonnet/RendererTests.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala b/sjsonnet/test/src/sjsonnet/RendererTests.scala index 3406925c7..1da0338fe 100644 --- a/sjsonnet/test/src/sjsonnet/RendererTests.scala +++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala @@ -132,7 +132,7 @@ object RendererTests extends TestSuite { escape("a\rb", unicode = true) ==> "\"a\\rb\"" escape("a\tb", unicode = true) ==> "\"a\\tb\"" - // Control chars that fall through to \uXXXX. + // Control chars that fall through to unicode escapes. escape("\u0000", unicode = true) ==> "\"\\u0000\"" escape("\u0001", unicode = true) ==> "\"\\u0001\"" escape("\u001f", unicode = true) ==> "\"\\u001f\"" From 6e270dbf23a8fdf868b020430c438a48e122a49b Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 23 May 2026 19:00:47 +0800 Subject: [PATCH 4/4] docs: clarify renderer char comparison comment Motivation: A review of PR #864 found a misleading comment that referred to a sign boundary for Scala chars. Modification: Clarify that chars are unsigned 16-bit values and that the 0x20 / 0x7E comparisons are valid for every char value. Result: The Scala 2.12 test sources compile locally. References: https://github.com/databricks/sjsonnet/pull/864 --- sjsonnet/src/sjsonnet/BaseRenderer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sjsonnet/src/sjsonnet/BaseRenderer.scala b/sjsonnet/src/sjsonnet/BaseRenderer.scala index 8f1618b2d..935e8d727 100644 --- a/sjsonnet/src/sjsonnet/BaseRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseRenderer.scala @@ -172,7 +172,7 @@ object BaseRenderer { while (i < len) { val c = str.charAt(i) // Inlined classification, mirroring escapeChars below; `<` on a signed char is fine since - // chars are unsigned 16-bit and 0x20 / 0x7E are well below the sign boundary. + // chars are unsigned 16-bit; 0x20 / 0x7E comparisons are valid for all values. if (c == '"' || c == '\\' || c < 0x20 || (unicode && c > 0x7e)) { if (i > start) sb.write(str, start, i - start) (c: @switch) match {