diff --git a/README.md b/README.md index 810bdff..e831b38 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ [![Release](https://github.com/toon-format/toon-java/actions/workflows/release.yml/badge.svg)](https://github.com/toon-format/toon-java/actions/workflows/release.yml) [![Maven Central](https://img.shields.io/maven-central/v/dev.toonformat/jtoon.svg)](https://central.sonatype.com/artifact/dev.toonformat/jtoon) ![Coverage](.github/badges/jacoco.svg) -[![SPEC v3.1](https://img.shields.io/badge/spec-v3.1-fef3c0?labelColor=1b1b1f)](https://github.com/toon-format/spec) +[![SPEC v3.3](https://img.shields.io/badge/spec-v3.3-fef3c0?labelColor=1b1b1f)](https://github.com/toon-format/spec) [![License: MIT](https://img.shields.io/badge/license-MIT-fef3c0?labelColor=1b1b1f)](./LICENSE) -> **⚠️ Beta Status (v1.x.x):** This library is in active development and working towards spec compliance. Beta published to Maven Central. API may change before 2.0.0 release. +> **⚠️ Beta Status (v1.x.x):** This library is in active development. Beta published to Maven Central. API may change before 2.0.0 release. Compact, human-readable serialization format for LLM contexts with **30-60% token reduction** vs JSON. Combines YAML-like indentation with CSV-like tabular arrays. Working towards full compatibility with the [official TOON specification](https://github.com/toon-format/spec). 
@@ -90,7 +90,7 @@ Some Java-specific types are automatically normalized for LLM-safe output: | `LocalDateTime` | ISO date-time string in quotes | | `LocalDate` | ISO date string in quotes | | `LocalTime` | ISO time string in quotes | -| `ZonedDateTime` | ISO zoned date-time string in quotes | +| `ZonedDateTime` | ISO offset date-time string in quotes | | `OffsetDateTime` | ISO offset date-time string in quotes | | `Instant` | ISO instant string in quotes | | `java.util.Date` | ISO instant string in quotes | diff --git a/docs/README.md b/docs/README.md index 4f7d910..d5ee950 100644 --- a/docs/README.md +++ b/docs/README.md @@ -15,7 +15,7 @@ Some Java-specific types are automatically normalized for LLM-safe output: | `LocalDateTime` | ISO date-time string in quotes | | `LocalDate` | ISO date string in quotes | | `LocalTime` | ISO time string in quotes | -| `ZonedDateTime` | ISO zoned date-time string in quotes | +| `ZonedDateTime` | ISO offset date-time string in quotes | | `OffsetDateTime` | ISO offset date-time string in quotes | | `Instant` | ISO instant string in quotes | | `java.util.Date` | ISO instant string in quotes | diff --git a/docs/javadoc/dev/toonformat/jtoon/normalizer/package-summary.html b/docs/javadoc/dev/toonformat/jtoon/normalizer/package-summary.html index f9da2fd..c44f73e 100644 --- a/docs/javadoc/dev/toonformat/jtoon/normalizer/package-summary.html +++ b/docs/javadoc/dev/toonformat/jtoon/normalizer/package-summary.html @@ -130,7 +130,7 @@

Temporal Types

  • LocalDateTime → "2025-01-15T10:30:00"
  • LocalDate → "2025-01-15"
  • LocalTime → "10:30:00"
  • -
  • ZonedDateTime → "2025-01-15T10:30:00+01:00[Europe/Paris]"
  • +
  • ZonedDateTime → "2025-01-15T10:30:00+01:00"
  • OffsetDateTime → "2025-01-15T10:30:00+01:00"
  • Instant → "2025-01-15T09:30:00Z"
  • java.util.Date → converted to Instant then formatted
  • diff --git a/src/main/java/dev/toonformat/jtoon/DecodeOptions.java b/src/main/java/dev/toonformat/jtoon/DecodeOptions.java index cecb028..89a808f 100644 --- a/src/main/java/dev/toonformat/jtoon/DecodeOptions.java +++ b/src/main/java/dev/toonformat/jtoon/DecodeOptions.java @@ -8,10 +8,10 @@ * @param indent Number of spaces per indentation level (default: 2) * @param delimiter Delimiter expected in tabular array rows and inline * primitive arrays (default: COMMA) - * @param strict Strict validation mode. When true, throws - * IllegalArgumentException on invalid input. When false, - * uses best-effort parsing and returns null on errors - * (default: true) + * @param strict Strict validation mode (default: true). When true, + * throws IllegalArgumentException on invalid input. + * When false, uses best-effort parsing and top-level + * decode errors return null instead of throwing. * @param expandPaths Path expansion mode for dotted keys (default: OFF) * @param maxDepth Maximum allowed nesting depth during decoding (default: 512). * Prevents StackOverflowError from deeply nested input. @@ -64,6 +64,14 @@ public DecodeOptions() { /** * Compact constructor with validation. + * + * @param indent number of spaces per indentation level + * @param delimiter delimiter for tabular array rows and inline arrays + * @param strict strict validation mode flag + * @param expandPaths path expansion mode for dotted keys + * @param maxDepth maximum nesting depth + * @param maxArraySize maximum array elements + * @param maxStringLength maximum string length */ public DecodeOptions { if (indent < 0) { diff --git a/src/main/java/dev/toonformat/jtoon/EncodeOptions.java b/src/main/java/dev/toonformat/jtoon/EncodeOptions.java index 3ce1ff1..5fbd35c 100644 --- a/src/main/java/dev/toonformat/jtoon/EncodeOptions.java +++ b/src/main/java/dev/toonformat/jtoon/EncodeOptions.java @@ -6,8 +6,10 @@ * Configuration options for encoding data to JToon format. 
* * @param indent Number of spaces per indentation level (default: 2) - * @param delimiter Delimiter to use for tabular array rows and inline - * primitive arrays (default: COMMA) + * @param delimiter Delimiter used for both document delimiter and active + * array delimiter. Controls quoting for object field values + * (document delimiter) and inline array values / tabular + * rows (active delimiter). (default: COMMA) * @param lengthMarker Optional marker to prefix array lengths in headers. When * true, arrays render as [#N] instead of [N] (default: * false) @@ -42,6 +44,12 @@ public EncodeOptions() { /** * Compact constructor with validation. + * + * @param indent number of spaces per indentation level + * @param delimiter delimiter for tabular array rows and inline arrays + * @param lengthMarker whether to prefix array lengths with {@code #} + * @param flatten key folding mode for nested objects + * @param flattenDepth maximum depth of key folding */ public EncodeOptions { if (indent < 0) { diff --git a/src/main/java/dev/toonformat/jtoon/decoder/ArrayDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/ArrayDecoder.java index aa491ef..56bebb3 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/ArrayDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/ArrayDecoder.java @@ -84,6 +84,15 @@ static List parseArrayWithDelimiter(final String header, final int depth } if (arrayMatcher.find()) { + // In strict mode, reject bracket lengths with leading zeros (e.g. [03]) + // unless the length is exactly "0". 
+ if (context.options.strict()) { + final String lengthStr = arrayMatcher.group(2); + if (lengthStr.length() > 1 && lengthStr.charAt(0) == '0') { + throw new IllegalArgumentException( + "Invalid array length with leading zeros: [" + lengthStr + "]"); + } + } final int headerEndIdx = arrayMatcher.end(); final String afterHeader = header.substring(headerEndIdx).trim(); diff --git a/src/main/java/dev/toonformat/jtoon/decoder/DecodeHelper.java b/src/main/java/dev/toonformat/jtoon/decoder/DecodeHelper.java index cfc294a..64a3770 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/DecodeHelper.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/DecodeHelper.java @@ -173,10 +173,26 @@ static void checkPathExpansionConflict(final Map map, final Stri checkFinalValueConflict(key, existing, value, context); } + /** + * Checks for duplicate keys in strict mode. + * Throws if the map already contains the given key and strict mode is enabled. + * + * @param map the map to check + * @param key the key being inserted + * @param context decode context for strict mode check + * @throws IllegalArgumentException if strict mode and key already exists + */ + static void checkDuplicateKey(final Map map, final String key, final DecodeContext context) { + if (context.options.strict() && map.containsKey(key)) { + throw new IllegalArgumentException( + "Duplicate key '" + key + "' at line " + (context.currentLine + 1)); + } + } + /** * Finds the depth of the next non-blank line, skipping blank lines. 
* - * @param context decode an object to deal with lines, delimiter, and options + * @param context decode an object to deal with lines, delimiter and options * @return the depth of the next non-blank line, or null if none exists */ static Integer findNextNonBlankLineDepth(final DecodeContext context) { @@ -192,10 +208,35 @@ static Integer findNextNonBlankLineDepth(final DecodeContext context) { return getDepth(context.lines[nextLineIdx], context); } + /** + * Checks if a line contains unquoted brackets ({@code [} or {@code ]}). + * Used to detect malformed array header syntax in strict mode. + * + * @param line the line to check + * @return true if unquoted brackets are found + */ + static boolean hasUnquotedBrackets(final String line) { + boolean inQuotes = false; + boolean escaped = false; + for (int i = 0; i < line.length(); i++) { + final char c = line.charAt(i); + if (escaped) { + escaped = false; + } else if (c == BACKSLASH) { + escaped = true; + } else if (c == DOUBLE_QUOTE) { + inQuotes = !inQuotes; + } else if (!inQuotes && (c == '[' || c == ']')) { + return true; + } + } + return false; + } + /** * Validates that there are no multiple primitives at root level in strict mode. 
* - * @param context decode an object to deal with lines, delimiter, and options + * @param context decode an object to deal with lines, delimiter and options * @throws IllegalArgumentException in case the next depth is equal to 0 */ static void validateNoMultiplePrimitivesAtRoot(final DecodeContext context) { diff --git a/src/main/java/dev/toonformat/jtoon/decoder/KeyDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/KeyDecoder.java index 2e719fc..9229370 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/KeyDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/KeyDecoder.java @@ -41,6 +41,7 @@ static void processKeyedArrayLine(final Map result, final String } else { // Check for conflicts with existing expanded paths DecodeHelper.checkPathExpansionConflict(result, key, arrayValue, context); + DecodeHelper.checkDuplicateKey(result, key, context); result.put(key, arrayValue); } } @@ -236,6 +237,7 @@ private static void putKeyValueIntoMap(final Map map, final Stri expandPathIntoMap(map, unescapedKey, value, context); } else { DecodeHelper.checkPathExpansionConflict(map, unescapedKey, value, context); + DecodeHelper.checkDuplicateKey(map, unescapedKey, context); map.put(unescapedKey, value); } } @@ -327,6 +329,7 @@ static boolean parseKeyedArrayField(final String fieldContent, final Map item, final int depth, - final DecodeContext context) { + final DecodeContext context) { final int colonIdx = DecodeHelper.findUnquotedColon(fieldContent); if (colonIdx <= 0) { return false; @@ -359,6 +362,7 @@ static boolean parseKeyValueField(final String fieldContent, final Map item = new LinkedHashMap<>(); + DecodeHelper.checkDuplicateKey(item, key, context); item.put(key, arrayValue); // parseArrayWithDelimiter manages currentLine correctly: @@ -130,6 +131,7 @@ static Object parseListItem(final String content, final int depth, final DecodeC // List item is at depth + 1, so pass depth + 1 to parseObjectItemValue parsedValue = 
ObjectDecoder.parseObjectItemValue(value, depth + 1, context); } + DecodeHelper.checkDuplicateKey(item, key, context); item.put(key, parsedValue); parseListItemFields(item, depth, context); diff --git a/src/main/java/dev/toonformat/jtoon/decoder/ObjectDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/ObjectDecoder.java index 2b14336..75f97ca 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/ObjectDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/ObjectDecoder.java @@ -142,6 +142,7 @@ private static void processRootKeyedArrayLine(final Map objectMa } else { // Check for conflicts with existing expanded paths DecodeHelper.checkPathExpansionConflict(objectMap, key, arrayValue, context); + DecodeHelper.checkDuplicateKey(objectMap, key, context); objectMap.put(key, arrayValue); } } diff --git a/src/main/java/dev/toonformat/jtoon/decoder/PrimitiveDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/PrimitiveDecoder.java index 589d702..40add48 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/PrimitiveDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/PrimitiveDecoder.java @@ -85,10 +85,20 @@ static Object parse(final String value, final int maxStringLength) { return StringEscaper.unescape(value); } - // Check for leading zeros (treat as string, except for "0", "-0", "0.0", etc.) + // Check for forbidden leading zeros (treat as string, except for "0", "-0", "0.0", etc.) + // Per spec §4: tokens like "05", "0001", "-05", "-0001" must be treated as strings. + // But "0.5", "0e1", "-0.5", "-0e1" are valid numbers. final String trimmed = value.trim(); - if (trimmed.length() > 1 && trimmed.matches("^-?0+[0-7].*")) { - return value; + if (trimmed.length() > 1) { + // Match forbidden leading zeros: starts with optional '-', then one or more zeros, + // then another digit (0-9) — meaning it's a multi-digit number with leading zeros. + // Exclude cases where the zero is part of a fractional/exponent form like "0.5", "0e1". 
+ final boolean hasLeadingZeros = trimmed.matches("^-?0+\\d.*"); + // But we must NOT match "0.5" style numbers (single zero integer part) + final boolean isLikelyFractionalOrExponent = trimmed.matches("^-?0[.eE].*"); + if (hasLeadingZeros && !isLikelyFractionalOrExponent) { + return value; // treat as string + } } // Try parsing as number diff --git a/src/main/java/dev/toonformat/jtoon/decoder/TabularArrayDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/TabularArrayDecoder.java index c2162e8..43d98fc 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/TabularArrayDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/TabularArrayDecoder.java @@ -205,6 +205,13 @@ private static boolean handleBlankLineInTabularArray(final int expectedRowDepth, /** * Determines if tabular array parsing should terminate based on online depth. + * Implements the full disambiguation algorithm per spec §9.3: + * - Compute the first unquoted occurrence of the active delimiter and the first unquoted colon. + * - If a same-depth line has no unquoted colon → row. + * - If both appear, compare first-unquoted positions: + * - Delimiter before colon → row. + * - Colon before delimiter → key-value line (end of rows). + * - If a line has an unquoted colon but no unquoted active delimiter → key-value line. 
* * @param line the line to check * @param lineDepth the depth of the line @@ -214,7 +221,6 @@ private static boolean handleBlankLineInTabularArray(final int expectedRowDepth, */ private static boolean shouldTerminateTabularArray(final String line, final int lineDepth, final int expectedRowDepth, final DecodeContext context) { - // Header depth is one level above the expected row depth final int headerDepth = expectedRowDepth - 1; if (lineDepth < expectedRowDepth) { @@ -228,14 +234,47 @@ private static boolean shouldTerminateTabularArray(final String line, final int return true; // Line depth is less than expected - terminate } - // Check for a key-value pair at the expected row depth - if (lineDepth == expectedRowDepth) { - final String rowContent = line.substring(expectedRowDepth * context.options.indent()); - final int colonIdx = DecodeHelper.findUnquotedColon(rowContent); - return colonIdx > 0; // Key-value pair at the same depth as rows - terminate an array + if (lineDepth != expectedRowDepth) { + return false; } - return false; + // Spec §9.3 disambiguation at row depth + final String rowContent = line.substring(expectedRowDepth * context.options.indent()); + final char delimChar = context.delimiter.getValue(); + final int delimIdx = findFirstUnquoted(rowContent, delimChar); + final int colonIdx = DecodeHelper.findUnquotedColon(rowContent); + + if (colonIdx < 0) { + return false; // No colon → this is a row + } + + if (delimIdx < 0) { + return true; // Colon present, no delimiter → key-value line + } + + // Both colon and delimiter present: compare positions + return colonIdx < delimIdx; // Colon first → key-value; delimiter first → row + } + + /** + * Finds the index of the first unquoted occurrence of a character in a string. 
+ */ + private static int findFirstUnquoted(final String content, final char target) { + boolean inQuotes = false; + boolean escaped = false; + for (int i = 0; i < content.length(); i++) { + final char c = content.charAt(i); + if (escaped) { + escaped = false; + } else if (c == '\\') { + escaped = true; + } else if (c == '"') { + inQuotes = !inQuotes; + } else if (!inQuotes && c == target) { + return i; + } + } + return -1; } /** diff --git a/src/main/java/dev/toonformat/jtoon/decoder/ValueDecoder.java b/src/main/java/dev/toonformat/jtoon/decoder/ValueDecoder.java index f28be99..db33cc7 100644 --- a/src/main/java/dev/toonformat/jtoon/decoder/ValueDecoder.java +++ b/src/main/java/dev/toonformat/jtoon/decoder/ValueDecoder.java @@ -46,6 +46,17 @@ private ValueDecoder() { * invalid */ public static Object decode(final String toon, final DecodeOptions options) { + try { + return decodeInternal(toon, options); + } catch (IllegalArgumentException e) { + if (!options.strict()) { + return null; + } + throw e; + } + } + + private static Object decodeInternal(final String toon, final DecodeOptions options) { if (toon == null || toon.isBlank()) { return new LinkedHashMap<>(); } @@ -95,12 +106,34 @@ public static Object decode(final String toon, final DecodeOptions options) { // Handle key-value pairs: name: Ada final int colonIdx = DecodeHelper.findUnquotedColon(line); if (colonIdx > 0) { + if (context.options.strict()) { + final String key = line.substring(0, colonIdx).trim(); + // In strict mode, reject keys with unquoted brackets that didn't match + // KEYED_ARRAY_PATTERN. 
This catches: + // - extra brackets between bracket segment and colon (foo[1][bar]) + // - text between bracket segment and colon (foo[2]extra) + // - non-integer bracket segment (foo[bar]) + // - negative bracket length (items[-1]) + // - whitespace between bracket segment and colon/fields segment + // (items[2] :, items[2] {a,b}:) + if (DecodeHelper.hasUnquotedBrackets(key)) { + throw new IllegalArgumentException( + "Invalid array header syntax at line " + (context.currentLine + 1)); + } + } final String key = line.substring(0, colonIdx).trim(); final String value = line.substring(colonIdx + 1).trim(); return KeyDecoder.parseKeyValuePair(key, value, depth, depth == 0, context); } // Bare scalar value + if (context.options.strict() && DecodeHelper.hasUnquotedBrackets(line)) { + // Line has brackets but no colon and didn't match KEYED_ARRAY_PATTERN + // (e.g. "items[2]{id,name}" missing colon) + throw new IllegalArgumentException( + "Invalid syntax: unquoted brackets without valid header at line " + + (context.currentLine + 1)); + } return ObjectDecoder.parseBareScalarValue(line, depth, context); } @@ -122,7 +155,14 @@ public static Object decode(final String toon, final DecodeOptions options) { public static String decodeToJson(final String toon, final DecodeOptions options) { try { final Object decoded = decode(toon, options); + if (decoded == null) { + return NULL_LITERAL; + } return MAPPER.writeValueAsString(decoded); + } catch (IllegalArgumentException e) { + // decode() rejected the input (e.g. strict-mode structural failure): wrap it but + // keep the detail message, exactly like the generic handler below + throw new IllegalArgumentException("Failed to convert decoded value to JSON: " + e.getMessage(), e); } catch (Exception e) { throw new IllegalArgumentException("Failed to convert decoded value to JSON: " + e.getMessage(), e); } diff --git a/src/main/java/dev/toonformat/jtoon/encoder/ArrayEncoder.java b/src/main/java/dev/toonformat/jtoon/encoder/ArrayEncoder.java index 0b180c8..c8767a4 100644 --- 
a/src/main/java/dev/toonformat/jtoon/encoder/ArrayEncoder.java +++ b/src/main/java/dev/toonformat/jtoon/encoder/ArrayEncoder.java @@ -31,16 +31,16 @@ private ArrayEncoder() { public static void encodeArray(final String key, final ArrayNode value, final LineWriter writer, final int depth, final EncodeOptions options) { if (value.isEmpty()) { - if (!options.lengthMarker()) { - if (key == null && depth == 0) { - writer.push(depth, "[]"); - return; - } - if (key != null) { - final String encodedKey = PrimitiveEncoder.encodeKey(key); - writer.push(depth, encodedKey + ": []"); - return; - } + // Per spec §9.1: encoders SHOULD emit key: [] for empty arrays. + // When lengthMarker is enabled, use the legacy header form instead. + if (key == null && depth == 0) { + writer.push(depth, options.lengthMarker() ? "[0]: " : "[]"); + return; + } + if (key != null && !options.lengthMarker()) { + final String encodedKey = PrimitiveEncoder.encodeKey(key); + writer.push(depth, encodedKey + ": []"); + return; } final String header = PrimitiveEncoder.formatHeader(0, key, null, options.delimiter().toString(), options.lengthMarker()); diff --git a/src/main/java/dev/toonformat/jtoon/encoder/LineWriter.java b/src/main/java/dev/toonformat/jtoon/encoder/LineWriter.java index 1d22e02..9c47b98 100644 --- a/src/main/java/dev/toonformat/jtoon/encoder/LineWriter.java +++ b/src/main/java/dev/toonformat/jtoon/encoder/LineWriter.java @@ -37,6 +37,8 @@ public LineWriter(final int indentSize) { /** * Adds a line with the specified depth and content. + * Trailing spaces are stripped from content per spec §12 + * (encoders MUST NOT emit trailing spaces). 
* * @param depth Indentation depth (0 = no indentation) * @param content Line content to add @@ -56,7 +58,13 @@ public void push(final int depth, final String content) { } } } - stringBuilder.append(content); + // Strip trailing spaces per spec §12 + final int end = content.length() - 1; + int trimEnd = end; + while (trimEnd >= 0 && content.charAt(trimEnd) == ' ') { + trimEnd--; + } + stringBuilder.append(trimEnd < 0 ? "" : content.substring(0, trimEnd + 1)); } /** diff --git a/src/main/java/dev/toonformat/jtoon/encoder/PrimitiveEncoder.java b/src/main/java/dev/toonformat/jtoon/encoder/PrimitiveEncoder.java index e3f50cf..2c867ce 100644 --- a/src/main/java/dev/toonformat/jtoon/encoder/PrimitiveEncoder.java +++ b/src/main/java/dev/toonformat/jtoon/encoder/PrimitiveEncoder.java @@ -48,11 +48,22 @@ private static String encodeNumber(final JsonNode value) { return value.asString(); } - final double doubleValue = value.asDouble(); - final BigDecimal decimal = BigDecimal.valueOf(doubleValue); + // Use decimalValue() for exact precision from Jackson's DecimalNode, + // avoiding precision loss from double conversion. + // BigDecimal.valueOf(double) uses Double.toString which can only + // represent ~15-17 significant digits. 
+ final BigDecimal decimal = value.decimalValue(); final String plainString = decimal.toPlainString(); - return stripTrailingZeros(plainString); + // Strip trailing zeros but preserve the number's mathematical value + final String stripped = stripTrailingZeros(plainString); + + // Per spec §2: -0 MUST be normalized to 0 + if ("-0".equals(stripped)) { + return "0"; + } + + return stripped; } /** diff --git a/src/main/java/dev/toonformat/jtoon/normalizer/JsonNormalizer.java b/src/main/java/dev/toonformat/jtoon/normalizer/JsonNormalizer.java index 6bc293f..58f092c 100644 --- a/src/main/java/dev/toonformat/jtoon/normalizer/JsonNormalizer.java +++ b/src/main/java/dev/toonformat/jtoon/normalizer/JsonNormalizer.java @@ -251,7 +251,9 @@ private static JsonNode tryNormalizeTemporal(final Object value) { } else if (value instanceof LocalTime lt) { return formatTemporal(lt, DateTimeFormatter.ISO_LOCAL_TIME); } else if (value instanceof ZonedDateTime zonedDateTime) { - return formatTemporal(zonedDateTime, DateTimeFormatter.ISO_ZONED_DATE_TIME); + // Format with ISO_OFFSET_DATE_TIME: drops the non-standard [Zone/Id] suffix and, unlike + // OffsetDateTime.toString(), always emits the seconds field (10:30:00, never 10:30) + return formatTemporal(zonedDateTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME); } else if (value instanceof OffsetDateTime offsetDateTime) { return formatTemporal(offsetDateTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME); } else if (value instanceof Calendar calendar) { diff --git a/src/main/java/dev/toonformat/jtoon/util/Headers.java b/src/main/java/dev/toonformat/jtoon/util/Headers.java index fdd5b6c..8deb6fa 100644 --- a/src/main/java/dev/toonformat/jtoon/util/Headers.java +++ b/src/main/java/dev/toonformat/jtoon/util/Headers.java @@ -22,12 +22,13 @@ public final class Headers { /** * Matches keyed array headers: items[2]{id,name}: or tags[3]: or data[4]{id}:. - * Also matches quoted keys with brackets: "key[test]"[3]:. 
+ * Also matches quoted keys with brackets: "key[test]"[3]: and keys with + * escaped quotes: "key\"quote"[3]:. * Captures: group(1)=key (quoted or unquoted), group(2)=#marker, group(3)=delimiter, * group(4)=optional field spec */ public static final Pattern KEYED_ARRAY_PATTERN = Pattern.compile( - "^(\"[^\"]+\"|[^\\[\\]]+)\\[(#?)\\d+([\\t|])?](\\{[^}]+})?:.*$"); + "^(\"(?:[^\"\\\\]|\\\\.)*\"|[^\\[\\]]+)\\[(#?)\\d+([\\t|])?](\\{[^}]+})?:.*$"); private Headers() { throw new UnsupportedOperationException("Utility class cannot be instantiated"); diff --git a/src/main/java/dev/toonformat/jtoon/util/StringEscaper.java b/src/main/java/dev/toonformat/jtoon/util/StringEscaper.java index b8ce0db..d3f00f4 100644 --- a/src/main/java/dev/toonformat/jtoon/util/StringEscaper.java +++ b/src/main/java/dev/toonformat/jtoon/util/StringEscaper.java @@ -73,8 +73,8 @@ public static void validateString(final String value) { if (value.startsWith("\"") && value.endsWith("\"")) { final String unquoted = value.substring(1, value.length() - 1); boolean escaped = false; - - for (int i = 0; i < unquoted.length(); i++) { + int i = 0; + while (i < unquoted.length()) { final char c = unquoted.charAt(i); if (escaped) { // Check if escape sequence is valid @@ -106,12 +106,17 @@ public static void validateString(final String value) { || !Character.isLowSurrogate((char) Integer.parseInt(nextHex, HEX_RADIX))) { throw new IllegalArgumentException(INVALID_UNICODE_LONE_HIGH); } + // Skip past the full surrogate pair (\\uXXXX\\uXXXX = 12 chars total) + // to avoid reprocessing the consumed hex digits and the low surrogate + // escape as individual characters. 
+ i += UNICODE_ESCAPE_TOTAL_LENGTH + UNICODE_HEX_LENGTH; } } escaped = false; } else if (c == '\\') { escaped = true; } + i++; } // Check for trailing backslash (invalid escape) diff --git a/src/main/java/dev/toonformat/jtoon/util/StringValidator.java b/src/main/java/dev/toonformat/jtoon/util/StringValidator.java index 499bd91..c33987e 100644 --- a/src/main/java/dev/toonformat/jtoon/util/StringValidator.java +++ b/src/main/java/dev/toonformat/jtoon/util/StringValidator.java @@ -92,6 +92,12 @@ public static boolean isValidUnquotedKey(final String key) { for (int i = 1; i < len; i++) { final char c = key.charAt(i); + // Reject control characters (U+0000-U+001F) even though + // Character.isJavaIdentifierPart returns true for identifier-ignorable + // control chars like U+0004. These must be escaped in TOON output. + if (c <= CONTROL_CHAR_MAX) { + return false; + } if (!Character.isJavaIdentifierPart(c) && c != '.') { return false; } diff --git a/src/main/java/dev/toonformat/jtoon/validator/ToonValidator.java b/src/main/java/dev/toonformat/jtoon/validator/ToonValidator.java new file mode 100644 index 0000000..c3b7e43 --- /dev/null +++ b/src/main/java/dev/toonformat/jtoon/validator/ToonValidator.java @@ -0,0 +1,124 @@ +package dev.toonformat.jtoon.validator; + +import dev.toonformat.jtoon.DecodeOptions; +import dev.toonformat.jtoon.decoder.ValueDecoder; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +/** + * Validates TOON-formatted strings for conformance to the TOON specification (§13.3). + * + *

    <p>Performs structural checks beyond what the decoder's strict mode enforces: + * <ul> + * <li>Structural conformance (headers, indentation, list markers)</li> + * <li>Whitespace invariants (no trailing spaces/newlines)</li> + * <li>Delimiter consistency between headers and rows</li> + * <li>Array length counts match declared [N]</li> + * </ul> + * + * <p>
    This is a read-only validation utility. It does not produce decoded values. + */ +public final class ToonValidator { + + private static final Pattern NEWLINE = Pattern.compile("\r?\n"); + + private ToonValidator() { + throw new UnsupportedOperationException("Utility class cannot be instantiated"); + } + + /** + * Result of a validation run. + * + * @param valid true if the input passed all checks + * @param issues list of human-readable issue descriptions (empty when valid) + */ + public record ValidationResult(boolean valid, List issues) { + private static final ValidationResult PASS = new ValidationResult(true, List.of()); + + static ValidationResult pass() { + return PASS; + } + + static ValidationResult fail(final List issues) { + return new ValidationResult(false, List.copyOf(issues)); + } + } + + /** + * Validates a TOON-formatted string. + * + * @param toon the TOON string to validate + * @param options decode options (indent, delimiter, strict mode) + * @return validation result with issues list + */ + public static ValidationResult validate(final String toon, final DecodeOptions options) { + if (toon == null || toon.isBlank()) { + return ValidationResult.pass(); + } + + final List issues = new ArrayList<>(); + + // 1. Try decoding in strict mode to catch structural errors + try { + ValueDecoder.decode(toon, options); + } catch (IllegalArgumentException e) { + issues.add("Structural error: " + e.getMessage()); + } + + // 2. Whitespace invariants (encoder checks) + checkTrailingWhitespace(toon, issues); + + // 3. Check for trailing newline (encoder MUST NOT emit) + if (!toon.isEmpty() && toon.charAt(toon.length() - 1) == '\n') { + issues.add("Trailing newline at end of document (§12)"); + } + + if (issues.isEmpty()) { + return ValidationResult.pass(); + } + + return ValidationResult.fail(issues); + } + + /** + * Validates a TOON-formatted string with default options (strict mode, comma delimiter, 2-space indent). 
+ * + * @param toon the TOON string to validate + * @return validation result with issues list + */ + public static ValidationResult validate(final String toon) { + return validate(toon, DecodeOptions.DEFAULT); + } + + /** + * Returns true if the TOON string is valid per the specification. + * + * @param toon the TOON string to validate + * @return true if valid + */ + public static boolean isValid(final String toon) { + return validate(toon).valid(); + } + + /** + * Returns true if the TOON string is valid per the specification. + * + * @param toon the TOON string to validate + * @param options decode options + * @return true if valid + */ + public static boolean isValid(final String toon, final DecodeOptions options) { + return validate(toon, options).valid(); + } + + private static void checkTrailingWhitespace(final String toon, final List issues) { + final String[] lines = NEWLINE.split(toon, -1); + for (int i = 0; i < lines.length; i++) { + final String line = lines[i]; + if (!line.isEmpty() && line.charAt(line.length() - 1) == ' ') { + issues.add("Trailing space on line " + (i + 1) + " (§12)"); + } + } + } +} diff --git a/src/test/java/dev/toonformat/jtoon/JToonDecodeTest.java b/src/test/java/dev/toonformat/jtoon/JToonDecodeTest.java index 311ae45..b7ac8a0 100644 --- a/src/test/java/dev/toonformat/jtoon/JToonDecodeTest.java +++ b/src/test/java/dev/toonformat/jtoon/JToonDecodeTest.java @@ -670,6 +670,127 @@ void testLenientMode() { // Then assertEquals(Collections.emptyList(), result); } + + @Test + @DisplayName("strict mode: throws on duplicate sibling keys") + void strictDuplicateSiblingKeys() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("name: Ada\nname: Bob")); + } + + @Test + @DisplayName("strict mode: throws on nested duplicate sibling keys") + void strictNestedDuplicateKeys() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode(""" + outer: + name: Ada + name: Bob + """)); + } + + @Test + @DisplayName("strict 
mode: throws on duplicate keys within a list-item object") + void strictDuplicateKeysInListItem() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode(""" + items[1]: + - id: 1 + id: 2 + """)); + } + + @Test + @DisplayName("strict mode: throws on extra brackets between bracket segment and colon") + void strictExtraBrackets() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("foo[1][bar]: 10")); + } + + @Test + @DisplayName("strict mode: throws on non-integer bracket segment") + void strictNonIntegerBracket() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("foo[bar]: 10")); + } + + @Test + @DisplayName("strict mode: throws on text between bracket segment and colon") + void strictTextBetweenBracketAndColon() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("foo[2]extra: a,b")); + } + + @Test + @DisplayName("strict mode: throws on negative bracket length") + void strictNegativeBracketLength() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("items[-1]: a,b,c")); + } + + @Test + @DisplayName("strict mode: throws on bracket length with leading zeros") + void strictLeadingZeroBracketLength() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("items[03]: a,b,c")); + } + + @Test + @DisplayName("strict mode: throws on array header missing colon") + void strictMissingColonInArrayHeader() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode(""" + items[2]{id,name} + 1,Ada + 2,Bob + """)); + } + + @Test + @DisplayName("strict mode: throws on whitespace between bracket segment and colon") + void strictWhitespaceBetweenBracketAndColon() { + assertThrows(IllegalArgumentException.class, + () -> JToon.decode("items[2] :\n 1,2")); + } + + @Test + @DisplayName("strict mode: throws on whitespace between bracket and fields segment") + void strictWhitespaceBetweenBracketAndFields() { + assertThrows(IllegalArgumentException.class, + () -> 
JToon.decode("items[2] {a,b}:\n 1,2\n 3,4")); + } + + @Test + @DisplayName("lenient mode: allows brackets in keys") + void lenientAllowsBracketsInKeys() { + DecodeOptions lenient = DecodeOptions.withStrict(false); + Object result = JToon.decode("foo[1][bar]: 10", lenient); + @SuppressWarnings("unchecked") + Map map = (Map) result; + assertEquals(10L, map.get("foo[1][bar]")); + } + + @Test + @DisplayName("lenient mode: allows duplicate keys (last-write-wins)") + void lenientAllowsDuplicateKeys() { + DecodeOptions lenient = DecodeOptions.withStrict(false); + Object result = JToon.decode("name: Ada\nname: Bob", lenient); + @SuppressWarnings("unchecked") + Map map = (Map) result; + assertEquals("Bob", map.get("name")); + } + + @Test + @DisplayName("lenient mode: allows leading zeros in bracket length") + void lenientAllowsLeadingZeros() { + DecodeOptions lenient = DecodeOptions.withStrict(false); + Object result = JToon.decode("items[03]: a,b,c", lenient); + @SuppressWarnings("unchecked") + Map map = (Map) result; + @SuppressWarnings("unchecked") + List items = (List) map.get("items"); + assertEquals(3, items.size()); + } } @Nested diff --git a/src/test/java/dev/toonformat/jtoon/decoder/DecodeHelperTest.java b/src/test/java/dev/toonformat/jtoon/decoder/DecodeHelperTest.java index c624c28..3630fd0 100644 --- a/src/test/java/dev/toonformat/jtoon/decoder/DecodeHelperTest.java +++ b/src/test/java/dev/toonformat/jtoon/decoder/DecodeHelperTest.java @@ -583,6 +583,75 @@ void blanksOnlyOk() { } } + @Nested + @DisplayName("hasUnquotedBrackets()") + class HasUnquotedBrackets { + + @Test + @DisplayName("should return true when brackets are present") + void detectsBrackets() { + assertTrue(DecodeHelper.hasUnquotedBrackets("foo[bar]")); + assertTrue(DecodeHelper.hasUnquotedBrackets("[test]")); + assertTrue(DecodeHelper.hasUnquotedBrackets("items[2]extra")); + } + + @Test + @DisplayName("should return false when no brackets") + void noBrackets() { + 
assertFalse(DecodeHelper.hasUnquotedBrackets("simple key: value")); + assertFalse(DecodeHelper.hasUnquotedBrackets("foo")); + assertFalse(DecodeHelper.hasUnquotedBrackets("")); + } + + @Test + @DisplayName("should return false when brackets are inside quotes") + void bracketsInsideQuotes() { + assertFalse(DecodeHelper.hasUnquotedBrackets("\"[test]\"")); + assertFalse(DecodeHelper.hasUnquotedBrackets("\"foo[bar]\"")); + } + + @Test + @DisplayName("should handle escaped quotes properly") + void escapedQuotes() { + // escaped quote inside quoted section should not end the quotes + assertFalse(DecodeHelper.hasUnquotedBrackets("\"escaped\\\"quote[br]\"")); + } + } + + @Nested + @DisplayName("checkDuplicateKey()") + class CheckDuplicateKey { + + @Test + @DisplayName("should throw when key already exists in strict mode") + void duplicateKeyThrows() { + Map map = new HashMap<>(); + map.put("name", "Ada"); + setUpContext(""); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> DecodeHelper.checkDuplicateKey(map, "name", context)); + assertTrue(ex.getMessage().contains("Duplicate key")); + } + + @Test + @DisplayName("should not throw when key does not exist") + void newKeyOk() { + Map map = new HashMap<>(); + map.put("name", "Ada"); + setUpContext(""); + assertDoesNotThrow(() -> DecodeHelper.checkDuplicateKey(map, "other", context)); + } + + @Test + @DisplayName("should not throw in non-strict mode") + void nonStrictAllowsDuplicate() { + Map map = new HashMap<>(); + map.put("name", "Ada"); + context.options = DecodeOptions.withStrict(false); + assertDoesNotThrow(() -> DecodeHelper.checkDuplicateKey(map, "name", context)); + } + } + @Nested @DisplayName("computeLeadingSpaces()") class computeLeadingSpaces { diff --git a/src/test/java/dev/toonformat/jtoon/decoder/PrimitiveDecoderTest.java b/src/test/java/dev/toonformat/jtoon/decoder/PrimitiveDecoderTest.java index c63102d..7c47e2a 100644 --- 
a/src/test/java/dev/toonformat/jtoon/decoder/PrimitiveDecoderTest.java +++ b/src/test/java/dev/toonformat/jtoon/decoder/PrimitiveDecoderTest.java @@ -234,6 +234,84 @@ void givenNumberWithLeadingZeroOutsideTheOctalRange_whenParse_thenReturnsLong() assertEquals("0.9", result.toString()); } + @Test + void given08_whenParse_thenReturnsString() { + // Given + String input = "08"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals("08", result); + } + + @Test + void given09_whenParse_thenReturnsString() { + // Given + String input = "09"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals("09", result); + } + + @Test + void given00_whenParse_thenReturnsString() { + // Given + String input = "00"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals("00", result); + } + + @Test + void givenNegativeLeadingZero_whenParse_thenReturnsString() { + // Given + String input = "-07"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals("-07", result); + } + + @Test + void givenLeadingZeroDecimal_whenParse_thenReturnsNumber() { + // Given + String input = "0.5"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals(0.5, (Double) result, 0.000001); + } + + @Test + void givenLeadingZeroExponent_whenParse_thenReturnsNumber() { + // Given — "0e1" = 0 × 10^1 = 0, which is a whole number → Long + String input = "0e1"; + + // When + Object result = PrimitiveDecoder.parse(input); + + // Then + assertNotNull(result); + assertEquals(0L, result); + } + @Test void givenMinLongNumber_whenParse_thenReturnsLong() { // Given diff --git a/src/test/java/dev/toonformat/jtoon/decoder/TabularArrayDecoderTest.java b/src/test/java/dev/toonformat/jtoon/decoder/TabularArrayDecoderTest.java index 
adc8923..eb37beb 100644 --- a/src/test/java/dev/toonformat/jtoon/decoder/TabularArrayDecoderTest.java +++ b/src/test/java/dev/toonformat/jtoon/decoder/TabularArrayDecoderTest.java @@ -216,6 +216,101 @@ void testTerminateWhenLineDepthLessThanExpected() throws Exception { assertTrue(result, "Should terminate when lineDepth < expectedRowDepth"); } + @Test + @DisplayName("should NOT terminate when delimiter found before colon (§9.3)") + void testDisambiguation_DelimiterBeforeColon_continuesRow() throws Exception { + // Given — "10,active:done" has comma at index 2, colon at index 9 + context.options = new DecodeOptions(2, Delimiter.COMMA, true, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + context.delimiter = context.options.delimiter(); + String line = " 10,active:done"; + int lineDepth = 1; + int expectedRowDepth = 1; + + // When + boolean result = (boolean) invokePrivateStatic("shouldTerminateTabularArray", + new Class[]{String.class, int.class, int.class, DecodeContext.class}, + line, lineDepth, expectedRowDepth, context); + + // Then — delimiter comes before colon, so this is a tabular row + assertFalse(result, "Should continue tabular array when delimiter found before colon (§9.3)"); + } + + @Test + @DisplayName("should terminate when colon found before delimiter (§9.3)") + void testDisambiguation_ColonBeforeDelimiter_terminates() throws Exception { + // Given — "time: 10,active" has colon at index 4, comma only at index 8 (after the colon) + context.options = new DecodeOptions(2, Delimiter.COMMA, true, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + context.delimiter = context.options.delimiter(); + String line = " time: 10,active"; + int lineDepth = 1; + int expectedRowDepth = 1; + + // When + boolean result = (boolean) invokePrivateStatic("shouldTerminateTabularArray", + new Class[]{String.class, 
int.class, int.class, DecodeContext.class}, + line, lineDepth, expectedRowDepth, context); + + // Then — colon comes before any unquoted delimiter, so this is a key-value pair + assertTrue(result, "Should terminate tabular array when colon found before delimiter (§9.3)"); + } + + @Test + @DisplayName("should terminate when line has colon but no delimiter (§9.3)") + void testDisambiguation_ColonOnly_terminates() throws Exception { + // Given — "done: true" has colon but no comma delimiter + context.options = new DecodeOptions(2, Delimiter.COMMA, true, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + context.delimiter = context.options.delimiter(); + String line = " done: true"; + int lineDepth = 1; + int expectedRowDepth = 1; + + // When + boolean result = (boolean) invokePrivateStatic("shouldTerminateTabularArray", + new Class[]{String.class, int.class, int.class, DecodeContext.class}, + line, lineDepth, expectedRowDepth, context); + + // Then — colon present, no delimiter → key-value line + assertTrue(result, "Should terminate tabular array when colon present without delimiter (§9.3)"); + } + + @Test + @DisplayName("should NOT terminate when line has delimiter but no colon (§9.3)") + void testDisambiguation_DelimiterOnly_continuesRow() throws Exception { + // Given — "10,active" has comma but no colon → tabular row + context.options = new DecodeOptions(2, Delimiter.COMMA, true, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + context.delimiter = context.options.delimiter(); + String line = " 10,active"; + int lineDepth = 1; + int expectedRowDepth = 1; + + // When + boolean result = (boolean) invokePrivateStatic("shouldTerminateTabularArray", + new Class[]{String.class, int.class, int.class, DecodeContext.class}, + line, lineDepth, expectedRowDepth, context); + + // Then — no colon → this is a 
tabular row + assertFalse(result, "Should continue tabular array when no colon present (§9.3)"); + } + + @Test + @DisplayName("should handle tab pipe delimiter in disambiguation (§9.3)") + void testDisambiguation_PipeDelimiter_continuesRow() throws Exception { + // Given — pipe-delimited row, pipe before colon + context.options = new DecodeOptions(2, Delimiter.PIPE, true, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + context.delimiter = context.options.delimiter(); + String line = " 10|active:done"; + int lineDepth = 1; + int expectedRowDepth = 1; + + // When + boolean result = (boolean) invokePrivateStatic("shouldTerminateTabularArray", + new Class[]{String.class, int.class, int.class, DecodeContext.class}, + line, lineDepth, expectedRowDepth, context); + + // Then — pipe (delimiter) before colon → tabular row + assertFalse(result, "Should continue tabular array with pipe delimiter when delim found before colon (§9.3)"); + } + @Test void testParseTabularArray_ReturnsEmptyList_WhenHeaderDoesNotMatchPattern() { // Given diff --git a/src/test/java/dev/toonformat/jtoon/decoder/ValueDecoderTest.java b/src/test/java/dev/toonformat/jtoon/decoder/ValueDecoderTest.java index d0b9abb..930fed5 100644 --- a/src/test/java/dev/toonformat/jtoon/decoder/ValueDecoderTest.java +++ b/src/test/java/dev/toonformat/jtoon/decoder/ValueDecoderTest.java @@ -204,4 +204,57 @@ void decodeToJson_throwsWrappedException_whenDecodeFails() { assertTrue(ex.getCause().getMessage().contains("Unexpected indentation")); } + @Test + void givenInvalidInputAndStrictFalse_whenDecode_thenReturnsNull() { + // Given — malformed quoted string causes StringEscaper to throw + DecodeOptions options = new DecodeOptions(2, Delimiter.COMMA, false, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + String invalidInput = "value: \"unclosed"; + + // 
When + Object result = ValueDecoder.decode(invalidInput, options); + + // Then + assertNull(result); + } + + @Test + void givenDecodeReturnsNull_whenDecodeToJson_thenReturnsNullLiteral() { + // Given — malformed quoted string causes StringEscaper to throw + DecodeOptions options = new DecodeOptions(2, Delimiter.COMMA, false, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + String invalidInput = "value: \"unclosed"; + + // When + String result = ValueDecoder.decodeToJson(invalidInput, options); + + // Then + assertEquals("null", result); + } + + @Test + void givenNullLiteralInput_whenDecodeToJson_thenReturnsNullLiteral() { + // Given + String input = "null"; + + // When + String result = ValueDecoder.decodeToJson(input, DecodeOptions.DEFAULT); + + // Then + assertEquals("null", result); + } + + @Test + void givenValidInputAndStrictFalse_whenDecode_thenReturnsResult() { + // Given + DecodeOptions options = new DecodeOptions(2, Delimiter.COMMA, false, PathExpansion.OFF, DecodeOptions.MAX_ALLOWED_DEPTH, DecodeOptions.DEFAULT_MAX_ARRAY_SIZE, DecodeOptions.DEFAULT_MAX_STRING_LENGTH); + String validInput = "name: Ada"; + + // When + Object result = ValueDecoder.decode(validInput, options); + + // Then + assertNotNull(result); + assertInstanceOf(Map.class, result); + assertEquals("Ada", ((Map) result).get("name")); + } + } diff --git a/src/test/java/dev/toonformat/jtoon/encoder/ArrayEncoderTest.java b/src/test/java/dev/toonformat/jtoon/encoder/ArrayEncoderTest.java index ac3d84c..4ebe01b 100644 --- a/src/test/java/dev/toonformat/jtoon/encoder/ArrayEncoderTest.java +++ b/src/test/java/dev/toonformat/jtoon/encoder/ArrayEncoderTest.java @@ -1,5 +1,6 @@ package dev.toonformat.jtoon.encoder; +import dev.toonformat.jtoon.Delimiter; import dev.toonformat.jtoon.EncodeOptions; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; @@ -133,6 +134,81 @@ void 
encodeArrayWithAllPrimitivesArrayOfArrays() { " - [3]: 4,5,6", lineWriter.toString()); } + @Test + @DisplayName("should encode empty keyed array as key: [] without lengthMarker") + void encodeEmptyArrayAsKeyValue() { + // Given + ArrayNode emptyArray = jsonNodeFactory.arrayNode(); + EncodeOptions options = EncodeOptions.DEFAULT; + LineWriter writer = new LineWriter(2); + + // When + ArrayEncoder.encodeArray("tags", emptyArray, writer, 0, options); + + // Then + assertEquals("tags: []", writer.toString()); + } + + @Test + @DisplayName("should encode empty keyed array with lengthMarker as header form") + void encodeEmptyArrayWithLengthMarker() { + // Given + ArrayNode emptyArray = jsonNodeFactory.arrayNode(); + EncodeOptions options = EncodeOptions.withLengthMarker(true); + LineWriter writer = new LineWriter(2); + + // When + ArrayEncoder.encodeArray("tags", emptyArray, writer, 0, options); + + // Then + assertEquals("tags[#0]:", writer.toString()); + } + + @Test + @DisplayName("should encode top-level empty array as [] without lengthMarker") + void encodeRootEmptyArray() { + // Given + ArrayNode emptyArray = jsonNodeFactory.arrayNode(); + EncodeOptions options = EncodeOptions.DEFAULT; + LineWriter writer = new LineWriter(2); + + // When + ArrayEncoder.encodeArray(null, emptyArray, writer, 0, options); + + // Then + assertEquals("[]", writer.toString()); + } + + @Test + @DisplayName("should encode top-level empty array with lengthMarker as [0]:") + void encodeRootEmptyArrayWithLengthMarker() { + // Given + ArrayNode emptyArray = jsonNodeFactory.arrayNode(); + EncodeOptions options = EncodeOptions.withLengthMarker(true); + LineWriter writer = new LineWriter(2); + + // When + ArrayEncoder.encodeArray(null, emptyArray, writer, 0, options); + + // Then + assertEquals("[0]:", writer.toString()); + } + + @Test + @DisplayName("should encode empty nested array as key: []") + void encodeEmptyNestedArray() { + // Given + ArrayNode emptyArray = jsonNodeFactory.arrayNode(); + 
EncodeOptions options = EncodeOptions.DEFAULT; + LineWriter writer = new LineWriter(2); + + // When + ArrayEncoder.encodeArray("data", emptyArray, writer, 1, options); + + // Then + assertEquals(" data: []", writer.toString()); + } + @Test @DisplayName("throws unsupported Operation Exception for calling the constructor") void throwsOnConstructor() throws NoSuchMethodException { diff --git a/src/test/java/dev/toonformat/jtoon/encoder/LineWriterTest.java b/src/test/java/dev/toonformat/jtoon/encoder/LineWriterTest.java index 45cd8c7..535157b 100644 --- a/src/test/java/dev/toonformat/jtoon/encoder/LineWriterTest.java +++ b/src/test/java/dev/toonformat/jtoon/encoder/LineWriterTest.java @@ -299,9 +299,61 @@ void testComplexMixedStructure() { @DisplayName("Edge Cases") class EdgeCases { - @Test - @DisplayName("should handle depth 0 correctly") - void testDepthZero() { + @Test + @DisplayName("should strip trailing spaces from content (§12)") + void testTrailingSpacesAreStripped() { + // Given + LineWriter writer = new LineWriter(2); + + // When + writer.push(0, "content "); + + // Then + assertEquals("content", writer.toString()); + } + + @Test + @DisplayName("should strip trailing spaces from indented content (§12)") + void testTrailingSpacesIndented() { + // Given — content " value " has leading spaces (indent) and trailing spaces + LineWriter writer = new LineWriter(2); + + // When — trailing spaces stripped first → " value", then depth=1 adds indent + writer.push(1, " value "); + + // Then — indent (2 spaces) + " " + "value" = " value" + assertEquals(" value", writer.toString()); + } + + @Test + @DisplayName("should handle content that is entirely spaces (§12)") + void testAllSpacesContent() { + // Given + LineWriter writer = new LineWriter(2); + + // When + writer.push(0, " "); + + // Then + assertEquals("", writer.toString()); + } + + @Test + @DisplayName("should handle content with no trailing spaces (§12)") + void testNoTrailingSpaces() { + // Given + LineWriter 
writer = new LineWriter(2); + + // When + writer.push(0, "content"); + + // Then + assertEquals("content", writer.toString()); + } + + @Test + @DisplayName("should handle depth 0 correctly") + void testDepthZero() { // Given LineWriter writer = new LineWriter(2); diff --git a/src/test/java/dev/toonformat/jtoon/encoder/PrimitiveEncoderTest.java b/src/test/java/dev/toonformat/jtoon/encoder/PrimitiveEncoderTest.java index 0656d81..80aee8b 100644 --- a/src/test/java/dev/toonformat/jtoon/encoder/PrimitiveEncoderTest.java +++ b/src/test/java/dev/toonformat/jtoon/encoder/PrimitiveEncoderTest.java @@ -124,6 +124,33 @@ void testDecimal() { // Then assertEquals("123.456", result); } + + @Test + @DisplayName("should preserve high-precision BigDecimal exactly") + void testHighPrecisionDecimal() { + // Given — a 40-digit number that would lose precision through Double + java.math.BigDecimal precise = new java.math.BigDecimal("1234567890123456789012345678901234567890.12345678901234567890"); + + // When + String result = PrimitiveEncoder.encodePrimitive(DecimalNode.valueOf(precise), Delimiter.COMMA.toString()); + + // Then — exact value preserved, not truncated via double precision + // trailing zero stripped by stripTrailingZeros + assertEquals("1234567890123456789012345678901234567890.1234567890123456789", result); + } + + @Test + @DisplayName("should preserve high-precision small decimal") + void testHighPrecisionSmallDecimal() { + // Given — a tiny fractional number that loses precision via Double + java.math.BigDecimal tiny = new java.math.BigDecimal("0.00000000000012345678901234567890"); + + // When + String result = PrimitiveEncoder.encodePrimitive(DecimalNode.valueOf(tiny), Delimiter.COMMA.toString()); + + // Then — trailing zero stripped by stripTrailingZeros + assertEquals("0.0000000000001234567890123456789", result); + } } @Nested diff --git a/src/test/java/dev/toonformat/jtoon/normalizer/JsonNormalizerTest.java 
b/src/test/java/dev/toonformat/jtoon/normalizer/JsonNormalizerTest.java index 449f826..976007a 100644 --- a/src/test/java/dev/toonformat/jtoon/normalizer/JsonNormalizerTest.java +++ b/src/test/java/dev/toonformat/jtoon/normalizer/JsonNormalizerTest.java @@ -548,7 +548,7 @@ void testZonedDateTime() { // Then assertTrue(result.isString()); - assertTrue(result.asString().startsWith("2023-10-15T14:30:45")); + assertEquals("2023-10-15T14:30:45Z", result.asString()); } @Test @@ -1517,7 +1517,7 @@ void givenZoneDateTime_whenTryNormalizeTemporal_thenIsoStringNode() throws Excep // Then assertInstanceOf(StringNode.class, result); - assertEquals("2025-11-26T15:45:00+01:00[Europe/Berlin]", ((JsonNode) result).asString()); + assertEquals("2025-11-26T15:45+01:00", ((JsonNode) result).asString()); } @Test diff --git a/src/test/java/dev/toonformat/jtoon/util/HeadersTest.java b/src/test/java/dev/toonformat/jtoon/util/HeadersTest.java index fa96040..7a3a589 100644 --- a/src/test/java/dev/toonformat/jtoon/util/HeadersTest.java +++ b/src/test/java/dev/toonformat/jtoon/util/HeadersTest.java @@ -45,4 +45,30 @@ void keyedArrayPatternMatches() { assertNotNull(Headers.KEYED_ARRAY_PATTERN.matcher("tags[3]:").matches()); assertNotNull(Headers.KEYED_ARRAY_PATTERN.matcher("data[4]{id}:").matches()); } + + @Test + @DisplayName("KEYED_ARRAY_PATTERN matches quoted keys with spaces") + void keyedArrayPatternQuotedKeyWithSpaces() { + assertNotNull(Headers.KEYED_ARRAY_PATTERN.matcher("\"my items\"[3]:").matches()); + assertNotNull(Headers.KEYED_ARRAY_PATTERN.matcher("\"user name\"[2]{id,name}:").matches()); + } + + @Test + @DisplayName("KEYED_ARRAY_PATTERN matches quoted keys with escaped quotes") + void keyedArrayPatternEscapedQuotes() { + // Key containing escaped quotes: "name\"with\"quotes" + assertTrue(Headers.KEYED_ARRAY_PATTERN.matcher("\"name\\\"with\\\"quotes\"[3]:").matches()); + assertTrue(Headers.KEYED_ARRAY_PATTERN.matcher("\"key\\\"word\"[2]{a,b}:").matches()); + } + + @Test + 
@DisplayName("KEYED_ARRAY_PATTERN does not match malformed patterns") + void keyedArrayPatternNoMatch() { + // Missing colon + assertFalse(Headers.KEYED_ARRAY_PATTERN.matcher("items[3]").matches()); + // Missing brackets + assertFalse(Headers.KEYED_ARRAY_PATTERN.matcher("items:").matches()); + // Negative length + assertFalse(Headers.KEYED_ARRAY_PATTERN.matcher("items[-1]:").matches()); + } } diff --git a/src/test/java/dev/toonformat/jtoon/util/StringEscaperTest.java b/src/test/java/dev/toonformat/jtoon/util/StringEscaperTest.java index 3582954..6073bfa 100644 --- a/src/test/java/dev/toonformat/jtoon/util/StringEscaperTest.java +++ b/src/test/java/dev/toonformat/jtoon/util/StringEscaperTest.java @@ -76,6 +76,12 @@ void testEmptyString() { assertEquals("", StringEscaper.escape("")); } + @Test + @DisplayName("should return null for null input") + void testNullInput() { + assertNull(StringEscaper.escape(null)); + } + @ParameterizedTest @DisplayName("should not modify strings without special characters") @ValueSource(strings = { @@ -177,6 +183,12 @@ void testEmptyQuotedString() { // Then assertEquals("", StringEscaper.unescape("\"\"")); } + + @Test + @DisplayName("should not unquote when string starts with but does not end with quote") + void testUnmatchedOpeningQuote() { + assertEquals("\"unclosed", StringEscaper.unescape("\"unclosed")); + } } @Nested @@ -278,6 +290,235 @@ void handlesDoubleBackslashCorrectly() { } } + @Nested + @DisplayName("Control Character Escaping") + class ControlCharacterEscaping { + + static Stream controlCharCases() { + return Stream.of( + Arguments.of("U+0000 null", "\u0000", "\\u0000"), + Arguments.of("U+0004 EOT", "\u0004", "\\u0004"), + Arguments.of("U+000F shift-in", "\u000F", "\\u000f"), + Arguments.of("U+001B escape", "\u001B", "\\u001b"), + Arguments.of("U+001F unit separator", "\u001F", "\\u001f"), + Arguments.of("U+0001 in middle", "a\u0001b", "a\\u0001b")); + } + + @ParameterizedTest(name = "should escape {0}") + 
@MethodSource("controlCharCases") + @DisplayName("should escape control characters via \\uXXXX") + void testControlChars(String description, String input, String expected) { + assertEquals(expected, StringEscaper.escape(input)); + } + + @Test + @DisplayName("should NOT escape space (U+0020)") + void testSpaceNotEscaped() { + assertEquals("a b", StringEscaper.escape("a b")); + } + } + + @Nested + @DisplayName("validateString - Surrogate Pairs") + class ValidateStringSurrogates { + + @Test + @DisplayName("should accept valid surrogate pair") + void validSurrogatePair() { + String input = "\"a\\uD800\\uDC00b\""; + assertDoesNotThrow(() -> StringEscaper.validateString(input)); + } + + @Test + @DisplayName("should reject lone low surrogate") + void loneLowSurrogate() { + String input = "\"a\\uDC00b\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertTrue(ex.getMessage().contains("lone low surrogate")); + } + + @Test + @DisplayName("should reject lone high surrogate") + void loneHighSurrogate() { + String input = "\"a\\uD800b\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + + @Test + @DisplayName("should reject high surrogate followed by non-\\u") + void highSurrogateWithoutBackslash() { + String input = "\"a\\uD800X\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + + @Test + @DisplayName("should reject invalid hex in \\u escape") + void invalidUnicodeHex() { + String input = "\"a\\u00XXb\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertTrue(ex.getMessage().contains("Invalid escape sequence: \\u")); + } + + @Test + 
@DisplayName("should reject truncated \\u escape (fewer than 4 hex chars)") + void truncatedUnicodeEscape() { + // \\u00b has only 3 hex chars + String input = "\"\\u00b\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertEquals("Invalid escape sequence: \\u", ex.getMessage()); + } + + @Test + @DisplayName("should reject high surrogate followed by non-backslash char") + void highSurrogateFollowedByNonBackslash() { + // \\uD800! — '!' is not '\\', with enough trailing chars to pass length check + String input = "\"a\\uD800!bcdefg\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertEquals("Invalid unicode escape: lone high surrogate", ex.getMessage()); + } + + @Test + @DisplayName("should reject high surrogate followed by backslash + non-u char") + void highSurrogateFollowedByNonU() { + // \\uD800\\t — '\\' then 't' != 'u', enough trailing chars + String input = "\"a\\uD800\\tbcdef\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertEquals("Invalid unicode escape: lone high surrogate", ex.getMessage()); + } + + @Test + @DisplayName("should reject high surrogate with invalid hex in next \\u") + void highSurrogateFollowedByInvalidHex() { + // \\uD800\\u00XX — "00XX" is not valid hex + String input = "\"a\\uD800\\u00XXbcdefg\""; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertEquals("Invalid unicode escape: lone high surrogate", ex.getMessage()); + } + + @Test + @DisplayName("should reject high surrogate where next \\u hex is not low surrogate") + void highSurrogateFollowedByNonLowSurrogate() { + // \\uD800\\u0041 — 0x0041 is 'A', not a low surrogate + String input = "\"a\\uD800\\u0041bcdefg\""; + IllegalArgumentException ex = 
assertThrows(IllegalArgumentException.class, + () -> StringEscaper.validateString(input)); + assertEquals("Invalid unicode escape: lone high surrogate", ex.getMessage()); + } + + @Test + @DisplayName("should accept valid standard escapes") + void validStandardEscapes() { + assertDoesNotThrow(() -> StringEscaper.validateString("\"\\n\"")); + assertDoesNotThrow(() -> StringEscaper.validateString("\"\\r\"")); + assertDoesNotThrow(() -> StringEscaper.validateString("\"\\t\"")); + assertDoesNotThrow(() -> StringEscaper.validateString("\"\\\\\"")); + assertDoesNotThrow(() -> StringEscaper.validateString("\"\\\"\"")); + } + } + + @Nested + @DisplayName("unescape - Unicode Sequences") + class UnescapeUnicode { + + @Test + @DisplayName("should unescape \\u0004 to control char") + void unescapeControlChar() { + assertEquals("a\u0004b", StringEscaper.unescape("a\\u0004b")); + } + + @Test + @DisplayName("should unescape \\u001F") + void unescapeUpperControlChar() { + assertEquals("\u001F", StringEscaper.unescape("\\u001f")); + } + + @Test + @DisplayName("should unescape valid surrogate pair") + void unescapeSurrogatePair() { + String input = "\\uD800\\uDC00"; + String result = StringEscaper.unescape(input); + assertEquals(2, result.length()); + assertTrue(Character.isHighSurrogate(result.charAt(0))); + assertTrue(Character.isLowSurrogate(result.charAt(1))); + } + + @Test + @DisplayName("should throw on truncated \\u escape") + void truncatedUnicodeEscape() { + assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\u00b")); + } + + @Test + @DisplayName("should throw on invalid hex in \\u escape") + void invalidUnicodeHex() { + assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\u00XX")); + } + + @Test + @DisplayName("should throw on lone low surrogate in \\u escape") + void loneLowSurrogate() { + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uDC00")); + 
assertTrue(ex.getMessage().contains("lone low surrogate")); + } + + @Test + @DisplayName("should throw on lone high surrogate in \\u escape") + void loneHighSurrogate() { + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uD800")); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + + @Test + @DisplayName("should throw on high surrogate followed by non-backslash") + void highSurrogateFollowedByNonBackslash() { + // \\uD800 followed by '!' — not '\\' + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uD800!!!!!!")); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + + @Test + @DisplayName("should throw on high surrogate followed by backslash + non-u") + void highSurrogateFollowedByNonU() { + // \\uD800 followed by \\n — '\\' then 'n' != 'u' + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uD800\\n!!!!")); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + + @Test + @DisplayName("should throw on high surrogate with invalid low hex") + void highSurrogateWithInvalidLowHex() { + // \\uD800\\u00XX — low hex "00XX" is not valid hex + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uD800\\u00XX")); + assertEquals("Invalid escape sequence: \\u00XX", ex.getMessage()); + } + + @Test + @DisplayName("should throw on high surrogate where low hex is not low surrogate") + void highSurrogateWithNonLowSurrogate() { + // \\uD800\\u0041 — 0x0041 is 'A', not a low surrogate + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> StringEscaper.unescape("\\uD800\\u0041")); + assertTrue(ex.getMessage().contains("lone high surrogate")); + } + } + @Test @DisplayName("throws unsupported Operation Exception for calling the constructor") void throwsOnConstructor() throws 
NoSuchMethodException { @@ -295,6 +536,12 @@ void throwsOnConstructor() throws NoSuchMethodException { assertEquals("Utility class cannot be instantiated", cause.getMessage()); } + @Test + void testingValidateString_WithNotQuotedString() { + // covers startsWith(\") = false branch on lines 68 and 73 + StringEscaper.validateString("plain text without quotes"); + } + @Test void testingValidateString_WithNull() { // Given diff --git a/src/test/java/dev/toonformat/jtoon/util/StringValidatorTest.java b/src/test/java/dev/toonformat/jtoon/util/StringValidatorTest.java index dee43f7..4b5ea37 100644 --- a/src/test/java/dev/toonformat/jtoon/util/StringValidatorTest.java +++ b/src/test/java/dev/toonformat/jtoon/util/StringValidatorTest.java @@ -456,6 +456,20 @@ void testEmptyKey() { // Then assertFalse(StringValidator.isValidUnquotedKey("")); } + + @Test + @DisplayName("should return false for key with control characters") + void testKeyWithControlChars() { + assertFalse(StringValidator.isValidUnquotedKey("a\u0004b")); + assertFalse(StringValidator.isValidUnquotedKey("a\u0000b")); + assertFalse(StringValidator.isValidUnquotedKey("a\u001Fb")); + } + + @Test + @DisplayName("should return false for null key") + void testNullKey() { + assertFalse(StringValidator.isValidUnquotedKey(null)); + } } @Test diff --git a/src/test/java/dev/toonformat/jtoon/validator/ToonValidatorTest.java b/src/test/java/dev/toonformat/jtoon/validator/ToonValidatorTest.java new file mode 100644 index 0000000..9c97331 --- /dev/null +++ b/src/test/java/dev/toonformat/jtoon/validator/ToonValidatorTest.java @@ -0,0 +1,150 @@ +package dev.toonformat.jtoon.validator; + +import dev.toonformat.jtoon.DecodeOptions; +import dev.toonformat.jtoon.Delimiter; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for {@link ToonValidator} — structural and conformance validation. 
+ */ +@Tag("unit") +class ToonValidatorTest { + + @Test + void validToon_passesValidation() { + // Given + String toon = "id: 123\nname: Ada\nactive: true"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertTrue(result.valid()); + assertTrue(result.issues().isEmpty()); + } + + @Test + void validToon_withTabularArray_passes() { + // Given + String toon = "items[2]{id,name}:\n 1,Alice\n 2,Bob"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertTrue(result.valid()); + assertTrue(result.issues().isEmpty()); + } + + @Test + void nullInput_passesValidation() { + // Given + String toon = null; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertTrue(result.valid()); + } + + @Test + void blankInput_passesValidation() { + // Given + String toon = " "; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertTrue(result.valid()); + } + + @Test + void invalidStructure_failsValidation() { + // Given — bad indentation + String toon = " badIndent"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertFalse(result.valid()); + assertFalse(result.issues().isEmpty()); + assertTrue(result.issues().get(0).contains("Structural error")); + } + + @Test + void trailingSpaces_detected() { + // Given + String toon = "id: 123 \nname: Ada"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertFalse(result.valid()); + assertTrue(result.issues().stream().anyMatch(i -> i.contains("Trailing space"))); + } + + @Test + void trailingNewline_detected() { + // Given + String toon = "id: 123\n"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertFalse(result.valid()); + assertTrue(result.issues().stream().anyMatch(i -> i.contains("Trailing 
newline"))); + } + + @Test + void multipleTrailingSpaces_allDetected() { + // Given — trailing spaces on multiple lines + String toon = "id: 123 \nname: Ada \nactive: true"; + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon); + + // Then + assertFalse(result.valid()); + assertEquals(2, result.issues().stream().filter(i -> i.contains("Trailing space")).count()); + } + + @Test + void isValid_returnsTrueForValidToon() { + // Given + String toon = "key: value"; + + // Then + assertTrue(ToonValidator.isValid(toon)); + } + + @Test + void isValid_returnsFalseForInvalidToon() { + // Given + String toon = " badIndent"; + + // Then + assertFalse(ToonValidator.isValid(toon)); + } + + @Test + void validate_withCustomOptions() { + // Given — pipe-delimited valid TOON + String toon = "items[2|]{a|b}:\n 1|x\n 2|y"; + DecodeOptions options = DecodeOptions.withDelimiter(Delimiter.PIPE); + + // When + ToonValidator.ValidationResult result = ToonValidator.validate(toon, options); + + // Then + assertTrue(result.valid()); + } +} diff --git a/src/test/resources/conformance/decode/arrays-nested.json b/src/test/resources/conformance/decode/arrays-nested.json index 927cfb1..fad16f6 100644 --- a/src/test/resources/conformance/decode/arrays-nested.json +++ b/src/test/resources/conformance/decode/arrays-nested.json @@ -172,12 +172,6 @@ "expected": [], "specSection": "9.1" }, - { - "name": "decodes canonical empty root array", - "input": "[]", - "expected": [], - "specSection": "9.1" - }, { "name": "parses complex mixed object with arrays and nested objects", "input": "user:\n id: 123\n name: Ada\n tags[2]: reading,gaming\n active: true\n prefs[0]:", diff --git a/src/test/resources/conformance/decode/blank-lines.json b/src/test/resources/conformance/decode/blank-lines.json index dd217a3..a4dba63 100644 --- a/src/test/resources/conformance/decode/blank-lines.json +++ b/src/test/resources/conformance/decode/blank-lines.json @@ -11,7 +11,7 @@ "options": { 
"strict": true }, - "specSection": "14.4" + "specSection": "14.2" }, { "name": "throws on blank line inside tabular array", @@ -21,7 +21,7 @@ "options": { "strict": true }, - "specSection": "14.4" + "specSection": "14.2" }, { "name": "throws on multiple blank lines inside array", @@ -31,7 +31,7 @@ "options": { "strict": true }, - "specSection": "14.4" + "specSection": "14.2" }, { "name": "throws on blank line with spaces inside array", @@ -41,7 +41,7 @@ "options": { "strict": true }, - "specSection": "14.4" + "specSection": "14.2" }, { "name": "throws on blank line in nested list array", @@ -51,7 +51,7 @@ "options": { "strict": true }, - "specSection": "14.4" + "specSection": "14.2" }, { "name": "accepts blank line between root-level fields", @@ -65,6 +65,19 @@ }, "specSection": "12" }, + { + "name": "accepts whitespace-only line at non-multiple indent as blank in strict mode", + "input": "a: 1\n \nb: 2", + "expected": { + "a": 1, + "b": 2 + }, + "options": { + "strict": true + }, + "specSection": "12", + "minSpecVersion": "3.2" + }, { "name": "accepts trailing newline at end of file", "input": "a: 1\n", diff --git a/src/test/resources/conformance/decode/delimiters.json b/src/test/resources/conformance/decode/delimiters.json index 7fafd50..71191fd 100644 --- a/src/test/resources/conformance/decode/delimiters.json +++ b/src/test/resources/conformance/decode/delimiters.json @@ -19,14 +19,6 @@ }, "specSection": "11" }, - { - "name": "parses primitive arrays with comma delimiter", - "input": "tags[3]: reading,gaming,coding", - "expected": { - "tags": ["reading", "gaming", "coding"] - }, - "specSection": "11" - }, { "name": "parses tabular arrays with tab delimiter", "input": "items[2\t]{sku\tqty\tprice}:\n A1\t2\t9.99\n B2\t1\t14.5", @@ -179,7 +171,7 @@ "note": "Active delimiter is tab, but object values use document delimiter for quoting" }, { - "name": "object values with comma must be quoted when document delimiter is comma", + "name": "parses quoted comma in object 
values", "input": "items[2]:\n - status: \"a,b\"\n - status: \"c,d\"", "expected": { "items": [{ "status": "a,b" }, { "status": "c,d" }] diff --git a/src/test/resources/conformance/decode/indentation-errors.json b/src/test/resources/conformance/decode/indentation-errors.json index d94ded3..728650e 100644 --- a/src/test/resources/conformance/decode/indentation-errors.json +++ b/src/test/resources/conformance/decode/indentation-errors.json @@ -12,7 +12,7 @@ "indent": 2, "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "throws on list item with non-multiple indentation (3 spaces with indent=2)", @@ -23,7 +23,7 @@ "indent": 2, "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "throws on non-multiple indentation with custom indent=4 (3 spaces)", @@ -34,7 +34,7 @@ "indent": 4, "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "accepts correct indentation with custom indent size (4 spaces with indent=4)", @@ -58,7 +58,7 @@ "options": { "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "throws on mixed tabs and spaces in indentation", @@ -68,7 +68,7 @@ "options": { "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "throws on tab at start of line", @@ -78,7 +78,7 @@ "options": { "strict": true }, - "specSection": "14.3" + "specSection": "14.2" }, { "name": "accepts tabs in quoted string values", @@ -142,43 +142,6 @@ "strict": false }, "specSection": "12" - }, - { - "name": "parses empty lines without validation errors", - "input": "a: 1\n\nb: 2", - "expected": { - "a": 1, - "b": 2 - }, - "options": { - "strict": true - }, - "specSection": "12" - }, - { - "name": "parses root-level content (0 indentation) as always valid", - "input": "a: 1\nb: 2\nc: 3", - "expected": { - "a": 1, - "b": 2, - "c": 3 - }, - "options": { - "strict": true - }, - "specSection": "12" - }, - { - "name": "parses lines with only spaces without validation if 
empty", - "input": "a: 1\n \nb: 2", - "expected": { - "a": 1, - "b": 2 - }, - "options": { - "strict": true - }, - "specSection": "12" } ] } diff --git a/src/test/resources/conformance/decode/objects.json b/src/test/resources/conformance/decode/objects.json index 74c191d..47a1b75 100644 --- a/src/test/resources/conformance/decode/objects.json +++ b/src/test/resources/conformance/decode/objects.json @@ -1,7 +1,7 @@ { "version": "3.1", "category": "decode", - "description": "Object decoding - simple objects, nested objects, key parsing, quoted values", + "description": "Object decoding - fields, nested objects, key parsing, §6 fall-through (non-strict), and §14.4 duplicate-key LWW", "tests": [ { "name": "parses objects with primitive values", @@ -38,6 +38,18 @@ }, "specSection": "8" }, + { + "name": "applies last-write-wins for duplicate sibling keys in non-strict mode", + "input": "name: Ada\nname: Bob", + "expected": { + "name": "Bob" + }, + "options": { + "strict": false + }, + "specSection": "14.4", + "minSpecVersion": "3.2" + }, { "name": "parses quoted object value with colon", "input": "note: \"a:b\"", @@ -118,6 +130,22 @@ }, "specSection": "8" }, + { + "name": "decodes \\uXXXX in quoted key (U+0004 control character)", + "input": "\"a\\u0004b\": 1", + "expected": { + "a\u0004b": 1 + }, + "specSection": "7.1" + }, + { + "name": "decodes \\uXXXX in quoted key (case-insensitive hex)", + "input": "\"x\\u00E9y\": 2", + "expected": { + "xéy": 2 + }, + "specSection": "7.1" + }, { "name": "parses quoted key with brackets", "input": "\"[index]\": 5", @@ -127,31 +155,40 @@ "specSection": "8" }, { - "name": "treats extra brackets after valid array segment as literal key", + "name": "treats extra brackets after valid array segment as literal key (non-strict)", "input": "foo[1][bar]: 10", + "options": { + "strict": false + }, "expected": { "foo[1][bar]": 10 }, "specSection": "6", - "note": "Non-whitespace [bar] between ] and : prevents array header interpretation" + 
"note": "Non-whitespace [bar] between ] and : prevents array header interpretation; non-strict fall-through produces a literal key not constrained by §7.3" }, { - "name": "treats non-integer bracket content as literal key", + "name": "treats non-integer bracket content as literal key (non-strict)", "input": "foo[bar][1]: 20", + "options": { + "strict": false + }, "expected": { "foo[bar][1]": 20 }, "specSection": "6", - "note": "[bar] fails integer parsing; line is not an array header" + "note": "[bar] fails integer parsing; non-strict fall-through produces a literal key not constrained by §7.3" }, { - "name": "treats text between bracket segment and colon as literal key", + "name": "treats text between bracket segment and colon as literal key (non-strict)", "input": "foo[2]extra: a,b", + "options": { + "strict": false + }, "expected": { "foo[2]extra": "a,b" }, "specSection": "6", - "note": "Non-whitespace content between ] and : prevents array header interpretation" + "note": "Non-whitespace content between ] and : prevents array header interpretation; non-strict fall-through produces a literal key" }, { "name": "parses quoted key with braces", @@ -268,6 +305,36 @@ } }, "specSection": "8" + }, + { + "name": "applies LWW for nested duplicate sibling keys in non-strict mode", + "input": "outer:\n name: Ada\n name: Bob", + "expected": { + "outer": { + "name": "Bob" + } + }, + "options": { + "strict": false + }, + "specSection": "14.4", + "minSpecVersion": "3.2" + }, + { + "name": "applies LWW for duplicate keys within a list-item object in non-strict mode", + "input": "items[1]:\n - id: 1\n id: 2", + "expected": { + "items": [ + { + "id": 2 + } + ] + }, + "options": { + "strict": false + }, + "specSection": "14.4", + "minSpecVersion": "3.2" } ] } diff --git a/src/test/resources/conformance/decode/path-expansion.json b/src/test/resources/conformance/decode/path-expansion.json index 5eb9cb4..0b513c6 100644 --- a/src/test/resources/conformance/decode/path-expansion.json 
+++ b/src/test/resources/conformance/decode/path-expansion.json @@ -88,7 +88,7 @@ "expandPaths": "safe", "strict": true }, - "specSection": "14.5" + "specSection": "14.3" }, { "name": "throws on expansion conflict (object vs array) when strict=true", @@ -99,7 +99,7 @@ "expandPaths": "safe", "strict": true }, - "specSection": "14.5" + "specSection": "14.3" }, { "name": "applies LWW when strict=false (primitive overwrites expanded object)", @@ -144,15 +144,16 @@ "specSection": "13.4" }, { - "name": "preserves non-IdentifierSegment keys as literals", - "input": "full-name.x: 1", + "name": "preserves quoted non-IdentifierSegment keys as literals", + "input": "\"full-name.x\": 1", "expected": { "full-name.x": 1 }, "options": { "expandPaths": "safe" }, - "specSection": "13.4" + "specSection": "13.4", + "note": "Quoted keys remain literal after unescaping; safe-mode expansion does not split them. The key must be quoted because §7.3 forbids hyphens in unquoted keys." }, { "name": "expands keys creating empty nested objects", diff --git a/src/test/resources/conformance/decode/primitives.json b/src/test/resources/conformance/decode/primitives.json index 58d690f..4efb601 100644 --- a/src/test/resources/conformance/decode/primitives.json +++ b/src/test/resources/conformance/decode/primitives.json @@ -165,6 +165,12 @@ "input": "\"05\"", "expected": "05", "specSection": "7.4" + }, + { + "name": "decodes supplementary scalar (U+1F680) in quoted string as literal UTF-8", + "input": "\"🚀 launch\"", + "expected": "🚀 launch", + "specSection": "7.1" } ] } diff --git a/src/test/resources/conformance/decode/root-form.json b/src/test/resources/conformance/decode/root-form.json index 5f61148..1da36e5 100644 --- a/src/test/resources/conformance/decode/root-form.json +++ b/src/test/resources/conformance/decode/root-form.json @@ -1,7 +1,7 @@ { "version": "1.4", "category": "decode", - "description": "Root form detection - empty document, single primitive, multiple primitives", + 
"description": "Root form detection - empty document, single primitive, literal empty array", "tests": [ { "name": "parses empty document as empty object", @@ -12,6 +12,42 @@ }, "specSection": "5", "note": "Empty input (no non-empty lines) decodes to empty object" + }, + { + "name": "parses single primitive string at root as primitive", + "input": "hello", + "expected": "hello", + "options": { + "strict": true + }, + "specSection": "5" + }, + { + "name": "parses single primitive number at root as primitive", + "input": "42", + "expected": 42, + "options": { + "strict": true + }, + "specSection": "5" + }, + { + "name": "parses single primitive boolean at root as primitive", + "input": "true", + "expected": true, + "options": { + "strict": true + }, + "specSection": "5" + }, + { + "name": "parses literal [] at root as empty array", + "input": "[]", + "expected": [], + "options": { + "strict": true + }, + "specSection": "5" } ] } diff --git a/src/test/resources/conformance/decode/validation-errors.json b/src/test/resources/conformance/decode/validation-errors.json index dd9ee86..c525b46 100644 --- a/src/test/resources/conformance/decode/validation-errors.json +++ b/src/test/resources/conformance/decode/validation-errors.json @@ -77,11 +77,12 @@ "specSection": "5" }, { - "name": "throws on delimiter mismatch (header declares tab, row uses comma)", + "name": "throws on row width mismatch when rows use a different delimiter than the active delimiter", "input": "items[2\t]{a\tb}:\n 1,2\n 3,4", "expected": null, "shouldError": true, - "specSection": "14.2" + "specSection": "14.1", + "note": "Active delimiter is tab; rows using comma each parse as 1 value, failing the row width check" }, { "name": "throws on mismatched delimiter between bracket and brace fields", @@ -91,7 +92,144 @@ "options": { "strict": true }, + "specSection": "6", + "minSpecVersion": "3.2" + }, + { + "name": "throws on extra brackets between bracket segment and colon in strict mode", + "input": 
"foo[1][bar]: 10", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "note": "Non-whitespace content between ] and : must error in strict mode (§6 fall-through is non-strict only)", + "minSpecVersion": "3.2" + }, + { + "name": "throws on text between bracket segment and colon in strict mode", + "input": "foo[2]extra: a,b", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "minSpecVersion": "3.2" + }, + { + "name": "throws on non-integer bracket segment in strict mode", + "input": "foo[bar]: 10", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "minSpecVersion": "3.2" + }, + { + "name": "throws on duplicate sibling keys in strict mode", + "input": "name: Ada\nname: Bob", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "14.4", + "minSpecVersion": "3.2" + }, + { + "name": "throws on array header missing colon", + "input": "items[2]{id,name}\n 1,Ada\n 2,Bob", + "expected": null, + "shouldError": true, "specSection": "6" + }, + { + "name": "throws on inline primitive array length mismatch (too few)", + "input": "tags[3]: a,b", + "expected": null, + "shouldError": true, + "specSection": "14.1" + }, + { + "name": "throws on list items length mismatch (too few)", + "input": "items[2]:\n - a", + "expected": null, + "shouldError": true, + "specSection": "14.1" + }, + { + "name": "throws on bracket length with leading zeros in strict mode", + "input": "items[03]: a,b,c", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "note": "[03] is not a canonical non-negative integer length; decoders MUST NOT interpret it as a bracket segment", + "minSpecVersion": "3.2" + }, + { + "name": "throws on negative bracket length in strict mode", + "input": "items[-1]: a,b,c", + "expected": null, + "shouldError": true, + "options": 
{ + "strict": true + }, + "specSection": "6", + "note": "[-1] is not a non-negative integer length; decoders MUST NOT interpret it as a bracket segment", + "minSpecVersion": "3.2" + }, + { + "name": "throws on whitespace between bracket segment and colon in strict mode", + "input": "items[2] :\n 1,2", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "note": "No whitespace is permitted between ] and the colon/fields segment; any content there prevents header interpretation", + "minSpecVersion": "3.2" + }, + { + "name": "throws on whitespace between bracket segment and fields segment in strict mode", + "input": "items[2] {a,b}:\n 1,2\n 3,4", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "6", + "note": "No whitespace is permitted between ] and the fields segment; mirrors the ]-to-colon rule", + "minSpecVersion": "3.2" + }, + { + "name": "throws on nested duplicate sibling keys in strict mode", + "input": "outer:\n name: Ada\n name: Bob", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "14.4", + "minSpecVersion": "3.2" + }, + { + "name": "throws on duplicate keys within a list-item object in strict mode", + "input": "items[1]:\n - id: 1\n id: 2", + "expected": null, + "shouldError": true, + "options": { + "strict": true + }, + "specSection": "14.4", + "minSpecVersion": "3.2" } ] } diff --git a/src/test/resources/conformance/encode/arrays-objects.json b/src/test/resources/conformance/encode/arrays-objects.json index 371ea56..9646619 100644 --- a/src/test/resources/conformance/encode/arrays-objects.json +++ b/src/test/resources/conformance/encode/arrays-objects.json @@ -153,6 +153,16 @@ }, "expected": "items[2]:\n - id: 1\n data: string\n - id: 2\n data:\n nested: true", "specSection": "9.4" + }, + { + "name": "uses expanded list for arrays containing empty objects", + "input": { + "items": [{}, {}] + }, + "expected": 
"items[2]:\n -\n -", + "specSection": "9.4", + "minSpecVersion": "3.2", + "note": "Empty objects {} MUST NOT use tabular form per §9.3; encoded via §9.4 expanded list with bare hyphen markers per §10" } ] } diff --git a/src/test/resources/conformance/encode/key-folding.json b/src/test/resources/conformance/encode/key-folding.json index b8041aa..367732a 100644 --- a/src/test/resources/conformance/encode/key-folding.json +++ b/src/test/resources/conformance/encode/key-folding.json @@ -135,23 +135,6 @@ "specSection": "13.4", "note": "flattenDepth=0 disables all folding" }, - { - "name": "encodes standard nesting with flattenDepth=1 (no practical effect)", - "input": { - "a": { - "b": { - "c": 1 - } - } - }, - "expected": "a:\n b:\n c: 1", - "options": { - "keyFolding": "safe", - "flattenDepth": 1 - }, - "specSection": "13.4", - "note": "flattenDepth=1 has no practical folding effect (requires at least 2 segments)" - }, { "name": "encodes standard nesting with keyFolding=off (baseline)", "input": { diff --git a/src/test/resources/conformance/encode/objects.json b/src/test/resources/conformance/encode/objects.json index 22f7584..5a26204 100644 --- a/src/test/resources/conformance/encode/objects.json +++ b/src/test/resources/conformance/encode/objects.json @@ -196,6 +196,24 @@ "expected": "\"he said \\\"hi\\\"\": 1", "specSection": "7.1" }, + { + "name": "escapes U+0004 control character in key via \\uXXXX", + "input": { + "a\u0004b": 1 + }, + "expected": "\"a\\u0004b\": 1", + "specSection": "7.1", + "minSpecVersion": "3.1" + }, + { + "name": "escapes U+001F control character in key via \\uXXXX", + "input": { + "x\u001fy": 2 + }, + "expected": "\"x\\u001fy\": 2", + "specSection": "7.1", + "minSpecVersion": "3.1" + }, { "name": "encodes deeply nested objects", "input": {