From e234f23db7263fb9ec1979079b6b1eb39c52bc06 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 16 Apr 2026 14:15:12 +1200 Subject: [PATCH 1/3] Add support for per-chapter remarks --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 2 +- .../Corpora/UpdateUsfmParserHandler.cs | 70 +++++++++++++---- .../Corpora/UpdateUsfmParserHandlerTests.cs | 77 ++++++++++++++++++- 3 files changed, 129 insertions(+), 20 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 5b0731c4..13b71b84 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -29,7 +29,7 @@ public string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, - IEnumerable remarks = null, + IEnumerable<(int, string)> remarks = null, Func errorHandler = null, bool compareSegments = false ) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 76a59336..1a891d4c 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -60,7 +60,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly HashSet _preserveParagraphStyles; private readonly Stack _updateBlocks; private readonly Stack _updateBlockHandlers; - private readonly List _remarks; + private readonly List<(int, string)> _remarks; private readonly Stack _replace; private int _tokenIndex; private readonly Func _errorHandler; @@ -76,7 +76,7 @@ public UpdateUsfmParserHandler( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, - IEnumerable remarks = null, + IEnumerable<(int, string)> remarks = null, Func errorHandler = null, bool compareSegments = false ) @@ -107,7 +107,7 @@ public UpdateUsfmParserHandler( preserveParagraphStyles == null ? new HashSet { "r", "rem" } : new HashSet(preserveParagraphStyles); - _remarks = remarks?.ToList() ?? new List(); + _remarks = remarks?.ToList() ?? new List<(int, string)>(); _errorHandler = errorHandler; if (_errorHandler == null) _errorHandler = (error) => false; @@ -433,26 +433,66 @@ public string GetUsfm(string stylesheetFileName = "usfm.sty") public string GetUsfm(UsfmStylesheet stylesheet) { var tokenizer = new UsfmTokenizer(stylesheet); - List tokens = new List(_tokens); - if (_remarks.Count() > 0) + var tokens = new List(_tokens); + if (_remarks.Count > 0) { - var remarkTokens = new List(); - foreach (string remark in _remarks) + var remarkTokensByChapter = new Dictionary>(); + foreach ((int chapterNum, string remark) in _remarks) { - remarkTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null)); - remarkTokens.Add(new UsfmToken(remark)); + // Add the remark tokens for each chapter that is to have remarks + if (!remarkTokensByChapter.TryGetValue(chapterNum, out List chapterTokens)) + { + chapterTokens = new List(); + remarkTokensByChapter.Add(chapterNum, chapterTokens); + } + + chapterTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null)); + chapterTokens.Add(new UsfmToken(remark)); } if (tokens.Count > 0) { - int index = 0; - HashSet markersToSkip = new HashSet() { "id", "ide", "rem" }; - while (markersToSkip.Contains(tokens[index].Marker)) + foreach (KeyValuePair> remarkTokens in remarkTokensByChapter) { - index++; - if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text) + int index; + HashSet markersToSkip; + if (remarkTokens.Key == 0) + { + // Add the remarks at the top level of the USFM, + // after the book id, encode, and any initial comments + index = 0; + markersToSkip = new HashSet { "id", "ide", "rem" }; + } + else + { + // Add the remarks just after the specified chapter + index = tokens.FindIndex(t => + t.Type == UsfmTokenType.Chapter + && int.TryParse(t.Data, out int chapterNumber) + && chapterNumber == remarkTokens.Key + ); + if (index == -1) + continue; index++; + markersToSkip = new HashSet(); + } + + if (index >= tokens.Count) + { + // The remark insertion point is at the very end + tokens.AddRange(remarkTokens.Value); + } + else + { + while (markersToSkip.Contains(tokens[index].Marker)) + { + index++; + if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text) + index++; + } + + tokens.InsertRange(index, remarkTokens.Value); + } } - tokens.InsertRange(index, remarkTokens); } } diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 9b5219c3..e6f6ac65 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1380,7 +1380,7 @@ public void GetUsfm_IdTags() } [Test] - public void GetUsfm_PreferExisting_AddRemark() + public void GetUsfm_PreferExisting_AddRemarkToStart() { var rows = new List { @@ -1400,7 +1400,7 @@ public void GetUsfm_PreferExisting_AddRemark() rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: ["New remark"] + remarks: [(0, "New remark")] ); string result = @"\id MAT - Test @@ -1419,7 +1419,7 @@ public void GetUsfm_PreferExisting_AddRemark() rows, target, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: ["New remark 2"] + remarks: [(0, "New remark 2")] ); result = @"\id MAT - Test @@ -1436,6 +1436,75 @@ public void GetUsfm_PreferExisting_AddRemark() AssertUsfmEquals(target, result); } + [Test] + public void GetUsfm_PreferExisting_AddRemarkToChapter() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"), + }; + string usfm = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem Existing remark +\v 1 Some text +\v 2 +\v 3 Other text +\c 3 +"; + string target = UpdateUsfm( + rows, + usfm, + textBehavior: UpdateUsfmTextBehavior.PreferExisting, + remarks: [(2, "New remark"), (3, "Last remark"), (4, "Remark for missing chapter")] + ); + string result = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem New remark +\rem Existing remark +\v 1 Some text +\v 2 Update 2 +\v 3 Other text +\c 3 +\rem Last remark +"; + + AssertUsfmEquals(target, result); + + target = UpdateUsfm( + rows, + target, + textBehavior: UpdateUsfmTextBehavior.PreferExisting, + remarks: [(1, "New remark 2"), (2, "New remark 3")] + ); + result = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\rem New remark 2 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem New remark 3 +\rem New remark +\rem Existing remark +\v 1 Some text +\v 2 Update 2 +\v 3 Other text +\c 3 +\rem Last remark +"; + + AssertUsfmEquals(target, result); + } + [Test] public void UpdateBlock_FootnoteInPublishedChapterNumber() { @@ -1536,7 +1605,7 @@ private static string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable? preserveParagraphStyles = null, IEnumerable? usfmUpdateBlockHandlers = null, - IEnumerable? remarks = null, + IEnumerable<(int, string)>? remarks = null, bool compareSegments = false ) { From c3bea1a43e3a83d867ff5c893a44d6f288b4fc4c Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 23 Apr 2026 14:57:49 +1200 Subject: [PATCH 2/3] Port of per-chapter USFM filtering --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 6 +- src/SIL.Machine/Corpora/UsfmTokenizer.cs | 54 +++++++++++++- .../Corpora/UpdateUsfmParserHandlerTests.cs | 72 ++++++++++++++++++- 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 13b71b84..241e2330 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -22,6 +22,7 @@ ParatextProjectSettings settings public string UpdateUsfm( string bookId, IReadOnlyList rows, + IReadOnlyList chapters = null, string fullName = null, UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting, UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, @@ -59,7 +60,10 @@ public string UpdateUsfm( ); try { - UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); + var tokenizer = new UsfmTokenizer(_settings.Stylesheet); + IReadOnlyList tokens = tokenizer.Tokenize(usfm, filterTokensByChapter: chapters); + var parser = new UsfmParser(tokens, handler, _settings.Stylesheet, _settings.Versification); + parser.ProcessTokens(); return handler.GetUsfm(_settings.Stylesheet); } catch (Exception ex) diff --git a/src/SIL.Machine/Corpora/UsfmTokenizer.cs b/src/SIL.Machine/Corpora/UsfmTokenizer.cs index f07886df..a2ab70f4 100644 --- a/src/SIL.Machine/Corpora/UsfmTokenizer.cs +++ b/src/SIL.Machine/Corpora/UsfmTokenizer.cs @@ -37,7 +37,11 @@ public UsfmTokenizer(UsfmStylesheet stylesheet, RtlReferenceOrder rtlReferenceOr public UsfmStylesheet Stylesheet { get; } public RtlReferenceOrder RtlReferenceOrder { get; } - public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = false) + public IReadOnlyList Tokenize( + string usfm, + bool preserveWhitespace = false, + IReadOnlyList filterTokensByChapter = null + ) { List tokens = new List(); @@ -409,7 +413,7 @@ public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = } } - return tokens; + return FilterTokensByChapter(tokens, filterTokensByChapter); } public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespace = false) @@ -534,6 +538,52 @@ public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespac return usfm.ToString(); } + /// + /// Filters tokens by the specified chapters. + /// + /// The tokens. + /// The chapters. If null, all tokens are returned. + /// The filtered tokens. + private static IReadOnlyList FilterTokensByChapter( + IReadOnlyList tokens, + IReadOnlyList chapters = null + ) + { + if (chapters is null) + return tokens; + + var tokensWithinChapters = new List(); + bool inChapter = false; + bool inIdMarker = false; + + for (int index = 0; index < tokens.Count; index++) + { + UsfmToken token = tokens[index]; + if (index == 0 && token.Marker == "id") + { + inIdMarker = true; + if (chapters.Contains(1)) + inChapter = true; + } + else if (inIdMarker && token.Marker != null && token.Marker != "id") + { + inIdMarker = false; + } + else if (token.Type == UsfmTokenType.Chapter) + { + inChapter = + !string.IsNullOrEmpty(token.Data) + && int.TryParse(token.Data, out int chapter) + && chapters.Contains(chapter); + } + + if (inIdMarker || inChapter) + tokensWithinChapters.Add(token); + } + + return tokensWithinChapters; + } + /// /// Gets the next word in the usfm and advances the index past it /// diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index e6f6ac65..9f0b0025 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1590,6 +1590,71 @@ public void UpdateBlock_FootnoteAtStartOfChapterWithPrecedingText() ); } + [Test] + public void FilterChapters() + { + string usfm = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 2 +\v 1 Some text +\c 3 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + string target = UpdateUsfm(source: usfm, chapters: [2, 4]); + + string result = + @"\id MAT - Test +\c 2 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + AssertUsfmEquals(target, result); + } + + [Test] + public void FilterChapters_WithChapterOneAndHeader() + { + string usfm = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 2 +\v 1 Some text +\c 3 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + string target = UpdateUsfm(source: usfm, chapters: [1, 3]); + + string result = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 3 +\v 1 Some text +"; + + AssertUsfmEquals(target, result); + } + private static ScriptureRef[] ScrRef(params string[] refs) { return refs.Select(r => ScriptureRef.Parse(r)).ToArray(); @@ -1598,6 +1663,7 @@ private static ScriptureRef[] ScrRef(params string[] refs) private static string UpdateUsfm( IReadOnlyList? rows = null, string? source = null, + IReadOnlyList? chapters = null, string? idText = null, UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferNew, UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, @@ -1615,6 +1681,7 @@ private static string UpdateUsfm( return updater.UpdateUsfm( "MAT", rows, + chapters, idText, textBehavior, paragraphBehavior, @@ -1643,7 +1710,10 @@ private static string UpdateUsfm( (_) => false, compareSegments ); - UsfmParser.Parse(source, updater); + var tokenizer = new UsfmTokenizer(); + IReadOnlyList tokens = tokenizer.Tokenize(source, filterTokensByChapter: chapters); + var parser = new UsfmParser(tokens, updater); + parser.ProcessTokens(); return updater.GetUsfm(); } } From d164ee4de7692956c98973722ee909c055686ef5 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Tue, 28 Apr 2026 14:26:39 +1200 Subject: [PATCH 3/3] Update to match machine.py --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 51 ++++++- .../Corpora/UpdateUsfmParserHandler.cs | 2 +- src/SIL.Machine/Corpora/UsfmTokenizer.cs | 54 +------- .../Corpora/UpdateUsfmParserHandlerTests.cs | 129 +++++++++++------- 4 files changed, 134 insertions(+), 102 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 241e2330..48736ed6 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Text; namespace SIL.Machine.Corpora @@ -61,7 +62,8 @@ public string UpdateUsfm( try { var tokenizer = new UsfmTokenizer(_settings.Stylesheet); - IReadOnlyList tokens = tokenizer.Tokenize(usfm, filterTokensByChapter: chapters); + IReadOnlyList tokens = tokenizer.Tokenize(usfm); + tokens = FilterTokensByChapter(tokens, chapters); var parser = new UsfmParser(tokens, handler, _settings.Stylesheet, _settings.Versification); parser.ProcessTokens(); return handler.GetUsfm(_settings.Stylesheet); @@ -77,6 +79,53 @@ public string UpdateUsfm( } } + /// + /// Filters tokens by the specified chapters. + /// + /// The tokens. + /// The chapters. If null, all tokens are returned. + /// The filtered tokens. + /// This is marked internal so test classes can use it. + internal static IReadOnlyList FilterTokensByChapter( + IReadOnlyList tokens, + IReadOnlyList chapters = null + ) + { + if (chapters is null) + return tokens; + + var tokensWithinChapters = new List(); + bool inChapter = false; + bool inIdMarker = false; + + for (int index = 0; index < tokens.Count; index++) + { + UsfmToken token = tokens[index]; + if (index == 0 && token.Marker == "id") + { + inIdMarker = true; + if (chapters.Contains(1)) + inChapter = true; + } + else if (inIdMarker && token.Marker != null && token.Marker != "id") + { + inIdMarker = false; + } + else if (token.Type == UsfmTokenType.Chapter) + { + inChapter = + !string.IsNullOrEmpty(token.Data) + && int.TryParse(token.Data, out int chapter) + && chapters.Contains(chapter); + } + + if (inIdMarker || inChapter) + tokensWithinChapters.Add(token); + } + + return tokensWithinChapters; + } + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 1a891d4c..a5687439 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -473,7 +473,7 @@ public string GetUsfm(UsfmStylesheet stylesheet) if (index == -1) continue; index++; - markersToSkip = new HashSet(); + markersToSkip = new HashSet { "rem" }; } if (index >= tokens.Count) diff --git a/src/SIL.Machine/Corpora/UsfmTokenizer.cs b/src/SIL.Machine/Corpora/UsfmTokenizer.cs index a2ab70f4..f07886df 100644 --- a/src/SIL.Machine/Corpora/UsfmTokenizer.cs +++ b/src/SIL.Machine/Corpora/UsfmTokenizer.cs @@ -37,11 +37,7 @@ public UsfmTokenizer(UsfmStylesheet stylesheet, RtlReferenceOrder rtlReferenceOr public UsfmStylesheet Stylesheet { get; } public RtlReferenceOrder RtlReferenceOrder { get; } - public IReadOnlyList Tokenize( - string usfm, - bool preserveWhitespace = false, - IReadOnlyList filterTokensByChapter = null - ) + public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = false) { List tokens = new List(); @@ -413,7 +409,7 @@ public IReadOnlyList Tokenize( } } - return FilterTokensByChapter(tokens, filterTokensByChapter); + return tokens; } public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespace = false) @@ -538,52 +534,6 @@ public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespac return usfm.ToString(); } - /// - /// Filters tokens by the specified chapters. - /// - /// The tokens. - /// The chapters. If null, all tokens are returned. - /// The filtered tokens. - private static IReadOnlyList FilterTokensByChapter( - IReadOnlyList tokens, - IReadOnlyList chapters = null - ) - { - if (chapters is null) - return tokens; - - var tokensWithinChapters = new List(); - bool inChapter = false; - bool inIdMarker = false; - - for (int index = 0; index < tokens.Count; index++) - { - UsfmToken token = tokens[index]; - if (index == 0 && token.Marker == "id") - { - inIdMarker = true; - if (chapters.Contains(1)) - inChapter = true; - } - else if (inIdMarker && token.Marker != null && token.Marker != "id") - { - inIdMarker = false; - } - else if (token.Type == UsfmTokenType.Chapter) - { - inChapter = - !string.IsNullOrEmpty(token.Data) - && int.TryParse(token.Data, out int chapter) - && chapters.Contains(chapter); - } - - if (inIdMarker || inChapter) - tokensWithinChapters.Add(token); - } - - return tokensWithinChapters; - } - /// /// Gets the next word in the usfm and advances the index past it /// diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 9f0b0025..177e6e2c 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1380,13 +1380,14 @@ public void GetUsfm_IdTags() } [Test] - public void GetUsfm_PreferExisting_AddRemarkToStart() + public void GetUsfm_PassRemark() { var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), }; + string usfm = @"\id MAT - Test \ide UTF-8 @@ -1395,111 +1396,116 @@ public void GetUsfm_PreferExisting_AddRemarkToStart() \v 1 Some text \v 2 \v 3 Other text +\c 2 +\rem Existing remark +\v 1 More text +\c 3 "; + string target = UpdateUsfm( rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(0, "New remark")] + remarks: [(0, "New remark 0"), (1, "New remark 1"), (2, "New remark 2"), (3, "New remark 3")] ); + string result = @"\id MAT - Test \ide UTF-8 \rem Existing remark -\rem New remark +\rem New remark 0 \c 1 +\rem New remark 1 \v 1 Some text \v 2 Update 2 \v 3 Other text -"; - - AssertUsfmEquals(target, result); - - target = UpdateUsfm( - rows, - target, - textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(0, "New remark 2")] - ); - result = - @"\id MAT - Test -\ide UTF-8 +\c 2 \rem Existing remark -\rem New remark \rem New remark 2 -\c 1 -\v 1 Some text -\v 2 Update 2 -\v 3 Other text +\v 1 More text +\c 3 +\rem New remark 3 "; AssertUsfmEquals(target, result); } [Test] - public void GetUsfm_PreferExisting_AddRemarkToChapter() + public void GetUsfm_PassRemark0_NoExistingRemark() { var rows = new List { - new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"), - new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"), + new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), }; + string usfm = @"\id MAT - Test \ide UTF-8 \c 1 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem Existing remark \v 1 Some text \v 2 \v 3 Other text -\c 3 "; + string target = UpdateUsfm( rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(2, "New remark"), (3, "Last remark"), (4, "Remark for missing chapter")] + remarks: [(0, "New remark 0")] ); + string result = @"\id MAT - Test \ide UTF-8 +\rem New remark 0 \c 1 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem New remark -\rem Existing remark \v 1 Some text \v 2 Update 2 \v 3 Other text -\c 3 -\rem Last remark "; AssertUsfmEquals(target, result); + } - target = UpdateUsfm( + [Test] + public void GetUsfm_MultipleRemarksSameChapter() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), + }; + + string usfm = + @"\id MAT - Test +\ide UTF-8 +\rem Existing remark +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +"; + + string target = UpdateUsfm( rows, - target, + usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(1, "New remark 2"), (2, "New remark 3")] + remarks: [(0, "New remark 0.1"), (0, "New remark 0.2"), (1, "New remark 1.1"), (1, "New remark 1.2")] ); - result = + + string result = @"\id MAT - Test \ide UTF-8 -\c 1 -\rem New remark 2 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem New remark 3 -\rem New remark \rem Existing remark +\rem New remark 0.1 +\rem New remark 0.2 +\c 1 +\rem New remark 1.1 +\rem New remark 1.2 \v 1 Some text \v 2 Update 2 \v 3 Other text -\c 3 -\rem Last remark "; AssertUsfmEquals(target, result); @@ -1655,6 +1661,32 @@ public void FilterChapters_WithChapterOneAndHeader() AssertUsfmEquals(target, result); } + [Test] + public void FilterChapters_WithBadChapterReference() + { + string usfm = + @"\id MAT - Test +\c 1. +\v 1 Some text +\c 2. +\v 1 Some text +\c 3 +\v 1 Some text with good chapter reference +\c 4 +\v 1 Some text with good chapter reference +"; + + string target = UpdateUsfm(source: usfm, chapters: [2, 4]); + + string result = + @"\id MAT - Test +\c 4 +\v 1 Some text with good chapter reference +"; + + AssertUsfmEquals(target, result); + } + private static ScriptureRef[] ScrRef(params string[] refs) { return refs.Select(r => ScriptureRef.Parse(r)).ToArray(); @@ -1711,7 +1743,8 @@ private static string UpdateUsfm( compareSegments ); var tokenizer = new UsfmTokenizer(); - IReadOnlyList tokens = tokenizer.Tokenize(source, filterTokensByChapter: chapters); + IReadOnlyList tokens = tokenizer.Tokenize(source); + tokens = ParatextProjectTextUpdaterBase.FilterTokensByChapter(tokens, chapters); var parser = new UsfmParser(tokens, updater); parser.ProcessTokens(); return updater.GetUsfm();