From 5391943ecb3a826309253627b35934803fed99db Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Wed, 15 Apr 2026 16:12:29 +0900 Subject: [PATCH] index: fix repeated v2 index merges Repeated merges of v2 indexes could treat the on-disk posting-list sentinel, invalidTrigram (0x00ffffff), as a normal trigram because the merge reader used ^uint32(0) as its internal EOF marker. That let duplicate serialized sentinels accumulate in the posting index, and a later cindex run could panic with no progress while merging. Use invalidTrigram consistently in postMapReader and merge sentinel handling. Restore lazy posting-list writes so ordinary trigrams with no surviving file IDs are omitted while the final sentinel is still emitted. Add a repeated-merge regression test that checks merged v2 indexes stay valid and do not grow extra posting entries. Fixes #100. --- index/merge.go | 8 ++++---- index/merge_test.go | 43 +++++++++++++++++++++++++++++++++++++++++++ index/write.go | 13 +++++++++++-- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/index/merge.go b/index/merge.go index 55c5456a64..e5c2689b14 100644 --- a/index/merge.go +++ b/index/merge.go @@ -217,7 +217,7 @@ func Merge(dst, src1, src2 string) { w.endTrigram() } else { w.trigram(r1.trigram) - if r1.trigram == ^uint32(0) { + if r1.trigram == invalidTrigram { w.endTrigram() break } @@ -299,7 +299,7 @@ type postMapReader struct { func (r *postMapReader) init(ix *Index, idmap []idrange) { r.ix = ix r.idmap = idmap - r.trigram = ^uint32(0) + r.trigram = invalidTrigram r.nextBlock = 0 r.triNum = -1 r.load(true) @@ -310,12 +310,12 @@ func (r *postMapReader) nextTrigram() { } func (r *postMapReader) load(force bool) { - if !force && r.trigram == ^uint32(0) { + if !force && r.trigram == invalidTrigram { return } r.triNum++ if r.triNum >= r.ix.numPost { - r.trigram = ^uint32(0) + r.trigram = invalidTrigram r.count = 0 r.fileid = -1 return diff --git a/index/merge_test.go b/index/merge_test.go index 49a7631f1b..a7ad7355df 100644 --- a/index/merge_test.go +++ b/index/merge_test.go @@ -76,6 +76,49 @@ func TestMerge(t *testing.T) { checkPosting(t, ix3, "pot", 4, 5, 7) } +func TestMergeRepeated(t *testing.T) { + old := writeVersion + defer func() { + writeVersion = old + }() + writeVersion = 2 + + tempIndex := func() string { + f, err := os.CreateTemp("", "index-test") + if err != nil { + t.Fatal(err) + } + name := f.Name() + f.Close() + t.Cleanup(func() { + os.Remove(name) + }) + return name + } + + current := tempIndex() + buildIndex(current, mergePaths1, mergeFiles1) + wantPost := Open(current).numPost + + for i := 1; i <= 4; i++ { + fresh := tempIndex() + next := tempIndex() + buildIndex(fresh, mergePaths1, mergeFiles1) + Merge(next, current, fresh) + + ix := Open(next) + if err := ix.Check(); err != nil { + t.Fatalf("merge %d: Check: %v", i, err) + } + if ix.numPost != wantPost { + t.Fatalf("merge %d: numPost = %d, want %d", i, ix.numPost, wantPost) + } + checkFiles(t, ix, "/a/x", "/a/y", "/b/xx", "/b/xy", "/c/ab", "/c/de") + + current = next + } +} + func checkFiles(t *testing.T, ix *Index, l ...string) { t.Helper() for i, s := range l { diff --git a/index/write.go b/index/write.go index 185ed23938..cb0751e359 100644 --- a/index/write.go +++ b/index/write.go @@ -860,17 +860,26 @@ func (w *postDataWriter) trigram(t uint32) { w.count = 0 w.t = t w.lastID = -1 - w.numTrigram++ - w.out.WriteTrigram(w.t) } func (w *postDataWriter) fileid(id int) { + if w.count == 0 { + w.out.WriteTrigram(w.t) + w.numTrigram++ + } w.delta.Write(id - w.lastID) w.lastID = id w.count++ } func (w *postDataWriter) endTrigram() { + if w.count == 0 { + if w.t != invalidTrigram { + return + } + w.out.WriteTrigram(w.t) + w.numTrigram++ + } w.delta.Write(0) w.delta.Flush() if w.postIndexFile == nil {