From 5023e019e660c5665aa4f0193aa1fffa2789cfc3 Mon Sep 17 00:00:00 2001
From: Cassandra Heart
Date: Fri, 10 Oct 2025 05:12:53 -0500
Subject: [PATCH] fix: no fork replay on repeat for non-archive nodes, snap now
 behaves correctly

---
 node/consensus/time/global_time_reel.go      | 57 +++++++++++-
 node/consensus/time/global_time_reel_test.go | 97 +++++++++++++++++++-
 2 files changed, 147 insertions(+), 7 deletions(-)

diff --git a/node/consensus/time/global_time_reel.go b/node/consensus/time/global_time_reel.go
index 0057291..d0cf073 100644
--- a/node/consensus/time/global_time_reel.go
+++ b/node/consensus/time/global_time_reel.go
@@ -527,8 +527,29 @@ func (g *GlobalTimeReel) processPendingFrames(
 	for _, pending := range pendingList {
 		frameID := g.ComputeFrameID(pending.Frame)
 
-		// Skip if already processed
-		if _, exists := g.nodes[frameID]; exists {
+		if existing, exists := g.nodes[frameID]; exists {
+			// Re-parent previously pre-inserted orphan
+			if existing.Parent == nil {
+				existing.Parent = parentNode
+				existing.Depth = parentNode.Depth + 1
+				parentNode.Children[frameID] = existing
+				g.framesByNumber[pending.Frame.Header.FrameNumber] = append(
+					g.framesByNumber[pending.Frame.Header.FrameNumber], existing)
+
+				g.cache.Add(frameID, existing)
+
+				g.logger.Debug("reparented pending orphan frame",
+					zap.Uint64("frame_number", pending.Frame.Header.FrameNumber),
+					zap.String("id", frameID),
+					zap.String("parent_id", parentFrameID),
+				)
+
+				g.processPendingFrames(frameID, existing)
+
+				g.evaluateForkChoice(existing)
+			}
+
+			// Skip if already processed
 			continue
 		}
 
@@ -791,17 +812,45 @@ func (g *GlobalTimeReel) evaluateForkChoice(newNode *GlobalFrameNode) {
 	}
 }
 
-// findLeafNodes returns all leaf nodes (nodes with no children)
+// findLeafNodes returns all leaf nodes (nodes with no children) that are in the
+// same connected component as the current head. This prevents spurious fork
+// choice across disconnected forests (e.g., after a non-archive snap-ahead).
 func (g *GlobalTimeReel) findLeafNodes() []*GlobalFrameNode {
 	var leaves []*GlobalFrameNode
+	if g.head == nil {
+		// Fallback: no head yet, return all leaves
+		for _, node := range g.nodes {
+			if len(node.Children) == 0 {
+				leaves = append(leaves, node)
+			}
+		}
+
+		return leaves
+	}
+
+	headRoot := g.findRoot(g.head)
 	for _, node := range g.nodes {
-		if len(node.Children) == 0 {
+		if len(node.Children) != 0 {
+			continue
+		}
+
+		if g.findRoot(node) == headRoot {
 			leaves = append(leaves, node)
 		}
 	}
+
 	return leaves
 }
 
+// findRoot walks parents to identify the root of a node
+func (g *GlobalTimeReel) findRoot(n *GlobalFrameNode) *GlobalFrameNode {
+	cur := n
+	for cur != nil && cur.Parent != nil {
+		cur = cur.Parent
+	}
+	return cur
+}
+
 // nodeToBranch converts a node and its lineage to a Branch for fork choice
 func (g *GlobalTimeReel) nodeToBranch(node *GlobalFrameNode) Branch {
 	// Build lineage from this node backwards, but limit to maxGlobalTreeDepth
diff --git a/node/consensus/time/global_time_reel_test.go b/node/consensus/time/global_time_reel_test.go
index abdd7ce..8b4402f 100644
--- a/node/consensus/time/global_time_reel_test.go
+++ b/node/consensus/time/global_time_reel_test.go
@@ -1518,6 +1518,7 @@ func TestGlobalTimeReel_ComprehensiveEquivocation(t *testing.T) {
 		}
 	}
 }
+
 func TestGlobalTimeReel_NonArchive_BootstrapLoadsWindowOf360(t *testing.T) {
 	logger, _ := zap.NewDevelopment()
 	s := setupTestClockStore(t)
@@ -1611,9 +1612,8 @@ func TestGlobalTimeReel_NonArchive_PrunesStore_AsHeadAdvances(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, head.Header.FrameNumber, latest.Header.FrameNumber)
 
-	// Boundary itself should have been deleted by current code path.
 	_, err = s.GetGlobalClockFrame(oldestToKeep)
-	assert.Error(t, err, "boundary oldestToKeep is deleted by range deletion in current implementation")
+	assert.NoError(t, err, "boundary oldestToKeep should not be deleted")
 }
 
 func TestGlobalTimeReel_NonArchive_PendingResolves_WhenParentArrives(t *testing.T) {
@@ -1677,6 +1677,96 @@ func TestGlobalTimeReel_NonArchive_PendingResolves_WhenParentArrives(t *testing.
 	assert.Equal(t, uint64(101), head.Header.FrameNumber)
 }
 
+func TestGlobalTimeReel_NonArchive_SnapThenAppend_NoSpuriousForks(t *testing.T) {
+	logger, _ := zap.NewDevelopment()
+	s := setupTestClockStore(t)
+
+	// Seed 0 -> 200 so the store has history
+	buildAndPersistChain(t, s, 0, 200)
+
+	tr, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, false)
+	require.NoError(t, err)
+	require.NoError(t, tr.Start())
+	defer tr.Stop()
+
+	ctx := context.Background()
+	eventCh := tr.GetEventCh()
+
+	// Drain any startup/new head events.
+drain:
+	for {
+		select {
+		case <-eventCh:
+		case <-time.After(20 * time.Millisecond):
+			break drain
+		}
+	}
+
+	// Confirm bootstrapped head is 200.
+	head, err := tr.GetHead()
+	require.NoError(t, err)
+	require.Equal(t, uint64(200), head.Header.FrameNumber)
+
+	// Insert a far-ahead tip so that gap > 360 (induces snap ahead).
+	snapTip := &protobufs.GlobalFrame{
+		Header: &protobufs.GlobalFrameHeader{
+			FrameNumber:    561,
+			Timestamp:      time.Now().UnixMilli(),
+			Output:         []byte("snap_561"),
+			ParentSelector: []byte("unknown"),
+		},
+	}
+	require.NoError(t, tr.Insert(ctx, snapTip))
+
+	// We should get a fork event for the snapped-ahead tip.
+	select {
+	case ev := <-eventCh:
+		require.Equal(t, TimeReelEventForkDetected, ev.Type, "snap should be a fork event")
+	case <-time.After(200 * time.Millisecond):
+		t.Fatal("timeout waiting for fork event")
+	}
+
+	// Now append a few sequential frames (562 -> 566) that chain to the snapped
+	// head.
+	prev := snapTip
+	var forkEvents int
+	for n := uint64(562); n <= 566; n++ {
+		f := &protobufs.GlobalFrame{
+			Header: &protobufs.GlobalFrameHeader{
+				FrameNumber:    n,
+				Timestamp:      time.Now().UnixMilli(),
+				Output:         []byte(fmt.Sprintf("snap_%d", n)),
+				ParentSelector: computeGlobalPoseidonHash(prev.Header.Output),
+			},
+		}
+		require.NoError(t, tr.Insert(ctx, f))
+		prev = f
+
+		// Expect exactly one new-head event per append, and zero fork events.
+		select {
+		case ev := <-eventCh:
+			if ev.Type == TimeReelEventForkDetected {
+				forkEvents++
+			}
+			require.Equal(t, TimeReelEventNewHead, ev.Type, "sequential append should be a head advance, not a fork")
+		case <-time.After(200 * time.Millisecond):
+			t.Fatalf("timeout waiting for head advance at %d", n)
+		}
+	}
+
+	assert.Equal(t, 0, forkEvents, "no fork events should occur when linearly appending after snap")
+
+	// Head should be at 566.
+	head, err = tr.GetHead()
+	require.NoError(t, err)
+	assert.Equal(t, uint64(566), head.Header.FrameNumber)
+
+	// Tree should have exactly one branch in the head's component.
+	info := tr.GetTreeInfo()
+	branchCount := info["branch_count"].(int)
+	assert.Equal(t, 1, branchCount, "should present a single branch after snap + linear append")
+}
+
 func mustLatestGlobal(t *testing.T, s *store.PebbleClockStore) *protobufs.GlobalFrame {
 	t.Helper()
 	f, err := s.GetLatestGlobalClockFrame()
@@ -1730,7 +1820,8 @@ func createGlobalFrame(num uint64, parent *protobufs.GlobalFrame, out []byte) *p
 func buildAndPersistChain(t *testing.T, s *store.PebbleClockStore, start, end uint64) {
 	t.Helper()
 	logger, _ := zap.NewDevelopment()
-	reel, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, true)
+	// note: needs to be non-archive, otherwise Insert will only mark frames as pending
+	reel, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, false)
 	require.NoError(t, err)
 	require.NoError(t, reel.Start())
 	defer reel.Stop()
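
Reviewer note: the sketch below is a minimal, self-contained illustration of the component-scoped leaf filter this patch adds to findLeafNodes. The node/tree types, the add helper, and the frame IDs are hypothetical stand-ins for GlobalFrameNode and the reel's internal maps, not the real implementation; it only shows why filtering leaves through findRoot keeps the stale pre-snap component from feeding branches into fork choice after a non-archive snap-ahead.

package main

import "fmt"

// node is a stand-in for GlobalFrameNode: parent pointer plus child map.
type node struct {
	id       string
	parent   *node
	children map[string]*node
}

// tree is a stand-in for the reel's node index and current head.
type tree struct {
	nodes map[string]*node
	head  *node
}

// findRoot walks parents to identify the root of a node, mirroring the
// helper introduced in the patch.
func findRoot(n *node) *node {
	cur := n
	for cur.parent != nil {
		cur = cur.parent
	}
	return cur
}

// findLeafNodes returns only the leaves sharing a root with the head, so a
// disconnected forest cannot produce spurious fork-choice candidates.
func (t *tree) findLeafNodes() []*node {
	var leaves []*node
	if t.head == nil {
		// Fallback: no head yet, return all leaves.
		for _, n := range t.nodes {
			if len(n.children) == 0 {
				leaves = append(leaves, n)
			}
		}
		return leaves
	}
	headRoot := findRoot(t.head)
	for _, n := range t.nodes {
		if len(n.children) != 0 {
			continue
		}
		if findRoot(n) == headRoot {
			leaves = append(leaves, n)
		}
	}
	return leaves
}

func main() {
	tr := &tree{nodes: map[string]*node{}}
	add := func(id string, parent *node) *node {
		n := &node{id: id, parent: parent, children: map[string]*node{}}
		if parent != nil {
			parent.children[id] = n
		}
		tr.nodes[id] = n
		return n
	}

	// Pre-snap component: 199 -> 200, with 200 as a leaf.
	f199 := add("199", nil)
	add("200", f199)

	// Snapped-ahead component, disconnected from the old one: 561 -> 562.
	f561 := add("561", nil)
	tr.head = add("562", f561)

	// Prints only "562": the stale leaf "200" never reaches fork choice.
	for _, l := range tr.findLeafNodes() {
		fmt.Println("leaf in head's component:", l.id)
	}
}

Each findRoot call walks parents up to the root, so the filter costs O(depth) per leaf; in the real reel that walk should stay bounded, since branch lineage is already capped at maxGlobalTreeDepth when branches are built (see nodeToBranch above).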