fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly

This commit is contained in:
Cassandra Heart 2025-10-10 05:12:53 -05:00
parent ec8074e01b
commit 5023e019e6
No known key found for this signature in database
GPG Key ID: 371083BFA6C240AA
2 changed files with 147 additions and 7 deletions

View File

@ -527,8 +527,29 @@ func (g *GlobalTimeReel) processPendingFrames(
for _, pending := range pendingList {
frameID := g.ComputeFrameID(pending.Frame)
// Skip if already processed
if _, exists := g.nodes[frameID]; exists {
if existing, exists := g.nodes[frameID]; exists {
// Re-parent previously pre-inserted orphan
if existing.Parent == nil {
existing.Parent = parentNode
existing.Depth = parentNode.Depth + 1
parentNode.Children[frameID] = existing
g.framesByNumber[pending.Frame.Header.FrameNumber] = append(
g.framesByNumber[pending.Frame.Header.FrameNumber], existing)
g.cache.Add(frameID, existing)
g.logger.Debug("reparented pending orphan frame",
zap.Uint64("frame_number", pending.Frame.Header.FrameNumber),
zap.String("id", frameID),
zap.String("parent_id", parentFrameID),
)
g.processPendingFrames(frameID, existing)
g.evaluateForkChoice(existing)
}
// Skip if already processed
continue
}
@ -791,17 +812,45 @@ func (g *GlobalTimeReel) evaluateForkChoice(newNode *GlobalFrameNode) {
}
}
// findLeafNodes returns all leaf nodes (nodes with no children)
// findLeafNodes returns all leaf nodes (nodes with no children) that are in the
// same connected component as the current head. This prevents spurious fork
// choice across disconnected forests (e.g., after a non-archive snap-ahead).
func (g *GlobalTimeReel) findLeafNodes() []*GlobalFrameNode {
var leaves []*GlobalFrameNode
if g.head == nil {
// Fallback: no head yet, return all leaves
for _, node := range g.nodes {
if len(node.Children) == 0 {
leaves = append(leaves, node)
}
}
return leaves
}
headRoot := g.findRoot(g.head)
for _, node := range g.nodes {
if len(node.Children) == 0 {
if len(node.Children) != 0 {
continue
}
if g.findRoot(node) == headRoot {
leaves = append(leaves, node)
}
}
return leaves
}
// findRoot walks parents to identify the root of a node
func (g *GlobalTimeReel) findRoot(n *GlobalFrameNode) *GlobalFrameNode {
cur := n
for cur != nil && cur.Parent != nil {
cur = cur.Parent
}
return cur
}
// nodeToBranch converts a node and its lineage to a Branch for fork choice
func (g *GlobalTimeReel) nodeToBranch(node *GlobalFrameNode) Branch {
// Build lineage from this node backwards, but limit to maxGlobalTreeDepth

View File

@ -1518,6 +1518,7 @@ func TestGlobalTimeReel_ComprehensiveEquivocation(t *testing.T) {
}
}
}
func TestGlobalTimeReel_NonArchive_BootstrapLoadsWindowOf360(t *testing.T) {
logger, _ := zap.NewDevelopment()
s := setupTestClockStore(t)
@ -1611,9 +1612,8 @@ func TestGlobalTimeReel_NonArchive_PrunesStore_AsHeadAdvances(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, head.Header.FrameNumber, latest.Header.FrameNumber)
// Boundary itself should have been deleted by current code path.
_, err = s.GetGlobalClockFrame(oldestToKeep)
assert.Error(t, err, "boundary oldestToKeep is deleted by range deletion in current implementation")
assert.NoError(t, err, "boundary oldestToKeep should not be deleted")
}
func TestGlobalTimeReel_NonArchive_PendingResolves_WhenParentArrives(t *testing.T) {
@ -1677,6 +1677,96 @@ func TestGlobalTimeReel_NonArchive_PendingResolves_WhenParentArrives(t *testing.
assert.Equal(t, uint64(101), head.Header.FrameNumber)
}
func TestGlobalTimeReel_NonArchive_SnapThenAppend_NoSpuriousForks(t *testing.T) {
logger, _ := zap.NewDevelopment()
s := setupTestClockStore(t)
// Seed 0 -> 200 so the store has history
buildAndPersistChain(t, s, 0, 200)
tr, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, false)
require.NoError(t, err)
require.NoError(t, tr.Start())
defer tr.Stop()
ctx := context.Background()
eventCh := tr.GetEventCh()
// Drain any startup/new head events.
drain:
for {
select {
case <-eventCh:
case <-time.After(20 * time.Millisecond):
break drain
}
}
// Confirm bootstrapped head is 200.
head, err := tr.GetHead()
require.NoError(t, err)
require.Equal(t, uint64(200), head.Header.FrameNumber)
// Insert a far-ahead tip so that gap > 360 (induces snap ahead).
snapTip := &protobufs.GlobalFrame{
Header: &protobufs.GlobalFrameHeader{
FrameNumber: 561,
Timestamp: time.Now().UnixMilli(),
Output: []byte("snap_561"),
ParentSelector: []byte("unknown"),
},
}
require.NoError(t, tr.Insert(ctx, snapTip))
// We should get a fork
select {
case ev := <-eventCh:
require.Equal(t, TimeReelEventForkDetected, ev.Type, "snap should be a fork event")
case <-time.After(200 * time.Millisecond):
t.Fatal("timeout waiting for fork event")
}
// Now append a few sequential frames (562 -> 566) that chain to the snapped
// head.
var prev = snapTip
var forkEvents int
for n := uint64(562); n <= 566; n++ {
f := &protobufs.GlobalFrame{
Header: &protobufs.GlobalFrameHeader{
FrameNumber: n,
Timestamp: time.Now().UnixMilli(),
Output: []byte(fmt.Sprintf("snap_%d", n)),
ParentSelector: computeGlobalPoseidonHash(prev.Header.Output),
},
}
require.NoError(t, tr.Insert(ctx, f))
prev = f
// Expect exactly one new-head event per append, and zero fork events.
select {
case ev := <-eventCh:
if ev.Type == TimeReelEventForkDetected {
forkEvents++
}
require.Equal(t, TimeReelEventNewHead, ev.Type, "sequential append should be a head advance, not a fork")
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout waiting for head advance at %d", n)
}
}
assert.Equal(t, 0, forkEvents, "no fork events should occur when linearly appending after snap")
// Head should be at 566
head, err = tr.GetHead()
require.NoError(t, err)
assert.Equal(t, uint64(566), head.Header.FrameNumber)
// Tree should have exactly one branch in the head's component.
info := tr.GetTreeInfo()
branchCount := info["branch_count"].(int)
assert.Equal(t, 1, branchCount, "should present a single branch after snap + linear append")
}
func mustLatestGlobal(t *testing.T, s *store.PebbleClockStore) *protobufs.GlobalFrame {
t.Helper()
f, err := s.GetLatestGlobalClockFrame()
@ -1730,7 +1820,8 @@ func createGlobalFrame(num uint64, parent *protobufs.GlobalFrame, out []byte) *p
func buildAndPersistChain(t *testing.T, s *store.PebbleClockStore, start, end uint64) {
t.Helper()
logger, _ := zap.NewDevelopment()
reel, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, true)
// note: needs to be non-archive otherwise insert will only set as pending
reel, err := NewGlobalTimeReel(logger, createTestProverRegistry(true), s, 99, false)
require.NoError(t, err)
require.NoError(t, reel.Start())
defer reel.Stop()