mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-03-05 16:27:28 +08:00
* .20 testing * Read in the debug by env variable (#514) * v2.1.0.19 * enhanced error logging, fix seniority marker join blocker, fix sync message size limit defaults * resolve signature failure * additional error logging for merge-related signatures * fix: one-shot sync message size, app shard TC signature size, collector/hotstuff race condition, expired joins blocking new joins due to pruning disable * remove compat with old 2.0.0 blossomsub * fix: resolve abandoned prover joins * reload prover registry * fix stale worker proposal edge * add full sanity check on join before submitting to identify bug * resolve non-fallthrough condition that should be fallthrough * fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating * add reconnect fallback if no peers are found with variable reconnect time (#511) Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com> * update base peer count to 1 (#513) * fix: expired prover join frames, starting port ranges, proposer getting stuck, and seniority on joins * fix: panic on shutdown, libp2p discovery picking inaccessible peers, coverage event check not in shutdown logic, amend app shard worker behavior to mirror global for prover root reconciliation * fix: shutdown scenario quirks, reload hanging * fix: do not bailout early on shutdown of coverage check * fix: force registry refresh on worker waiting for registration * add more logging to wait for prover * fix: worker manager refreshes the filter on allocation, snapshots blocking close on shutdown * tweak: force shutdown after five seconds for app worker * fix: don't loop when shutting down * fix: slight reordering, also added named workers to trace hanging shutdowns * use deterministic key for peer id of workers to stop flagging workers as sybil attacks * fix: remove pubsub stop from app consensus engine as it shouldn't manage pubsub lifecycle, integrate shutdown context to PerformSync to prevent stuck syncs from halting respawn 
* fix: blossomsub pubsub interface does not properly track subscription status * fix: subscribe order to avoid nil panic * switch from dnsaddr to dns4 * add missing quic-v1 * additional logging to isolate respawn quirks * fix: dnsaddr -> dns4 for blossomsub * allow debug env var to be read --------- Co-authored-by: Cassandra Heart <cassandra@quilibrium.com> Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com> Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com> * fix newPebbleDB constructor config param (#517) * fix: high CPU overhead in initial worker behaviors/ongoing sync * faster docker builds with better caching * qol: add extra data to node info, and query metrics from command line * leave proposals for overcrowded shards * hub-and-spoke global message broadcasts * small tweaks to cli output for join frames --------- Co-authored-by: winged-pegasus <55340199+winged-pegasus@users.noreply.github.com> Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com> Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
207 lines
5.7 KiB
Go
207 lines
5.7 KiB
Go
package tests
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/rand"
|
|
"math/big"
|
|
"runtime"
|
|
"sync"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/mock"
|
|
"go.uber.org/zap"
|
|
"source.quilibrium.com/quilibrium/monorepo/config"
|
|
hg "source.quilibrium.com/quilibrium/monorepo/hypergraph"
|
|
"source.quilibrium.com/quilibrium/monorepo/node/store"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/mocks"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/tries"
|
|
)
|
|
|
|
// vertexSpec describes one vertex to be inserted during a test run:
// the shard-selecting application address, the per-vertex data
// address, the commitment bytes, and the vertex size.
type vertexSpec struct {
	appAddr  [32]byte // application address; shared by all specs so every vertex lands in the same shard
	dataAddr [32]byte // unique per vertex within an iteration (derived from loop indices)
	commit   []byte   // commitment bytes (a mocked CommitRaw result in this test)
	size     *big.Int // vertex size passed to hg.NewVertex — semantics defined by the hypergraph package
}
|
|
|
|
// TestConcurrentAddVertexAndCommitRace verifies that serializing
// AddVertex batches and Commit calls with a mutex (the commitBarrier
// pattern) prevents partial-state tree roots.
//
// The test applies the same serialization that GlobalConsensusEngine
// uses: a mutex held across the entire AddVertex loop and around each
// Commit call. With this barrier, Commit can never observe a partially
// modified tree.
//
// Shape of each iteration:
//  1. Build a fresh in-memory hypergraph with mocked prover/encryptor.
//  2. Take a baseline Commit (frame 1, zero vertices).
//  3. Race one AddVertex goroutine (holding the barrier for the whole
//     batch) against commitGoroutines Commit goroutines (each holding
//     the barrier per Commit).
//  4. Take a final Commit with all vertices present.
//  5. Assert every concurrent Commit matched either the baseline or
//     the final state — anything else is partial state.
func TestConcurrentAddVertexAndCommitRace(t *testing.T) {
	const (
		iterations     = 100
		verticesPerRun = 50
		// Multiple concurrent commit goroutines to increase contention.
		commitGoroutines = 4
	)

	// Ensure true parallelism.
	prevProcs := runtime.GOMAXPROCS(runtime.NumCPU())
	defer runtime.GOMAXPROCS(prevProcs)

	// Error deliberately ignored: zap.NewDevelopment only fails on
	// sink-construction problems, and a nil-check here would add noise
	// to a test.
	logger, _ := zap.NewDevelopment()

	for iter := 0; iter < iterations; iter++ {
		// Fresh mocks and stores per iteration so state from one run
		// cannot leak into the next.
		prover := &mocks.MockInclusionProver{}
		mockCommit := make([]byte, 74)
		// 0x02 prefix followed by random bytes; crypto/rand.Read's
		// error is ignored — it does not fail in practice on supported
		// platforms.
		mockCommit[0] = 0x02
		rand.Read(mockCommit[1:])
		// Every CommitRaw call returns the same canned commitment, so
		// tree roots differ only by which vertices are present.
		prover.On("CommitRaw", mock.Anything, mock.Anything).Return(mockCommit, nil)

		enc := &mocks.MockVerifiableEncryptor{}
		// In-memory DB; Path is still required by the config struct.
		dbCfg := &config.DBConfig{InMemoryDONOTUSE: true, Path: ".configtest/store"}

		// NOTE(review): the pebble DB/stores are never explicitly
		// closed across the 100 iterations — presumably fine for an
		// in-memory store, but worth confirming there is no per-handle
		// resource that accumulates.
		s := store.NewPebbleDB(logger, &config.Config{DB: dbCfg}, 0)
		hgStore := store.NewPebbleHypergraphStore(
			dbCfg, s, logger, enc, prover,
		)
		hgcrdt := hg.NewHypergraph(
			logger,
			hgStore,
			prover,
			[]int{},
			&Nopthenticator{}, // no-op authenticator defined elsewhere in this package
			200,
		)

		// All vertices share the same appAddress so they land in the same shard,
		// maximizing tree contention.
		appAddr := [32]byte{0x10}

		// Commit baseline (frame 1) with no vertices — this is the
		// "before" state that a commit goroutine may validly capture
		// if it acquires the barrier before the AddVertex goroutine.
		baselineCommits, err := hgcrdt.Commit(1)
		if err != nil {
			t.Fatalf("iter %d: baseline commit failed: %v", iter, err)
		}

		// Prepare vertices to add. dataAddr mixes the vertex index and
		// the iteration counter so addresses are unique per run.
		specs := make([]vertexSpec, verticesPerRun)
		for i := 0; i < verticesPerRun; i++ {
			dataAddr := [32]byte{byte(i + 1), byte(iter), byte(i >> 8)}
			specs[i] = vertexSpec{
				appAddr:  appAddr,
				dataAddr: dataAddr,
				commit:   mockCommit,
				size:     big.NewInt(55),
			}
		}

		// commitBarrier mirrors the mutex in GlobalConsensusEngine that
		// serializes materialize (AddVertex loop) with
		// rebuildShardCommitments (Commit).
		var commitBarrier sync.Mutex

		var wg sync.WaitGroup
		// Closed once to release all goroutines at (roughly) the same
		// instant.
		start := make(chan struct{})

		type commitResult struct {
			commits map[tries.ShardKey][][]byte
			err     error
		}
		// Each commit goroutine writes only results[g], so no extra
		// synchronization is needed beyond wg.Wait.
		results := make([]commitResult, commitGoroutines)

		wg.Add(1 + commitGoroutines)

		// Goroutine A: add vertices one at a time (like materialize does).
		// Holds the barrier across the entire batch.
		go func() {
			defer wg.Done()
			<-start
			commitBarrier.Lock()
			defer commitBarrier.Unlock()
			for _, vs := range specs {
				v := hg.NewVertex(vs.appAddr, vs.dataAddr, vs.commit, vs.size)
				if addErr := hgcrdt.AddVertex(nil, v); addErr != nil {
					// t.Errorf (not Fatalf) — Fatalf must not be called
					// from a non-test goroutine.
					t.Errorf("iter %d: AddVertex failed: %v", iter, addErr)
					return
				}
				// Yield between additions — without the barrier this would
				// allow Commit to interleave and capture partial state.
				runtime.Gosched()
			}
		}()

		// Goroutines B: commit the tree concurrently with vertex additions.
		// Each acquires the barrier around Commit, so it waits for any
		// in-progress AddVertex batch to finish.
		for g := 0; g < commitGoroutines; g++ {
			g := g // capture per-iteration value for the closure (pre-Go 1.22 semantics)
			go func() {
				defer wg.Done()
				<-start
				// Stagger start to hit different points in the AddVertex sequence.
				for y := 0; y < g*3; y++ {
					runtime.Gosched()
				}
				commitBarrier.Lock()
				// Frame numbers are made unique per iteration/goroutine
				// (iter*10+g+2) so they never collide with the baseline
				// frame 1 or each other.
				results[g].commits, results[g].err = hgcrdt.Commit(
					uint64(iter*10 + g + 2),
				)
				commitBarrier.Unlock()
			}()
		}

		close(start)
		wg.Wait()

		// Any Commit error is fatal — the partial-state check below is
		// only meaningful for commits that succeeded.
		for g, r := range results {
			if r.err != nil {
				t.Fatalf("iter %d goroutine %d: commit failed: %v", iter, g, r.err)
			}
		}

		// Final commit with ALL vertices present — the canonical state.
		expectedCommits, err := hgcrdt.Commit(uint64(iter*10 + commitGoroutines + 2))
		if err != nil {
			t.Fatalf("iter %d: expected commit failed: %v", iter, err)
		}

		// With the commitBarrier, each concurrent commit must reflect a
		// consistent state: either the baseline (0 vertices, committed
		// before AddVertex batch) or the final state (all vertices,
		// committed after). Any other result means Commit() interleaved
		// with AddVertex calls and captured partial state.
		for g, r := range results {
			matchesBaseline := commitMapsEqual(r.commits, baselineCommits)
			matchesFinal := commitMapsEqual(r.commits, expectedCommits)
			if !matchesBaseline && !matchesFinal {
				t.Fatalf(
					"iter %d goroutine %d: commit captured partial state "+
						"(tree root matches neither baseline nor final — "+
						"divergence detected)",
					iter, g,
				)
			}
		}
	}
}
|
|
|
|
// commitMapsEqual compares two commit maps for byte-level equality.
|
|
func commitMapsEqual(a, b map[tries.ShardKey][][]byte) bool {
|
|
if len(a) != len(b) {
|
|
return false
|
|
}
|
|
for k, aPhases := range a {
|
|
bPhases, ok := b[k]
|
|
if !ok {
|
|
return false
|
|
}
|
|
if len(aPhases) != len(bPhases) {
|
|
return false
|
|
}
|
|
for i := range aPhases {
|
|
if !bytes.Equal(aPhases[i], bPhases[i]) {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
}
|