From ab99f105f71021c76849bbfb61023b4845a14151 Mon Sep 17 00:00:00 2001 From: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com> Date: Mon, 15 Dec 2025 16:45:31 -0600 Subject: [PATCH] v2.1.0.16 (#492) --- RELEASE-NOTES | 14 + build_utils/go.mod | 3 + build_utils/main.go | 301 +++++++ channel/channel.go | 2 +- config/version.go | 2 +- consensus/helper/quorum_certificate.go | 2 +- consensus/helper/state.go | 2 +- consensus/helper/timeout_certificate.go | 4 +- consensus/participant/participant.go | 2 +- consensus/signature/packer.go | 4 +- .../weighted_signature_aggregator.go | 8 +- consensus/timeoutcollector/aggregation.go | 6 +- consensus/timeoutcollector/factory.go | 4 +- consensus/votecollector/statemachine.go | 4 +- hypergraph/hypergraph.go | 16 +- hypergraph/id_set.go | 2 +- hypergraph/snapshot_manager.go | 101 ++- hypergraph/sync.go | 378 +++++++-- node/app/db_console.go | 2 +- .../consensus_signature_aggregator_wrapper.go | 10 +- node/consensus/app/app_consensus_engine.go | 450 ++++++++--- ...consensus_engine_chaos_integration_test.go | 4 +- .../app_consensus_engine_integration_test.go | 28 +- .../app/consensus_dynamic_committee.go | 5 + .../app/consensus_voting_provider.go | 2 +- node/consensus/app/coverage_events.go | 251 ++++++ node/consensus/app/event_distributor.go | 6 + node/consensus/app/factory.go | 1 + node/consensus/app/frame_chain_checker.go | 2 +- node/consensus/app/message_processors.go | 747 ++++++++++++++++++ node/consensus/app/message_validation.go | 39 +- .../global/consensus_leader_provider.go | 9 + node/consensus/global/coverage_events.go | 10 +- node/consensus/global/event_distributor.go | 127 ++- node/consensus/global/genesis.go | 66 ++ .../global/global_consensus_engine.go | 360 +++++++-- ...lobal_consensus_engine_integration_test.go | 8 +- node/consensus/global/message_collector.go | 2 +- node/consensus/global/message_processors.go | 294 ++++++- node/consensus/provers/proposer.go | 7 +- node/consensus/provers/proposer_test.go | 12 +- node/consensus/provers/prover_registry.go | 195 ++++- node/consensus/sync/app_sync_hooks.go | 303 +++++++ node/consensus/sync/sync_client.go | 2 +- node/consensus/sync/sync_provider.go | 29 +- node/consensus/time/app_time_reel.go | 2 +- node/datarpc/data_worker_ipc_server.go | 25 + .../engines/compute_execution_engine.go | 2 +- .../engines/token_execution_engine.go | 2 +- .../intrinsics/compute/compute_conversions.go | 2 +- .../compute_intrinsic_code_deployment.go | 4 +- .../compute/compute_intrinsic_code_execute.go | 6 +- .../compute_intrinsic_code_finalize.go | 8 +- .../intrinsics/global/global_conversions.go | 2 +- .../global/global_prover_confirm.go | 2 +- .../intrinsics/global/global_prover_join.go | 8 +- .../intrinsics/global/global_prover_kick.go | 8 +- .../intrinsics/global/global_prover_leave.go | 2 +- .../intrinsics/global/global_prover_pause.go | 2 +- .../intrinsics/global/global_prover_reject.go | 2 +- .../intrinsics/global/global_prover_resume.go | 2 +- .../intrinsics/global/global_prover_update.go | 2 +- .../hypergraph/hypergraph_intrinsic.go | 2 +- .../hypergraph/hypergraph_vertex_add.go | 4 +- .../token/token_intrinsic_mint_transaction.go | 14 +- .../token_intrinsic_pending_transaction.go | 16 +- .../token/token_intrinsic_transaction.go | 12 +- node/execution/manager/execution_manager.go | 2 +- .../state/hypergraph/hypergraph_state.go | 12 +- node/go.mod | 14 +- node/go.sum | 34 +- node/main.go | 13 +- node/p2p/blossomsub.go | 6 +- node/p2p/internal/peer_connector.go | 2 +- node/p2p/internal/peer_monitor.go 
| 2 +- node/p2p/internal/peer_source.go | 2 +- node/p2p/onion/grpc_transport.go | 10 +- node/p2p/onion/router.go | 4 +- node/rpc/hypergraph_sync_rpc_server_test.go | 233 ++---- node/rpc/pubsub_proxy.go | 36 +- node/store/clock.go | 12 +- node/store/consensus.go | 2 +- node/store/hypergraph.go | 212 ++++- node/store/key.go | 2 +- node/store/managed_kvdb.go | 259 ++++++ node/store/pebble.go | 146 +++- node/store/peerstore.go | 2 +- node/store/shards.go | 2 +- node/store/token.go | 2 +- node/store/worker.go | 14 +- node/worker/manager.go | 8 +- protobufs/global.go | 48 +- types/consensus/prover_registry.go | 4 + types/mocks/prover_registry.go | 6 + types/store/worker.go | 1 + types/tries/lazy_proof_tree.go | 28 +- types/tries/proof_tree.go | 22 +- vdf/wesolowski_frame_prover.go | 68 +- 98 files changed, 4379 insertions(+), 784 deletions(-) create mode 100644 build_utils/go.mod create mode 100644 build_utils/main.go create mode 100644 node/consensus/app/coverage_events.go create mode 100644 node/consensus/sync/app_sync_hooks.go create mode 100644 node/store/managed_kvdb.go diff --git a/RELEASE-NOTES b/RELEASE-NOTES index d9ab42c..e889ed6 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -1,3 +1,17 @@ +# 2.1.0.16 +- build_utils – static code analysis checker for underlying slice assignment +- hypergraph snapshot manager now uses in memory snapshot instead of pebble snapshot +- hypersync can delete orphaned entries +- signature aggregation wrapper for app shards no longer expects proposer to have a proof (the proof is already in the frame) +- hook events on sync for app shards +- app shards properly sync global prover info +- coverage streaks/halt events now trigger on app shards +- peer info and key registry handlers on app shard level +- updated to pebble v2 +- pebble v2 upgrade handler +- archive mode memory bug fix +- subtle underlying slice mutation bug fix + # 2.1.0.15 - Adds direct db sync mode for hypersync - Removes blackhole detection entirely diff --git a/build_utils/go.mod b/build_utils/go.mod new file mode 100644 index 0000000..e7178d9 --- /dev/null +++ b/build_utils/go.mod @@ -0,0 +1,3 @@ +module source.quilibrium.com/quilibrium/monorepo/build_utils + +go 1.23.2 diff --git a/build_utils/main.go b/build_utils/main.go new file mode 100644 index 0000000..fe1825d --- /dev/null +++ b/build_utils/main.go @@ -0,0 +1,301 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "go/ast" + "go/format" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" +) + +type finding struct { + file string + pos token.Position + fn string + kind string + detail string +} + +const allowDirective = "buildutils:allow-slice-alias" + +func main() { + flag.Usage = func() { + fmt.Fprintf(flag.CommandLine.Output(), + "Usage: %s [...]\n"+ + "Scans Go files for functions that accept slice parameters\n"+ + "and either return them directly or store them in struct fields.\n", + os.Args[0]) + flag.PrintDefaults() + } + flag.Parse() + + if flag.NArg() == 0 { + flag.Usage() + os.Exit(1) + } + + var files []string + for _, path := range flag.Args() { + expanded, err := expandPath(path) + if err != nil { + fmt.Fprintf(os.Stderr, "error enumerating %s: %v\n", path, err) + os.Exit(1) + } + files = append(files, expanded...) 
+ } + + var allFindings []finding + for _, file := range files { + fs := token.NewFileSet() + f, err := parser.ParseFile(fs, file, nil, parser.ParseComments) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to parse %s: %v\n", file, err) + continue + } + allFindings = append(allFindings, analyzeFile(fs, file, f)...) + } + + if len(allFindings) == 0 { + fmt.Println("No slice-to-struct assignments detected.") + return + } + + for _, finding := range allFindings { + fmt.Printf("%s:%d:%d: [%s] %s in %s\n", + finding.pos.Filename, + finding.pos.Line, + finding.pos.Column, + finding.kind, + finding.detail, + finding.fn, + ) + } +} + +func expandPath(path string) ([]string, error) { + info, err := os.Stat(path) + if err != nil { + return nil, err + } + if !info.IsDir() { + if shouldIncludeFile(path) { + return []string{path}, nil + } + return nil, nil + } + + var files []string + err = filepath.WalkDir(path, func(p string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + if d.Name() == "vendor" || d.Name() == ".git" { + return filepath.SkipDir + } + return nil + } + if shouldIncludeFile(p) { + files = append(files, p) + } + return nil + }) + return files, err +} + +func analyzeFile(fs *token.FileSet, filename string, file *ast.File) []finding { + var findings []finding + + commentMap := ast.NewCommentMap(fs, file, file.Comments) + commentGroups := file.Comments + + for _, decl := range file.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || fn.Body == nil || fn.Type == nil || fn.Type.Params == nil { + continue + } + if hasDirective(fs, commentMap, commentGroups, fn) { + continue + } + + paramObjs := map[*ast.Object]string{} + for _, field := range fn.Type.Params.List { + if hasDirective(fs, commentMap, commentGroups, field) { + continue + } + if isSliceType(field.Type) { + for _, name := range field.Names { + if name != nil && name.Obj != nil { + paramObjs[name.Obj] = name.Name + } + } + } + } + if len(paramObjs) == 0 { + continue + } + + ast.Inspect(fn.Body, func(n ast.Node) bool { + switch node := n.(type) { + case *ast.ReturnStmt: + if hasDirective(fs, commentMap, commentGroups, node) { + return true + } + for _, result := range node.Results { + if ident, ok := result.(*ast.Ident); ok { + if pname, ok := paramObjs[ident.Obj]; ok { + pos := fs.Position(ident.Pos()) + findings = append(findings, finding{ + file: filename, + pos: pos, + fn: fn.Name.Name, + kind: "return", + detail: fmt.Sprintf("returns slice parameter %q", pname), + }) + } + } + } + case *ast.AssignStmt: + if hasDirective(fs, commentMap, commentGroups, node) { + return true + } + for i, rhsExpr := range node.Rhs { + if ident, ok := rhsExpr.(*ast.Ident); ok { + if pname, ok := paramObjs[ident.Obj]; ok && i < len(node.Lhs) { + pos := fs.Position(rhsExpr.Pos()) + lhsStr := exprString(node.Lhs[i]) + findings = append(findings, finding{ + file: filename, + pos: pos, + fn: fn.Name.Name, + kind: "assignment", + detail: fmt.Sprintf( + "assigns slice parameter %q to %s", + pname, + lhsStr, + ), + }) + } + } + } + case *ast.CompositeLit: + if hasDirective(fs, commentMap, commentGroups, node) { + return true + } + for _, elt := range node.Elts { + kv, ok := elt.(*ast.KeyValueExpr) + if !ok { + continue + } + if hasDirective(fs, commentMap, commentGroups, kv) { + continue + } + if ident, ok := kv.Value.(*ast.Ident); ok { + if pname, ok := paramObjs[ident.Obj]; ok { + pos := fs.Position(kv.Value.Pos()) + field := exprString(kv.Key) + findings = append(findings, finding{ + file: filename, + pos: pos, 
+ fn: fn.Name.Name, + kind: "struct literal", + detail: fmt.Sprintf( + "sets field %s to slice parameter %q", + field, + pname, + ), + }) + } + } + } + } + return true + }) + } + + return findings +} + +func isSliceType(expr ast.Expr) bool { + switch t := expr.(type) { + case *ast.ArrayType: + return t.Len == nil + case *ast.Ellipsis: + return true + } + return false +} + +func hasDirective( + fs *token.FileSet, + cm ast.CommentMap, + groups []*ast.CommentGroup, + node ast.Node, +) bool { + if node == nil { + return false + } + if cm != nil { + if mapped, ok := cm[node]; ok { + if commentGroupHasDirective(mapped) { + return true + } + } + } + nodePos := fs.Position(node.Pos()) + for _, group := range groups { + for _, c := range group.List { + if !bytes.Contains([]byte(c.Text), []byte(allowDirective)) { + continue + } + commentPos := fs.Position(c.Slash) + if commentPos.Filename != nodePos.Filename { + continue + } + if commentPos.Line == nodePos.Line { + return true + } + if commentPos.Line+1 == nodePos.Line && commentPos.Column == 1 { + return true + } + } + } + return false +} + +func commentGroupHasDirective(groups []*ast.CommentGroup) bool { + for _, group := range groups { + for _, c := range group.List { + if bytes.Contains([]byte(c.Text), []byte(allowDirective)) { + return true + } + } + } + return false +} + +func exprString(expr ast.Expr) string { + if expr == nil { + return "" + } + var buf bytes.Buffer + if err := format.Node(&buf, token.NewFileSet(), expr); err != nil { + return "" + } + return buf.String() +} +func shouldIncludeFile(path string) bool { + if filepath.Ext(path) != ".go" { + return false + } + name := filepath.Base(path) + if strings.HasSuffix(name, "_test.go") { + return false + } + return true +} diff --git a/channel/channel.go b/channel/channel.go index e92995a..fd0e7c6 100644 --- a/channel/channel.go +++ b/channel/channel.go @@ -92,7 +92,7 @@ func (d *DoubleRatchetEncryptedChannel) EncryptTwoPartyMessage( ) (newRatchetState string, envelope *channel.P2PChannelEnvelope, err error) { stateAndMessage := generated.DoubleRatchetStateAndMessage{ RatchetState: ratchetState, - Message: message, + Message: message, // buildutils:allow-slice-alias this assignment is ephemeral } result := DoubleRatchetEncrypt(stateAndMessage) diff --git a/config/version.go b/config/version.go index e22811e..c2e15f3 100644 --- a/config/version.go +++ b/config/version.go @@ -43,7 +43,7 @@ func FormatVersion(version []byte) string { } func GetPatchNumber() byte { - return 0x0f + return 0x10 } func GetRCNumber() byte { diff --git a/consensus/helper/quorum_certificate.go b/consensus/helper/quorum_certificate.go index ae0b068..b68530f 100644 --- a/consensus/helper/quorum_certificate.go +++ b/consensus/helper/quorum_certificate.go @@ -110,7 +110,7 @@ func WithQCState[StateT models.Unique](state *models.State[StateT]) func(*TestQu func WithQCSigners(signerIndices []byte) func(*TestQuorumCertificate) { return func(qc *TestQuorumCertificate) { - qc.AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices + qc.AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices // buildutils:allow-slice-alias } } diff --git a/consensus/helper/state.go b/consensus/helper/state.go index 6939bae..85efdda 100644 --- a/consensus/helper/state.go +++ b/consensus/helper/state.go @@ -243,7 +243,7 @@ func WithParentState[StateT models.Unique](parent *models.State[StateT]) func(*m func WithParentSigners[StateT models.Unique](signerIndices []byte) func(*models.State[StateT]) { return 
func(state *models.State[StateT]) { - state.ParentQuorumCertificate.(*TestQuorumCertificate).AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices + state.ParentQuorumCertificate.(*TestQuorumCertificate).AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices // buildutils:allow-slice-alias } } diff --git a/consensus/helper/timeout_certificate.go b/consensus/helper/timeout_certificate.go index 90bef9f..a2e22c3 100644 --- a/consensus/helper/timeout_certificate.go +++ b/consensus/helper/timeout_certificate.go @@ -90,7 +90,7 @@ func WithTCNewestQC(qc models.QuorumCertificate) func(*TestTimeoutCertificate) { func WithTCSigners(signerIndices []byte) func(*TestTimeoutCertificate) { return func(tc *TestTimeoutCertificate) { - tc.AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices + tc.AggregatedSignature.(*TestAggregatedSignature).Bitmask = signerIndices // buildutils:allow-slice-alias } } @@ -102,7 +102,7 @@ func WithTCRank(rank uint64) func(*TestTimeoutCertificate) { func WithTCHighQCRanks(highQCRanks []uint64) func(*TestTimeoutCertificate) { return func(tc *TestTimeoutCertificate) { - tc.LatestRanks = highQCRanks + tc.LatestRanks = highQCRanks // buildutils:allow-slice-alias } } diff --git a/consensus/participant/participant.go b/consensus/participant/participant.go index b16d974..060c147 100644 --- a/consensus/participant/participant.go +++ b/consensus/participant/participant.go @@ -57,7 +57,7 @@ func NewParticipant[ livenessState, err := consensusStore.GetLivenessState(filter) if err != nil { livenessState = &models.LivenessState{ - Filter: filter, + Filter: filter, // buildutils:allow-slice-alias this value is static CurrentRank: 0, LatestQuorumCertificate: trustedRoot.CertifyingQuorumCertificate, PriorRankTimeoutCertificate: nil, diff --git a/consensus/signature/packer.go b/consensus/signature/packer.go index 7a93412..cffe214 100644 --- a/consensus/signature/packer.go +++ b/consensus/signature/packer.go @@ -68,7 +68,7 @@ func (p *ConsensusSigDataPacker) Unpack( sigData []byte, ) (*consensus.StateSignatureData, error) { return &consensus.StateSignatureData{ - Signers: signerIdentities, - Signature: sigData, + Signers: signerIdentities, // buildutils:allow-slice-alias + Signature: sigData, // buildutils:allow-slice-alias }, nil } diff --git a/consensus/signature/weighted_signature_aggregator.go b/consensus/signature/weighted_signature_aggregator.go index b58e1ad..6c19073 100644 --- a/consensus/signature/weighted_signature_aggregator.go +++ b/consensus/signature/weighted_signature_aggregator.go @@ -76,11 +76,11 @@ func NewWeightedSignatureAggregator( } return &WeightedSignatureAggregator{ - dsTag: dsTag, - ids: ids, + dsTag: dsTag, // buildutils:allow-slice-alias static value + ids: ids, // buildutils:allow-slice-alias dynamic value constructed by caller idToInfo: idToInfo, aggregator: aggregator, - message: message, + message: message, // buildutils:allow-slice-alias static value for call lifetime collectedSigs: make(map[models.Identity][]byte), }, nil } @@ -156,7 +156,7 @@ func (w *WeightedSignatureAggregator) TrustedAdd( ) } - w.collectedSigs[signerID] = sig + w.collectedSigs[signerID] = sig // buildutils:allow-slice-alias static value for call lifetime w.totalWeight += info.weight return w.totalWeight, nil diff --git a/consensus/timeoutcollector/aggregation.go b/consensus/timeoutcollector/aggregation.go index 78a8610..2cb4876 100644 --- a/consensus/timeoutcollector/aggregation.go +++ b/consensus/timeoutcollector/aggregation.go @@ -85,8 
+85,8 @@ func NewTimeoutSignatureAggregator( return &TimeoutSignatureAggregator{ aggregator: aggregator, - filter: filter, - dsTag: dsTag, + filter: filter, // buildutils:allow-slice-alias static value + dsTag: dsTag, // buildutils:allow-slice-alias static value idToInfo: idToInfo, idToSignature: make(map[models.Identity]sigInfo), rank: rank, @@ -149,7 +149,7 @@ func (a *TimeoutSignatureAggregator) VerifyAndAdd( } a.idToSignature[signerID] = sigInfo{ - sig: sig, + sig: sig, // buildutils:allow-slice-alias static value for call lifetime newestQCRank: newestQCRank, } a.totalWeight += info.weight diff --git a/consensus/timeoutcollector/factory.go b/consensus/timeoutcollector/factory.go index b4ac96d..0691896 100644 --- a/consensus/timeoutcollector/factory.go +++ b/consensus/timeoutcollector/factory.go @@ -89,13 +89,13 @@ func NewTimeoutProcessorFactory[ ) *TimeoutProcessorFactory[StateT, VoteT, PeerIDT] { return &TimeoutProcessorFactory[StateT, VoteT, PeerIDT]{ tracer: tracer, - filter: filter, + filter: filter, // buildutils:allow-slice-alias static value aggregator: aggregator, committee: committee, notifier: notifier, validator: validator, voting: voting, - domainSeparationTag: domainSeparationTag, + domainSeparationTag: domainSeparationTag, // buildutils:allow-slice-alias static value } } diff --git a/consensus/votecollector/statemachine.go b/consensus/votecollector/statemachine.go index e314422..a00d668 100644 --- a/consensus/votecollector/statemachine.go +++ b/consensus/votecollector/statemachine.go @@ -124,12 +124,12 @@ func NewStateMachine[ ) *VoteCollector[StateT, VoteT, PeerIDT] { sm := &VoteCollector[StateT, VoteT, PeerIDT]{ tracer: tracer, - filter: filter, + filter: filter, // buildutils:allow-slice-alias static value workers: workers, notifier: notifier, createVerifyingProcessor: verifyingVoteProcessorFactory, votesCache: *NewVotesCache[VoteT](rank), - dsTag: dsTag, + dsTag: dsTag, // buildutils:allow-slice-alias static value aggregator: aggregator, voter: voter, } diff --git a/hypergraph/hypergraph.go b/hypergraph/hypergraph.go index cf86a11..7e64dd3 100644 --- a/hypergraph/hypergraph.go +++ b/hypergraph/hypergraph.go @@ -3,6 +3,7 @@ package hypergraph import ( "context" "math/big" + "slices" "sync" "github.com/pkg/errors" @@ -81,10 +82,10 @@ func NewHypergraph( hyperedgeRemoves: make(map[tries.ShardKey]hypergraph.IdSet), store: store, prover: prover, - coveredPrefix: coveredPrefix, + coveredPrefix: slices.Clone(coveredPrefix), authenticationProvider: authenticationProvider, syncController: hypergraph.NewSyncController(maxSyncSessions), - snapshotMgr: newSnapshotManager(logger), + snapshotMgr: newSnapshotManager(logger, store), } hg.publishSnapshot(nil) @@ -96,14 +97,7 @@ func (hg *HypergraphCRDT) publishSnapshot(root []byte) { return } hg.logger.Debug("publishing snapshot") - - snapshotStore, release, err := hg.store.NewSnapshot() - if err != nil { - hg.logger.Warn("unable to create hypergraph snapshot", zap.Error(err)) - return - } - - hg.snapshotMgr.publish(snapshotStore, release, root) + hg.snapshotMgr.publish(root) } func (hg *HypergraphCRDT) cloneSetWithStore( @@ -125,7 +119,7 @@ func (hg *HypergraphCRDT) SetShutdownContext(ctx context.Context) { go func() { select { case <-hg.shutdownCtx.Done(): - hg.snapshotMgr.release(hg.snapshotMgr.current) + hg.snapshotMgr.publish(nil) } }() } diff --git a/hypergraph/id_set.go b/hypergraph/id_set.go index c6cdec0..fb90ba5 100644 --- a/hypergraph/id_set.go +++ b/hypergraph/id_set.go @@ -40,7 +40,7 @@ func NewIdSet( Store: store, 
InclusionProver: prover, Root: root, - CoveredPrefix: coveredPrefix, + CoveredPrefix: slices.Clone(coveredPrefix), }, } } diff --git a/hypergraph/snapshot_manager.go b/hypergraph/snapshot_manager.go index 333ab7b..3ecd191 100644 --- a/hypergraph/snapshot_manager.go +++ b/hypergraph/snapshot_manager.go @@ -16,6 +16,7 @@ type snapshotHandle struct { release func() refs atomic.Int32 root []byte + key string branchCacheMu sync.RWMutex branchCache map[string]*protobufs.HypergraphComparisonResponse @@ -26,6 +27,7 @@ type snapshotHandle struct { } func newSnapshotHandle( + key string, store tries.TreeBackingStore, release func(), root []byte, @@ -36,6 +38,7 @@ func newSnapshotHandle( branchCache: make(map[string]*protobufs.HypergraphComparisonResponse), leafDataCache: make(map[string][]byte), leafCacheMiss: make(map[string]struct{}), + key: key, } if len(root) != 0 { h.root = append([]byte{}, root...) @@ -49,16 +52,20 @@ func (h *snapshotHandle) acquire() tries.TreeBackingStore { return h.store } -func (h *snapshotHandle) releaseRef(logger *zap.Logger) { +func (h *snapshotHandle) releaseRef(logger *zap.Logger) bool { if h == nil { - return + return false } - if h.refs.Add(-1) == 0 && h.release != nil { - if err := safeRelease(h.release); err != nil { - logger.Warn("failed to release hypergraph snapshot", zap.Error(err)) + if h.refs.Add(-1) == 0 { + if h.release != nil { + if err := safeRelease(h.release); err != nil { + logger.Warn("failed to release hypergraph snapshot", zap.Error(err)) + } } + return true } + return false } func (h *snapshotHandle) Store() tries.TreeBackingStore { @@ -112,6 +119,7 @@ func (h *snapshotHandle) getLeafData(key []byte) ([]byte, bool) { return data, ok } +// buildutils:allow-slice-alias data is already cloned for this func (h *snapshotHandle) storeLeafData(key []byte, data []byte) { if h == nil || len(data) == 0 { return @@ -146,53 +154,96 @@ func (h *snapshotHandle) isLeafMiss(key []byte) bool { type snapshotManager struct { logger *zap.Logger + store tries.TreeBackingStore mu sync.Mutex - current *snapshotHandle + root []byte + handles map[string]*snapshotHandle } -func newSnapshotManager(logger *zap.Logger) *snapshotManager { - return &snapshotManager{logger: logger} -} - -func (m *snapshotManager) publish( +func newSnapshotManager( + logger *zap.Logger, store tries.TreeBackingStore, - release func(), - root []byte, -) { +) *snapshotManager { + return &snapshotManager{ + logger: logger, + store: store, + handles: make(map[string]*snapshotHandle), + } +} + +func (m *snapshotManager) publish(root []byte) { m.mu.Lock() defer m.mu.Unlock() - handle := newSnapshotHandle(store, release, root) - prev := m.current - m.current = handle + for key, handle := range m.handles { + if handle != nil { + handle.releaseRef(m.logger) + } + delete(m.handles, key) + } - if prev != nil { - prev.releaseRef(m.logger) + m.root = nil + if len(root) != 0 { + m.root = append([]byte{}, root...) 
} rootHex := "" if len(root) != 0 { rootHex = hex.EncodeToString(root) } - m.logger.Debug("swapped snapshot", zap.String("root", rootHex)) + m.logger.Debug("reset snapshot state", zap.String("root", rootHex)) } -func (m *snapshotManager) acquire() *snapshotHandle { +func (m *snapshotManager) acquire( + shardKey tries.ShardKey, +) *snapshotHandle { + key := shardKeyString(shardKey) m.mu.Lock() defer m.mu.Unlock() - if m.current == nil { + if handle, ok := m.handles[key]; ok { + handle.acquire() + return handle + } + + if m.store == nil { return nil } - m.current.acquire() - return m.current + + storeSnapshot, release, err := m.store.NewShardSnapshot(shardKey) + if err != nil { + m.logger.Warn( + "failed to build shard snapshot", + zap.Error(err), + zap.String("shard_key", key), + ) + return nil + } + + handle := newSnapshotHandle(key, storeSnapshot, release, m.root) + m.handles[key] = handle + return handle } func (m *snapshotManager) release(handle *snapshotHandle) { if handle == nil { return } - handle.releaseRef(m.logger) + if !handle.releaseRef(m.logger) { + return + } + m.mu.Lock() + defer m.mu.Unlock() + if current, ok := m.handles[handle.key]; ok && current == handle { + delete(m.handles, handle.key) + } +} + +func shardKeyString(sk tries.ShardKey) string { + buf := make([]byte, 0, len(sk.L1)+len(sk.L2)) + buf = append(buf, sk.L1[:]...) + buf = append(buf, sk.L2[:]...) + return hex.EncodeToString(buf) } func safeRelease(fn func()) (err error) { diff --git a/hypergraph/sync.go b/hypergraph/sync.go index b58b72e..4dec1cf 100644 --- a/hypergraph/sync.go +++ b/hypergraph/sync.go @@ -60,31 +60,8 @@ func (hg *HypergraphCRDT) HyperStream( hg.syncController.EndSyncSession(peerKey) }() - snapshotStart := time.Now() - handle := hg.snapshotMgr.acquire() - if handle == nil { - return errors.New("hypergraph snapshot unavailable") - } - defer hg.snapshotMgr.release(handle) - sessionLogger.Debug( - "snapshot acquisition complete", - zap.Duration("duration", time.Since(snapshotStart)), - ) - - root := handle.Root() - if len(root) != 0 { - sessionLogger.Debug( - "acquired snapshot", - zap.String("root", hex.EncodeToString(root)), - ) - } else { - sessionLogger.Debug("acquired snapshot", zap.String("root", "")) - } - - snapshotStore := handle.Store() - syncStart := time.Now() - err = hg.syncTreeServer(ctx, stream, snapshotStore, root, sessionLogger, handle) + err = hg.syncTreeServer(ctx, stream, sessionLogger) sessionLogger.Info( "syncTreeServer completed", zap.Duration("sync_duration", time.Since(syncStart)), @@ -432,6 +409,14 @@ func toInt32Slice(s []int) []int32 { return o } +func toIntSlice(s []int32) []int { + o := []int{} + for _, p := range s { + o = append(o, int(p)) + } + return o +} + func isPrefix(prefix []int, path []int) bool { if len(prefix) > len(path) { return false @@ -531,10 +516,18 @@ type rawVertexSaver interface { ) error } +type vertexTreeDeleter interface { + DeleteVertexTree( + txn tries.TreeBackingStoreTransaction, + id []byte, + ) error +} + const ( leafAckMinTimeout = 30 * time.Second leafAckMaxTimeout = 10 * time.Minute leafAckPerLeafBudget = 20 * time.Millisecond // Generous budget for tree building overhead + pruneTxnChunk = 100 ) func leafAckTimeout(count uint64) time.Duration { @@ -555,6 +548,22 @@ func shouldUseRawSync(phaseSet protobufs.HypergraphPhaseSet) bool { return phaseSet == protobufs.HypergraphPhaseSet_HYPERGRAPH_PHASE_SET_VERTEX_ADDS } +func keyWithinCoveredPrefix(key []byte, prefix []int) bool { + if len(prefix) == 0 { + return true + } + path := 
tries.GetFullPath(key) + if len(path) < len(prefix) { + return false + } + for i, nib := range prefix { + if path[i] != nib { + return false + } + } + return true +} + // rawShardSync performs a full raw sync of all leaves from server to client. // This iterates directly over the database, bypassing in-memory tree caching // to ensure all leaves are sent even if the in-memory tree is stale. @@ -562,6 +571,7 @@ func (s *streamManager) rawShardSync( shardKey tries.ShardKey, phaseSet protobufs.HypergraphPhaseSet, incomingLeaves <-chan *protobufs.HypergraphComparison, + coveredPrefix []int32, ) error { shardHex := hex.EncodeToString(shardKey.L2[:]) s.logger.Info( @@ -569,6 +579,7 @@ func (s *streamManager) rawShardSync( zap.String("shard_key", shardHex), ) start := time.Now() + prefix := toIntSlice(coveredPrefix) // Determine set and phase type strings setType := string(hypergraph.VertexAtomType) @@ -608,7 +619,7 @@ func (s *streamManager) rawShardSync( // Skip non-leaf nodes (branches) continue } - if leaf != nil { + if leaf != nil && keyWithinCoveredPrefix(leaf.Key, prefix) { count++ } } @@ -653,6 +664,9 @@ func (s *streamManager) rawShardSync( if leaf == nil { continue } + if !keyWithinCoveredPrefix(leaf.Key, prefix) { + continue + } update := &protobufs.LeafData{ Key: leaf.Key, @@ -743,6 +757,7 @@ func (s *streamManager) receiveRawShardSync( var txn tries.TreeBackingStoreTransaction var processed uint64 + seenKeys := make(map[string]struct{}) for processed < expectedLeaves { if processed%100 == 0 { if txn != nil { @@ -796,6 +811,9 @@ func (s *streamManager) receiveRawShardSync( } } + // Track key so we can prune anything absent from the authoritative list. + seenKeys[string(append([]byte(nil), leafMsg.Key...))] = struct{}{} + // Use Add to properly build tree structure if err := s.localSet.Add(txn, theirs); err != nil { txn.Abort() @@ -823,6 +841,10 @@ func (s *streamManager) receiveRawShardSync( return errors.Wrap(err, "receive raw shard sync") } + if err := s.pruneRawSyncExtras(seenKeys); err != nil { + return errors.Wrap(err, "receive raw shard sync") + } + s.logger.Info( "CLIENT: raw shard sync completed", zap.Uint64("leaves_received", expectedLeaves), @@ -831,6 +853,92 @@ func (s *streamManager) receiveRawShardSync( return nil } +func (s *streamManager) pruneRawSyncExtras(seen map[string]struct{}) error { + start := time.Now() + setType := s.localTree.SetType + phaseType := s.localTree.PhaseType + shardKey := s.localTree.ShardKey + + iter, err := s.hypergraphStore.IterateRawLeaves(setType, phaseType, shardKey) + if err != nil { + return errors.Wrap(err, "prune raw sync extras: iterator") + } + defer iter.Close() + + var txn tries.TreeBackingStoreTransaction + var pruned uint64 + + commitTxn := func() error { + if txn == nil { + return nil + } + if err := txn.Commit(); err != nil { + txn.Abort() + return err + } + txn = nil + return nil + } + + for valid := iter.First(); valid; valid = iter.Next() { + leaf, err := iter.Leaf() + if err != nil || leaf == nil { + continue + } + if _, ok := seen[string(leaf.Key)]; ok { + continue + } + + if txn == nil { + txn, err = s.hypergraphStore.NewTransaction(false) + if err != nil { + return errors.Wrap(err, "prune raw sync extras") + } + } + + atom := AtomFromBytes(leaf.Value) + if atom == nil { + s.logger.Warn("CLIENT: skipping stale leaf with invalid atom", zap.String("key", hex.EncodeToString(leaf.Key))) + continue + } + + if err := s.localSet.Delete(txn, atom); err != nil { + txn.Abort() + return errors.Wrap(err, "prune raw sync extras") + } + 
if err := s.deleteVertexTreeIfNeeded(txn, atom, leaf.Key); err != nil { + txn.Abort() + return errors.Wrap(err, "prune raw sync extras") + } + + pruned++ + if pruned%pruneTxnChunk == 0 { + if err := commitTxn(); err != nil { + return errors.Wrap(err, "prune raw sync extras") + } + } + } + + if err := commitTxn(); err != nil { + return errors.Wrap(err, "prune raw sync extras") + } + + if pruned > 0 { + s.logger.Info( + "CLIENT: pruned stale leaves after raw sync", + zap.Uint64("count", pruned), + zap.Duration("duration", time.Since(start)), + ) + } else { + s.logger.Info( + "CLIENT: no stale leaves found after raw sync", + zap.Duration("duration", time.Since(start)), + ) + } + + return nil +} + func (s *streamManager) awaitRawLeafMetadata( incomingLeaves <-chan *protobufs.HypergraphComparison, ) (uint64, error) { @@ -1279,7 +1387,7 @@ func getBranchInfoFromTree( ) if node == nil { return &protobufs.HypergraphComparisonResponse{ - Path: path, + Path: path, // buildutils:allow-slice-alias this assignment is ephemeral Commitment: []byte{}, IsRoot: len(path) == 0, }, nil @@ -1293,7 +1401,7 @@ func getBranchInfoFromTree( node = ensureCommittedNode(logger, tree, intpath, node) branchInfo := &protobufs.HypergraphComparisonResponse{ - Path: path, + Path: path, // buildutils:allow-slice-alias this assignment is ephemeral IsRoot: len(path) == 0, } @@ -1423,7 +1531,7 @@ func (s *streamManager) queryNext( if err := s.stream.Send(&protobufs.HypergraphComparison{ Payload: &protobufs.HypergraphComparison_Query{ Query: &protobufs.HypergraphComparisonQuery{ - Path: path, + Path: path, // buildutils:allow-slice-alias this assignment is ephemeral IncludeLeafData: true, }, }, @@ -1591,6 +1699,134 @@ func (s *streamManager) handleLeafData( return nil } +func (s *streamManager) deleteVertexTreeIfNeeded( + txn tries.TreeBackingStoreTransaction, + atom hypergraph.Atom, + key []byte, +) error { + if atom == nil || atom.GetAtomType() != hypergraph.VertexAtomType { + return nil + } + + deleter, ok := s.hypergraphStore.(vertexTreeDeleter) + if !ok { + return nil + } + + return deleter.DeleteVertexTree(txn, key) +} + +func (s *streamManager) pruneLocalSubtree(path []int32) (uint64, error) { + start := time.Now() + pathHex := hex.EncodeToString(packPath(path)) + s.logger.Info( + "CLIENT: pruning subtree", + zap.String("path", pathHex), + ) + + intPath := make([]int, len(path)) + for i, nib := range path { + intPath[i] = int(nib) + } + + node, err := s.localTree.GetByPath(intPath) + if err != nil { + return 0, errors.Wrap(err, "prune local subtree") + } + + if node == nil { + s.logger.Debug( + "CLIENT: prune skipped, node missing", + zap.String("path", pathHex), + ) + return 0, nil + } + + leaves := []*tries.LazyVectorCommitmentLeafNode{} + if leaf, ok := node.(*tries.LazyVectorCommitmentLeafNode); ok { + leaves = append(leaves, leaf) + } else { + gathered := tries.GetAllLeaves( + s.localTree.SetType, + s.localTree.PhaseType, + s.localTree.ShardKey, + node, + ) + for _, leaf := range gathered { + if leaf == nil { + continue + } + leaves = append(leaves, leaf) + } + } + + if len(leaves) == 0 { + s.logger.Debug( + "CLIENT: prune skipped, no leaves", + zap.String("path", pathHex), + ) + return 0, nil + } + + var txn tries.TreeBackingStoreTransaction + var pruned uint64 + + commitTxn := func() error { + if txn == nil { + return nil + } + if err := txn.Commit(); err != nil { + txn.Abort() + return err + } + txn = nil + return nil + } + + for idx, leaf := range leaves { + if idx%pruneTxnChunk == 0 { + if err := commitTxn(); 
err != nil { + return pruned, errors.Wrap(err, "prune local subtree") + } + txn, err = s.hypergraphStore.NewTransaction(false) + if err != nil { + return pruned, errors.Wrap(err, "prune local subtree") + } + } + + atom := AtomFromBytes(leaf.Value) + if atom == nil { + txn.Abort() + return pruned, errors.Wrap(errors.New("invalid atom payload"), "prune local subtree") + } + + if err := s.localSet.Delete(txn, atom); err != nil { + txn.Abort() + return pruned, errors.Wrap(err, "prune local subtree") + } + + if err := s.deleteVertexTreeIfNeeded(txn, atom, leaf.Key); err != nil { + txn.Abort() + return pruned, errors.Wrap(err, "prune local subtree") + } + + pruned++ + } + + if err := commitTxn(); err != nil { + return pruned, errors.Wrap(err, "prune local subtree") + } + + s.logger.Info( + "CLIENT: pruned local subtree", + zap.String("path", pathHex), + zap.Uint64("leaf_count", pruned), + zap.Duration("duration", time.Since(start)), + ) + + return pruned, nil +} + func (s *streamManager) persistLeafTree( txn tries.TreeBackingStoreTransaction, update *protobufs.LeafData, @@ -1792,19 +2028,6 @@ func (s *streamManager) walk( shardKey tries.ShardKey, phaseSet protobufs.HypergraphPhaseSet, ) error { - // Check if we should use raw sync mode for this phase set - if init && shouldUseRawSync(phaseSet) { - s.logger.Info( - "walk: using raw sync mode", - zap.Bool("is_server", isServer), - zap.Int("phase_set", int(phaseSet)), - ) - if isServer { - return s.rawShardSync(shardKey, phaseSet, incomingLeaves) - } - return s.receiveRawShardSync(incomingLeaves) - } - select { case <-s.ctx.Done(): return s.ctx.Err() @@ -1822,6 +2045,19 @@ func (s *streamManager) walk( return nil } + // Check if we should use raw sync mode for this phase set + if init && shouldUseRawSync(phaseSet) { + s.logger.Info( + "walk: using raw sync mode", + zap.Bool("is_server", isServer), + zap.Int("phase_set", int(phaseSet)), + ) + if isServer { + return s.rawShardSync(shardKey, phaseSet, incomingLeaves, path) + } + return s.receiveRawShardSync(incomingLeaves) + } + if isLeaf(lnode) && isLeaf(rnode) && !init { return nil } @@ -1883,7 +2119,7 @@ func (s *streamManager) walk( ) return errors.Wrap(err, "walk") } else { - err := s.handleLeafData(incomingLeaves) + _, err := s.pruneLocalSubtree(lpref) return errors.Wrap(err, "walk") } } @@ -1938,27 +2174,16 @@ func (s *streamManager) walk( } } } else { - // s.logger.Debug( - // "known missing branch", - // zap.String( - // "path", - // hex.EncodeToString( - // packPath( - // append(append([]int32{}, preTraversal...), child.Index), - // ), - // ), - // ), - // ) + missingPath := append(append([]int32{}, preTraversal...), child.Index) if isServer { if err := s.sendLeafData( - append(append([]int32{}, preTraversal...), child.Index), + missingPath, incomingLeaves, ); err != nil { return errors.Wrap(err, "walk") } } else { - err := s.handleLeafData(incomingLeaves) - if err != nil { + if _, err := s.pruneLocalSubtree(missingPath); err != nil { return errors.Wrap(err, "walk") } } @@ -2017,6 +2242,10 @@ func (s *streamManager) walk( ); err != nil { return errors.Wrap(err, "walk") } + } else { + if _, err := s.pruneLocalSubtree(nextPath); err != nil { + return errors.Wrap(err, "walk") + } } } if rchild != nil { @@ -2047,8 +2276,7 @@ func (s *streamManager) walk( return errors.Wrap(err, "walk") } } else { - err := s.handleLeafData(incomingLeaves) - if err != nil { + if _, err := s.pruneLocalSubtree(nextPath); err != nil { return errors.Wrap(err, "walk") } } @@ -2099,23 +2327,12 @@ func (s *streamManager) 
walk( func (hg *HypergraphCRDT) syncTreeServer( ctx context.Context, stream protobufs.HypergraphComparisonService_HyperStreamServer, - snapshotStore tries.TreeBackingStore, - snapshotRoot []byte, sessionLogger *zap.Logger, - handle *snapshotHandle, ) error { logger := sessionLogger if logger == nil { logger = hg.logger } - if len(snapshotRoot) != 0 { - logger.Info( - "syncing with snapshot", - zap.String("root", hex.EncodeToString(snapshotRoot)), - ) - } else { - logger.Info("syncing with snapshot", zap.String("root", "")) - } msg, err := stream.Recv() if err != nil { @@ -2137,6 +2354,29 @@ func (hg *HypergraphCRDT) syncTreeServer( L2: [32]byte(query.ShardKey[3:]), } + snapshotStart := time.Now() + handle := hg.snapshotMgr.acquire(shardKey) + if handle == nil { + return errors.New("hypergraph shard snapshot unavailable") + } + defer hg.snapshotMgr.release(handle) + logger.Debug( + "snapshot acquisition complete", + zap.Duration("duration", time.Since(snapshotStart)), + ) + + snapshotRoot := handle.Root() + if len(snapshotRoot) != 0 { + logger.Info( + "syncing with snapshot", + zap.String("root", hex.EncodeToString(snapshotRoot)), + ) + } else { + logger.Info("syncing with snapshot", zap.String("root", "")) + } + + snapshotStore := handle.Store() + idSet := hg.snapshotPhaseSet(shardKey, query.PhaseSet, snapshotStore) if idSet == nil { return errors.New("unsupported phase set") diff --git a/node/app/db_console.go b/node/app/db_console.go index 3897eee..115d312 100644 --- a/node/app/db_console.go +++ b/node/app/db_console.go @@ -537,7 +537,7 @@ func newModel(config *config.Config, specs []tailSpec) model { return model{ console: consoleModel(config, false), config: config, - tabs: specs, + tabs: specs, // buildutils:allow-slice-alias mutation is not an issue active: 0, vps: vps, offsets: offsets, diff --git a/node/consensus/aggregator/consensus_signature_aggregator_wrapper.go b/node/consensus/aggregator/consensus_signature_aggregator_wrapper.go index 7bd5e73..12bd189 100644 --- a/node/consensus/aggregator/consensus_signature_aggregator_wrapper.go +++ b/node/consensus/aggregator/consensus_signature_aggregator_wrapper.go @@ -47,7 +47,7 @@ func (c *ConsensusSignatureAggregatorWrapper) Aggregate( noextSigs = append(noextSigs, s[:74]) } } else { - noextSigs = signatures + noextSigs = signatures // buildutils:allow-slice-alias slice will not mutate } output, err := c.blsConstructor.Aggregate( @@ -71,14 +71,16 @@ func (c *ConsensusSignatureAggregatorWrapper) Aggregate( bitmask := make([]byte, (len(provers)+7)/8) extra := []byte{} if len(c.filter) != 0 { - extra = make([]byte, 516*len(provers)) + extra = make([]byte, 516*(len(provers)-1)) } + adj := 0 for i, p := range provers { if j, ok := pubs[string(p.PublicKey)]; ok { bitmask[i/8] |= (1 << (i % 8)) - if len(c.filter) != 0 { - copy(extra[516*i:516*(i+1)], signatures[j][74:]) + if len(c.filter) != 0 && len(signatures[j]) > 74 { + copy(extra[516*adj:516*(adj+1)], signatures[j][74:]) + adj++ } } } diff --git a/node/consensus/app/app_consensus_engine.go b/node/consensus/app/app_consensus_engine.go index f45950e..0c400ab 100644 --- a/node/consensus/app/app_consensus_engine.go +++ b/node/consensus/app/app_consensus_engine.go @@ -10,6 +10,7 @@ import ( "slices" "strings" "sync" + "sync/atomic" "time" "github.com/iden3/go-iden3-crypto/poseidon" @@ -52,6 +53,7 @@ import ( typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus" "source.quilibrium.com/quilibrium/monorepo/types/crypto" 
"source.quilibrium.com/quilibrium/monorepo/types/execution" + "source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics" "source.quilibrium.com/quilibrium/monorepo/types/execution/state" "source.quilibrium.com/quilibrium/monorepo/types/hypergraph" tkeys "source.quilibrium.com/quilibrium/monorepo/types/keys" @@ -99,47 +101,59 @@ type AppConsensusEngine struct { *protobufs.AppShardFrame, *protobufs.ProposalVote, ] - encryptedChannel channel.EncryptedChannel - dispatchService *dispatch.DispatchService - blsConstructor crypto.BlsConstructor - minimumProvers func() uint64 - executors map[string]execution.ShardExecutionEngine - executorsMu sync.RWMutex - executionManager *manager.ExecutionEngineManager - peerInfoManager tp2p.PeerInfoManager - currentDifficulty uint32 - currentDifficultyMu sync.RWMutex - messageCollectors *keyedaggregator.SequencedCollectors[sequencedAppMessage] - messageAggregator *keyedaggregator.SequencedAggregator[sequencedAppMessage] - appMessageSpillover map[uint64][]*protobufs.Message - appSpilloverMu sync.Mutex - lastProposalRank uint64 - lastProposalRankMu sync.RWMutex - collectedMessages []*protobufs.Message - collectedMessagesMu sync.RWMutex - provingMessages []*protobufs.Message - provingMessagesMu sync.RWMutex - lastProvenFrameTime time.Time - lastProvenFrameTimeMu sync.RWMutex - frameStore map[string]*protobufs.AppShardFrame - frameStoreMu sync.RWMutex - proposalCache map[uint64]*protobufs.AppShardProposal - proposalCacheMu sync.RWMutex - pendingCertifiedParents map[uint64]*protobufs.AppShardProposal - pendingCertifiedParentsMu sync.RWMutex - proofCache map[uint64][516]byte - proofCacheMu sync.RWMutex - ctx lifecycle.SignalerContext - cancel context.CancelFunc - quit chan struct{} - frameChainChecker *AppFrameChainChecker - canRunStandalone bool - blacklistMap map[string]bool - currentRank uint64 - alertPublicKey []byte - peerAuthCache map[string]time.Time - peerAuthCacheMu sync.RWMutex - proverAddress []byte + encryptedChannel channel.EncryptedChannel + dispatchService *dispatch.DispatchService + blsConstructor crypto.BlsConstructor + minimumProvers func() uint64 + executors map[string]execution.ShardExecutionEngine + executorsMu sync.RWMutex + executionManager *manager.ExecutionEngineManager + peerInfoManager tp2p.PeerInfoManager + currentDifficulty uint32 + currentDifficultyMu sync.RWMutex + messageCollectors *keyedaggregator.SequencedCollectors[sequencedAppMessage] + messageAggregator *keyedaggregator.SequencedAggregator[sequencedAppMessage] + appMessageSpillover map[uint64][]*protobufs.Message + appSpilloverMu sync.Mutex + lastProposalRank uint64 + lastProposalRankMu sync.RWMutex + collectedMessages []*protobufs.Message + collectedMessagesMu sync.RWMutex + provingMessages []*protobufs.Message + provingMessagesMu sync.RWMutex + lastProvenFrameTime time.Time + lastProvenFrameTimeMu sync.RWMutex + frameStore map[string]*protobufs.AppShardFrame + frameStoreMu sync.RWMutex + proposalCache map[uint64]*protobufs.AppShardProposal + proposalCacheMu sync.RWMutex + pendingCertifiedParents map[uint64]*protobufs.AppShardProposal + pendingCertifiedParentsMu sync.RWMutex + proofCache map[uint64][516]byte + proofCacheMu sync.RWMutex + ctx lifecycle.SignalerContext + cancel context.CancelFunc + quit chan struct{} + frameChainChecker *AppFrameChainChecker + canRunStandalone bool + blacklistMap map[string]bool + currentRank uint64 + alertPublicKey []byte + peerAuthCache map[string]time.Time + peerAuthCacheMu sync.RWMutex + peerInfoDigestCache map[string]struct{} + 
peerInfoDigestCacheMu sync.Mutex + keyRegistryDigestCache map[string]struct{} + keyRegistryDigestCacheMu sync.Mutex + proverAddress []byte + lowCoverageStreak map[string]*coverageStreak + coverageOnce sync.Once + coverageMinProvers uint64 + coverageHaltThreshold uint64 + coverageHaltGrace uint64 + globalProverRootVerifiedFrame atomic.Uint64 + globalProverRootSynced atomic.Bool + globalProverSyncInProgress atomic.Bool // Message queues consensusMessageQueue chan *pb.Message @@ -230,7 +244,7 @@ func NewAppConsensusEngine( engine := &AppConsensusEngine{ logger: logger, config: config, - appAddress: appAddress, + appAddress: appAddress, // buildutils:allow-slice-alias slice is static appFilter: appFilter, appAddressHex: hex.EncodeToString(appAddress), pubsub: ps, @@ -278,6 +292,8 @@ func NewAppConsensusEngine( blacklistMap: make(map[string]bool), alertPublicKey: []byte{}, peerAuthCache: make(map[string]time.Time), + peerInfoDigestCache: make(map[string]struct{}), + keyRegistryDigestCache: make(map[string]struct{}), } engine.frameChainChecker = NewAppFrameChainChecker(clockStore, logger, appAddress) @@ -402,6 +418,10 @@ func NewAppConsensusEngine( } engine.executionManager = executionManager + if err := engine.ensureGlobalGenesis(); err != nil { + return nil, errors.Wrap(err, "new app consensus engine") + } + // Create dispatch service engine.dispatchService = dispatch.NewDispatchService( inboxStore, @@ -475,6 +495,7 @@ func NewAppConsensusEngine( componentBuilder.AddWorker(engine.executionManager.Start) componentBuilder.AddWorker(engine.eventDistributor.Start) componentBuilder.AddWorker(engine.appTimeReel.Start) + componentBuilder.AddWorker(engine.globalTimeReel.Start) componentBuilder.AddWorker(engine.startAppMessageAggregator) latest, err := engine.consensusStore.GetConsensusState(engine.appAddress) @@ -483,7 +504,7 @@ func NewAppConsensusEngine( *protobufs.AppShardFrame, *protobufs.ProposalVote, ] - if err != nil { + initializeCertifiedGenesis := func() { frame, qc := engine.initializeGenesis() state = &models.CertifiedState[*protobufs.AppShardFrame]{ State: &models.State[*protobufs.AppShardFrame]{ @@ -493,44 +514,53 @@ func NewAppConsensusEngine( }, CertifyingQuorumCertificate: qc, } + pending = nil + } + + if err != nil { + initializeCertifiedGenesis() } else { - qc, err := engine.clockStore.GetQuorumCertificate(nil, latest.FinalizedRank) - if err != nil { - panic(err) - } - frame, _, err := engine.clockStore.GetShardClockFrame( + qc, err := engine.clockStore.GetQuorumCertificate( engine.appAddress, - qc.GetFrameNumber(), - false, + latest.FinalizedRank, ) - if err != nil { - panic(err) + if err != nil || qc.GetFrameNumber() == 0 { + initializeCertifiedGenesis() + } else { + frame, _, err := engine.clockStore.GetShardClockFrame( + engine.appAddress, + qc.GetFrameNumber(), + false, + ) + if err != nil { + panic(err) + } + parentFrame, err := engine.clockStore.GetGlobalClockFrame( + qc.GetFrameNumber() - 1, + ) + if err != nil { + panic(err) + } + parentQC, err := engine.clockStore.GetQuorumCertificate( + engine.appAddress, + parentFrame.GetRank(), + ) + if err != nil { + panic(err) + } + state = &models.CertifiedState[*protobufs.AppShardFrame]{ + State: &models.State[*protobufs.AppShardFrame]{ + Rank: frame.GetRank(), + Identifier: frame.Identity(), + ProposerID: frame.Source(), + ParentQuorumCertificate: parentQC, + Timestamp: frame.GetTimestamp(), + State: &frame, + }, + CertifyingQuorumCertificate: qc, + } + pending = engine.getPendingProposals(frame.Header.FrameNumber) } - parentFrame, 
err := engine.clockStore.GetGlobalClockFrame( - qc.GetFrameNumber() - 1, - ) - if err != nil { - panic(err) - } - parentQC, err := engine.clockStore.GetQuorumCertificate( - nil, - parentFrame.GetRank(), - ) - if err != nil { - panic(err) - } - state = &models.CertifiedState[*protobufs.AppShardFrame]{ - State: &models.State[*protobufs.AppShardFrame]{ - Rank: frame.GetRank(), - Identifier: frame.Identity(), - ProposerID: frame.Source(), - ParentQuorumCertificate: parentQC, - Timestamp: frame.GetTimestamp(), - State: &frame, - }, - CertifyingQuorumCertificate: qc, - } - pending = engine.getPendingProposals(frame.Header.FrameNumber) } engine.recordProposalRank(state.Rank()) @@ -578,6 +608,22 @@ func NewAppConsensusEngine( } engine.forks = forks + dbConfig := config.DB.WithDefaults() + dbPath := dbConfig.Path + if engine.coreId > 0 { + if len(dbConfig.WorkerPaths) >= int(engine.coreId) { + dbPath = dbConfig.WorkerPaths[engine.coreId-1] + } else if dbConfig.WorkerPathPrefix != "" { + dbPath = fmt.Sprintf(dbConfig.WorkerPathPrefix, engine.coreId) + } + } + + appSyncHooks := qsync.NewAppSyncHooks( + appAddress, + dbPath, + config.P2P.Network, + ) + engine.syncProvider = qsync.NewSyncProvider[ *protobufs.AppShardFrame, *protobufs.AppShardProposal, @@ -599,6 +645,7 @@ func NewAppConsensusEngine( config, appAddress, engine.proverAddress, + appSyncHooks, ) // Add sync provider @@ -609,6 +656,11 @@ func NewAppConsensusEngine( ctx lifecycle.SignalerContext, ready lifecycle.ReadyFunc, ) { + if err := engine.waitForProverRegistration(ctx); err != nil { + engine.logger.Error("prover unavailable", zap.Error(err)) + ctx.Throw(err) + return + } if err := engine.startConsensus(state, pending, ctx, ready); err != nil { ctx.Throw(err) return @@ -785,6 +837,119 @@ func (e *AppConsensusEngine) Stop(force bool) <-chan error { return errChan } +func (e *AppConsensusEngine) handleGlobalProverRoot( + frame *protobufs.GlobalFrame, +) { + if frame == nil || frame.Header == nil { + return + } + + frameNumber := frame.Header.FrameNumber + expectedProverRoot := frame.Header.ProverTreeCommitment + + localRoot, err := e.computeLocalGlobalProverRoot(frameNumber) + if err != nil { + e.logger.Warn( + "failed to compute local global prover root", + zap.Uint64("frame_number", frameNumber), + zap.Error(err), + ) + e.globalProverRootSynced.Store(false) + e.globalProverRootVerifiedFrame.Store(0) + e.triggerGlobalHypersync(frame.Header.Prover) + return + } + + if len(localRoot) == 0 || len(expectedProverRoot) == 0 { + return + } + + if !bytes.Equal(localRoot, expectedProverRoot) { + e.logger.Warn( + "global prover root mismatch", + zap.Uint64("frame_number", frameNumber), + zap.String("expected_root", hex.EncodeToString(expectedProverRoot)), + zap.String("local_root", hex.EncodeToString(localRoot)), + ) + e.globalProverRootSynced.Store(false) + e.globalProverRootVerifiedFrame.Store(0) + e.triggerGlobalHypersync(frame.Header.Prover) + return + } + + prev := e.globalProverRootVerifiedFrame.Load() + if prev >= frameNumber { + return + } + + e.globalProverRootSynced.Store(true) + e.globalProverRootVerifiedFrame.Store(frameNumber) + + if err := e.proverRegistry.Refresh(); err != nil { + e.logger.Warn("failed to refresh prover registry", zap.Error(err)) + } +} + +func (e *AppConsensusEngine) computeLocalGlobalProverRoot( + frameNumber uint64, +) ([]byte, error) { + if e.hypergraph == nil { + return nil, errors.New("hypergraph unavailable") + } + + commitSet, err := e.hypergraph.Commit(frameNumber) + if err != nil { + return nil, 
errors.Wrap(err, "compute global prover root") + } + + var zeroShardKey tries.ShardKey + for shardKey, phaseCommits := range commitSet { + if shardKey.L1 == zeroShardKey.L1 { + if len(phaseCommits) == 0 || len(phaseCommits[0]) == 0 { + return nil, errors.New("empty global prover root commitment") + } + return slices.Clone(phaseCommits[0]), nil + } + } + + return nil, errors.New("global prover root shard missing") +} + +func (e *AppConsensusEngine) triggerGlobalHypersync(proposer []byte) { + if e.syncProvider == nil || len(proposer) == 0 { + e.logger.Debug("no sync provider or proposer for hypersync") + return + } + if bytes.Equal(proposer, e.proverAddress) { + e.logger.Debug("proposer matches local prover, skipping hypersync") + return + } + if !e.globalProverSyncInProgress.CompareAndSwap(false, true) { + e.logger.Debug("global hypersync already running") + return + } + + go func() { + defer e.globalProverSyncInProgress.Store(false) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + shardKey := tries.ShardKey{ + L1: [3]byte{0x00, 0x00, 0x00}, + L2: intrinsics.GLOBAL_INTRINSIC_ADDRESS, + } + + e.syncProvider.HyperSync(ctx, proposer, shardKey, nil) + if err := e.proverRegistry.Refresh(); err != nil { + e.logger.Warn( + "failed to refresh prover registry after hypersync", + zap.Error(err), + ) + } + }() +} + func (e *AppConsensusEngine) GetFrame() *protobufs.AppShardFrame { frame, _, _ := e.clockStore.GetLatestShardClockFrame(e.appAddress) return frame @@ -1296,7 +1461,7 @@ func (e *AppConsensusEngine) initializeGenesis() ( } genesisQC := &protobufs.QuorumCertificate{ Rank: 0, - Filter: e.appFilter, + Filter: e.appAddress, FrameNumber: genesisFrame.Header.FrameNumber, Selector: []byte(genesisFrame.Identity()), Timestamp: 0, @@ -1347,6 +1512,89 @@ func (e *AppConsensusEngine) initializeGenesis() ( return genesisFrame, genesisQC } +func (e *AppConsensusEngine) ensureGlobalGenesis() error { + genesisFrameNumber := global.ExpectedGenesisFrameNumber(e.config, e.logger) + _, err := e.clockStore.GetGlobalClockFrame(genesisFrameNumber) + if err == nil { + return nil + } + if !errors.Is(err, store.ErrNotFound) { + return errors.Wrap(err, "ensure global genesis") + } + + e.logger.Info("global genesis missing, initializing") + _, _, initErr := global.InitializeGenesisState( + e.logger, + e.config, + e.clockStore, + e.shardsStore, + e.hypergraph, + e.consensusStore, + e.inclusionProver, + e.keyManager, + e.proverRegistry, + ) + if initErr != nil { + return errors.Wrap(initErr, "ensure global genesis") + } + return nil +} + +func (e *AppConsensusEngine) ensureAppGenesis() error { + _, _, err := e.clockStore.GetShardClockFrame( + e.appAddress, + 0, + false, + ) + if err == nil { + return nil + } + if !errors.Is(err, store.ErrNotFound) { + return errors.Wrap(err, "ensure app genesis") + } + e.logger.Info( + "app shard genesis missing, initializing", + zap.String("shard_address", hex.EncodeToString(e.appAddress)), + ) + _, _ = e.initializeGenesis() + return nil +} + +func (e *AppConsensusEngine) waitForProverRegistration( + ctx lifecycle.SignalerContext, +) error { + logger := e.logger.With(zap.String("shard_address", e.appAddressHex)) + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + provers, err := e.proverRegistry.GetActiveProvers(e.appAddress) + if err != nil { + logger.Warn("could not query prover registry", zap.Error(err)) + } else { + for _, prover := range provers { + if 
bytes.Equal(prover.Address, e.proverAddress) { + logger.Info("prover present in registry, starting consensus") + return nil + } + } + logger.Info("waiting for prover registration") + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + } + } +} + // adjustFeeForTraffic calculates a traffic-adjusted fee multiplier based on // frame timing func (e *AppConsensusEngine) adjustFeeForTraffic(baseFee uint64) uint64 { @@ -1518,7 +1766,7 @@ func (e *AppConsensusEngine) internalProveFrame( timestamp := time.Now().UnixMilli() difficulty := e.difficultyAdjuster.GetNextDifficulty( - previousFrame.GetRank()+1, + previousFrame.GetFrameNumber()+1, timestamp, ) @@ -1795,38 +2043,6 @@ func (e *AppConsensusEngine) OnOwnProposal( proposal.PreviousRankTimeoutCertificate.(*protobufs.TimeoutCertificate) } - provers, err := e.proverRegistry.GetActiveProvers(e.appAddress) - if err != nil { - e.logger.Error("could not get provers", zap.Error(err)) - return - } - - var signingProverPubKey []byte - var signingProverIndex int - for i, prover := range provers { - if bytes.Equal( - prover.Address, - (*proposal.Vote).PublicKeySignatureBls48581.Address, - ) { - signingProverIndex = i - signingProverPubKey = prover.PublicKey - break - } - } - - bitmask := make([]byte, (len(provers)+7)/8) - bitmask[signingProverIndex/8] = 1 << (signingProverIndex % 8) - - // Manually override the signature as the vdf prover's signature is invalid - (*proposal.State.State).Header.PublicKeySignatureBls48581 = - &protobufs.BLS48581AggregateSignature{ - PublicKey: &protobufs.BLS48581G2PublicKey{ - KeyValue: signingProverPubKey, - }, - Signature: (*proposal.Vote).PublicKeySignatureBls48581.Signature, - Bitmask: bitmask, - } - pbProposal := &protobufs.AppShardProposal{ State: *proposal.State.State, ParentQuorumCertificate: proposal.Proposal.State.ParentQuorumCertificate.(*protobufs.QuorumCertificate), @@ -2166,7 +2382,9 @@ func (e *AppConsensusEngine) OnQuorumCertificateTriggeredRankChange( if bytes.Equal(info[i].Address, e.getProverAddress()) { myIndex = i } - ids = append(ids, info[i].Address) + if !bytes.Equal([]byte(nextLeader), info[i].Address) { + ids = append(ids, info[i].Address) + } } if myIndex == -1 { @@ -2525,7 +2743,7 @@ func (e *AppConsensusEngine) VerifyQuorumCertificate( // genesis qc is special: if quorumCertificate.GetRank() == 0 { - genqc, err := e.clockStore.GetQuorumCertificate(nil, 0) + genqc, err := e.clockStore.GetQuorumCertificate(e.appAddress, 0) if err != nil { return errors.Wrap(err, "verify quorum certificate") } @@ -2567,7 +2785,7 @@ func (e *AppConsensusEngine) VerifyQuorumCertificate( if valid := e.blsConstructor.VerifySignatureRaw( qc.AggregateSignature.GetPubKey(), qc.AggregateSignature.GetSignature(), - verification.MakeVoteMessage(nil, qc.Rank, qc.Identity()), + verification.MakeVoteMessage(e.appAddress, qc.Rank, qc.Identity()), slices.Concat([]byte("appshard"), e.appAddress), ); !valid { return models.ErrInvalidSignature @@ -2612,7 +2830,7 @@ func (e *AppConsensusEngine) VerifyTimeoutCertificate( pubkeys = append(pubkeys, prover.PublicKey) signatures = append(signatures, tc.AggregateSignature.GetSignature()) messages = append(messages, verification.MakeTimeoutMessage( - nil, + e.appAddress, tc.Rank, tc.LatestRanks[idx], )) @@ -2678,7 +2896,7 @@ func (e *AppConsensusEngine) VerifyVote( if valid := e.blsConstructor.VerifySignatureRaw( pubkey, (*vote).PublicKeySignatureBls48581.Signature[:74], - verification.MakeVoteMessage(nil, (*vote).Rank, (*vote).Source()), + 
verification.MakeVoteMessage(e.appAddress, (*vote).Rank, (*vote).Source()), slices.Concat([]byte("appshard"), e.appAddress), ); !valid { return models.ErrInvalidSignature diff --git a/node/consensus/app/app_consensus_engine_chaos_integration_test.go b/node/consensus/app/app_consensus_engine_chaos_integration_test.go index f2e1a37..f0c771b 100644 --- a/node/consensus/app/app_consensus_engine_chaos_integration_test.go +++ b/node/consensus/app/app_consensus_engine_chaos_integration_test.go @@ -169,7 +169,7 @@ func TestAppConsensusEngine_Integration_ChaosScenario(t *testing.T) { sharedInclusionProver := bls48581.NewKZGInclusionProver(logger) sharedVerifiableEncryptor := verenc.NewMPCitHVerifiableEncryptor(1) sharedHypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_chaos__shared"}, sharedDB, logger, sharedVerifiableEncryptor, sharedInclusionProver) - sharedHg := hypergraph.NewHypergraph(logger, sharedHypergraphStore, sharedInclusionProver, []int{}, &tests.Nopthenticator{}) + sharedHg := hypergraph.NewHypergraph(logger, sharedHypergraphStore, sharedInclusionProver, []int{}, &tests.Nopthenticator{}, 1) proverRegistry, err := provers.NewProverRegistry(logger, sharedHg) require.NoError(t, err) @@ -221,7 +221,7 @@ func TestAppConsensusEngine_Integration_ChaosScenario(t *testing.T) { nodeInboxStore := store.NewPebbleInboxStore(nodeDB, logger) nodeShardsStore := store.NewPebbleShardsStore(nodeDB, logger) nodeConsensusStore := store.NewPebbleConsensusStore(nodeDB, logger) - nodeHg := hypergraph.NewHypergraph(logger, nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}) + nodeHg := hypergraph.NewHypergraph(logger, nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}, 1) // Create mock pubsub for network simulation pubsub := newMockAppIntegrationPubSub(c, logger, []byte(m.Nodes[nodeIdx].ID()), m.Nodes[nodeIdx], m.Keys[nodeIdx], m.Nodes) diff --git a/node/consensus/app/app_consensus_engine_integration_test.go b/node/consensus/app/app_consensus_engine_integration_test.go index 5dc8fca..ad1d840 100644 --- a/node/consensus/app/app_consensus_engine_integration_test.go +++ b/node/consensus/app/app_consensus_engine_integration_test.go @@ -120,7 +120,7 @@ func TestAppConsensusEngine_Integration_BasicFrameProgression(t *testing.T) { // Create hypergraph hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_basic"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) // Create consensus store consensusStore := store.NewPebbleConsensusStore(pebbleDB, logger) @@ -457,7 +457,7 @@ func TestAppConsensusEngine_Integration_FeeVotingMechanics(t *testing.T) { tempClockStore := store.NewPebbleClockStore(tempDB, logger) tempInboxStore := store.NewPebbleInboxStore(tempDB, logger) tempShardsStore := store.NewPebbleShardsStore(tempDB, logger) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) proverRegistry, err := provers.NewProverRegistry(zap.NewNop(), hg) require.NoError(t, err) @@ -778,7 +778,7 @@ func TestAppConsensusEngine_Integration_ReconnectCatchup(t *testing.T) { defer 
nodeDB.Close() inclusionProver := bls48581.NewKZGInclusionProver(nodeLogger) hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: fmt.Sprintf(".test/app_partition_catchup_%d", i)}, nodeDB, nodeLogger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(nodeLogger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(nodeLogger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) proverRegistry, err := provers.NewProverRegistry(zap.NewNop(), hg) require.NoError(t, err) @@ -1061,8 +1061,8 @@ func TestAppConsensusEngine_Integration_MultipleAppShards(t *testing.T) { tempConsensusStore := store.NewPebbleConsensusStore(pebbleDB, logger) hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: fmt.Sprintf(".test/app_multi_%d", i)}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) - tempHg := hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) + tempHg := hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}, 1) tempClockStore := store.NewPebbleClockStore(tempDB, logger) tempInboxStore := store.NewPebbleInboxStore(tempDB, logger) tempShardsStore := store.NewPebbleShardsStore(tempDB, logger) @@ -1221,7 +1221,7 @@ func TestAppConsensusEngine_Integration_GlobalAppCoordination(t *testing.T) { tempInclusionProver := bls48581.NewKZGInclusionProver(logger) tempVerifiableEncryptor := verenc.NewMPCitHVerifiableEncryptor(1) tempHypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_coord_temp"}, tempDB, logger, tempVerifiableEncryptor, tempInclusionProver) - tempHg := hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}) + tempHg := hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}, 1) tempClockStore := store.NewPebbleClockStore(tempDB, logger) tempInboxStore := store.NewPebbleInboxStore(tempDB, logger) proverRegistry, err := provers.NewProverRegistry(zap.NewNop(), tempHg) @@ -1275,7 +1275,7 @@ func TestAppConsensusEngine_Integration_GlobalAppCoordination(t *testing.T) { shardsStore := store.NewPebbleShardsStore(pebbleDB, logger) hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_coordination"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) consensusStore := store.NewPebbleConsensusStore(pebbleDB, logger) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) keyStore := store.NewPebbleKeyStore(pebbleDB, logger) @@ -1417,7 +1417,7 @@ func TestAppConsensusEngine_Integration_ProverTrieMembership(t *testing.T) { tempDecafConstructor := &bulletproofs.Decaf448KeyConstructor{} tempCompiler := compiler.NewBedlamCompiler() tempHypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_prover_temp"}, tempDB, logger, tempVerifiableEncryptor, tempInclusionProver) - tempHg := 
hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}) + tempHg := hypergraph.NewHypergraph(logger, tempHypergraphStore, tempInclusionProver, []int{}, &tests.Nopthenticator{}, 1) proverRegistry, err := provers.NewProverRegistry(zap.NewNop(), tempHg) require.NoError(t, err) @@ -1444,7 +1444,7 @@ func TestAppConsensusEngine_Integration_ProverTrieMembership(t *testing.T) { verifiableEncryptor := verenc.NewMPCitHVerifiableEncryptor(1) shardsStore := store.NewPebbleShardsStore(pebbleDB, logger) hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: fmt.Sprintf(".test/app_prover_%d", i)}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) keyStore := store.NewPebbleKeyStore(pebbleDB, logger) clockStore := store.NewPebbleClockStore(pebbleDB, logger) @@ -1573,7 +1573,7 @@ func TestAppConsensusEngine_Integration_InvalidFrameRejection(t *testing.T) { compiler := compiler.NewBedlamCompiler() hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_invalid_rejection"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) keyStore := store.NewPebbleKeyStore(pebbleDB, logger) clockStore := store.NewPebbleClockStore(pebbleDB, logger) @@ -1896,7 +1896,7 @@ func TestAppConsensusEngine_Integration_ComplexMultiShardScenario(t *testing.T) nodeInboxStore := store.NewPebbleInboxStore(nodeDB, logger) nodeShardsStore := store.NewPebbleShardsStore(nodeDB, logger) nodeConsensusStore := store.NewPebbleConsensusStore(nodeDB, logger) - nodeHg := hypergraph.NewHypergraph(logger, nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}) + nodeHg := hypergraph.NewHypergraph(logger, nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}, 1) nodeProverRegistry, err := provers.NewProverRegistry(zap.NewNop(), nodeHg) nodeBulletproof := bulletproofs.NewBulletproofProver() nodeDecafConstructor := &bulletproofs.Decaf448KeyConstructor{} @@ -2352,7 +2352,7 @@ func TestGenerateAddressesForComplexTest(t *testing.T) { nodeVerifiableEncryptor := verenc.NewMPCitHVerifiableEncryptor(1) nodeHypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_chaos"}, nodeDB, zap.L(), nodeVerifiableEncryptor, nodeInclusionProver) - nodeHg := hypergraph.NewHypergraph(zap.L(), nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}) + nodeHg := hypergraph.NewHypergraph(zap.L(), nodeHypergraphStore, nodeInclusionProver, []int{}, &tests.Nopthenticator{}, 1) vksHex := []string{ "67ebe1f52284c24bbb2061b6b35823726688fb2d1d474195ad629dc2a8a7442df3e72f164fecc624df8f720ba96ebaf4e3a9ca551490f200", "05e729b718f137ce985471e80e3530e1b6a6356f218f64571f3249f9032dd3c08fec428c368959e0e0ff0e6a0e42aa4ca18427cac0b14516", @@ -2695,7 +2695,7 @@ func TestAppConsensusEngine_Integration_NoProversStaysInLoading(t *testing.T) { InMemoryDONOTUSE: true, Path: fmt.Sprintf(".test/app_no_provers_%d", nodeID), }, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := 
hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) clockStore := store.NewPebbleClockStore(pebbleDB, logger) inboxStore := store.NewPebbleInboxStore(pebbleDB, logger) @@ -2896,7 +2896,7 @@ func TestAppConsensusEngine_Integration_AlertStopsProgression(t *testing.T) { // Create hypergraph hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/app_basic"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hypergraph.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) // Create key store keyStore := store.NewPebbleKeyStore(pebbleDB, logger) diff --git a/node/consensus/app/consensus_dynamic_committee.go b/node/consensus/app/consensus_dynamic_committee.go index 5170574..0053230 100644 --- a/node/consensus/app/consensus_dynamic_committee.go +++ b/node/consensus/app/consensus_dynamic_committee.go @@ -123,6 +123,11 @@ func (e *AppConsensusEngine) LeaderForRank(rank uint64) ( return "", errors.Wrap(err, "leader for rank") } + // Handle condition where prover cannot be yet known due to lack of sync: + if len(proverSet) == 0 { + return models.Identity(make([]byte, 32)), nil + } + inputBI.Mod(inputBI, big.NewInt(int64(len(proverSet)))) index := inputBI.Int64() return models.Identity(proverSet[int(index)].Address), nil diff --git a/node/consensus/app/consensus_voting_provider.go b/node/consensus/app/consensus_voting_provider.go index a904561..8051875 100644 --- a/node/consensus/app/consensus_voting_provider.go +++ b/node/consensus/app/consensus_voting_provider.go @@ -144,7 +144,7 @@ func (p *AppVotingProvider) SignTimeoutVote( // Create vote message vote := &protobufs.ProposalVote{ - Filter: filter, + Filter: filter, // buildutils:allow-slice-alias slice is static FrameNumber: 0, Rank: currentRank, Selector: nil, diff --git a/node/consensus/app/coverage_events.go b/node/consensus/app/coverage_events.go new file mode 100644 index 0000000..bc15af3 --- /dev/null +++ b/node/consensus/app/coverage_events.go @@ -0,0 +1,251 @@ +package app + +import ( + "encoding/hex" + "fmt" + "math/big" + + "github.com/pkg/errors" + "go.uber.org/zap" + typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus" +) + +type coverageStreak struct { + StartFrame uint64 + LastFrame uint64 + Count uint64 +} + +type shardCoverage struct { + ProverCount int + AttestedStorage uint64 + TreeMetadata []typesconsensus.TreeMetadata +} + +func (e *AppConsensusEngine) ensureCoverageThresholds() { + e.coverageOnce.Do(func() { + e.coverageMinProvers = e.minimumProvers() + if e.config.P2P.Network == 0 { + e.coverageHaltThreshold = 3 + } else { + if e.coverageMinProvers > 1 { + e.coverageHaltThreshold = 1 + } else { + e.coverageHaltThreshold = 0 + } + } + e.coverageHaltGrace = 360 + }) +} + +func (e *AppConsensusEngine) checkShardCoverage(frameNumber uint64) error { + e.ensureCoverageThresholds() + + coverage, ok := e.getShardCoverage() + if !ok { + e.clearCoverageStreak(string(e.appAddress)) + return nil + } + + key := string(e.appAddress) + size := big.NewInt(0) + for _, metadata := range coverage.TreeMetadata { + size = size.Add(size, new(big.Int).SetUint64(metadata.TotalSize)) + } + + if uint64(coverage.ProverCount) <= e.coverageHaltThreshold && + 
size.Cmp(big.NewInt(0)) > 0 { + streak, err := e.bumpCoverageStreak(key, frameNumber) + if err != nil { + return errors.Wrap(err, "check shard coverage") + } + + var remaining int64 = int64(e.coverageHaltGrace) - int64(streak.Count) + if remaining < 0 { + remaining = 0 + } + + if e.config.P2P.Network == 0 && remaining == 0 { + e.emitCoverageEvent( + typesconsensus.ControlEventCoverageHalt, + &typesconsensus.CoverageEventData{ + ShardAddress: e.appAddress, + ProverCount: coverage.ProverCount, + RequiredProvers: int(e.coverageMinProvers), + AttestedStorage: coverage.AttestedStorage, + TreeMetadata: coverage.TreeMetadata, + Message: fmt.Sprintf( + "Shard %s has only %d provers, halting operations", + e.appAddressHex, + coverage.ProverCount, + ), + }, + ) + return nil + } + + e.emitCoverageEvent( + typesconsensus.ControlEventCoverageWarn, + &typesconsensus.CoverageEventData{ + ShardAddress: e.appAddress, + ProverCount: coverage.ProverCount, + RequiredProvers: int(e.coverageMinProvers), + AttestedStorage: coverage.AttestedStorage, + TreeMetadata: coverage.TreeMetadata, + Message: fmt.Sprintf( + "Critical coverage (<= %d provers). Grace period: %d/%d frames toward halt.", + e.coverageHaltThreshold, + streak.Count, + e.coverageHaltGrace, + ), + }, + ) + return nil + } + + e.clearCoverageStreak(key) + + if uint64(coverage.ProverCount) < e.coverageMinProvers { + e.emitCoverageEvent( + typesconsensus.ControlEventCoverageWarn, + &typesconsensus.CoverageEventData{ + ShardAddress: e.appAddress, + ProverCount: coverage.ProverCount, + RequiredProvers: int(e.coverageMinProvers), + AttestedStorage: coverage.AttestedStorage, + TreeMetadata: coverage.TreeMetadata, + Message: fmt.Sprintf( + "Shard %s below minimum coverage: %d/%d provers.", + e.appAddressHex, + coverage.ProverCount, + e.coverageMinProvers, + ), + }, + ) + } + + return nil +} + +func (e *AppConsensusEngine) getShardCoverage() (*shardCoverage, bool) { + proverCount, err := e.proverRegistry.GetProverCount(e.appAddress) + if err != nil { + e.logger.Warn( + "failed to get prover count for shard", + zap.String("shard_address", e.appAddressHex), + zap.Error(err), + ) + return nil, false + } + if proverCount == 0 { + return nil, false + } + + activeProvers, err := e.proverRegistry.GetActiveProvers(e.appAddress) + if err != nil { + e.logger.Warn( + "failed to get active provers for shard", + zap.String("shard_address", e.appAddressHex), + zap.Error(err), + ) + return nil, false + } + + attestedStorage := uint64(0) + for _, prover := range activeProvers { + attestedStorage += prover.AvailableStorage + } + + var treeMetadata []typesconsensus.TreeMetadata + metadata, err := e.hypergraph.GetMetadataAtKey(e.appAddress) + if err != nil { + e.logger.Error("could not obtain metadata for shard", zap.Error(err)) + return nil, false + } + for _, entry := range metadata { + treeMetadata = append( + treeMetadata, + typesconsensus.TreeMetadata{ + CommitmentRoot: entry.Commitment, + TotalSize: entry.Size, + TotalLeaves: entry.LeafCount, + }, + ) + } + + return &shardCoverage{ + ProverCount: proverCount, + AttestedStorage: attestedStorage, + TreeMetadata: treeMetadata, + }, true +} + +func (e *AppConsensusEngine) ensureCoverageStreakMap(frameNumber uint64) error { + if e.lowCoverageStreak != nil { + return nil + } + + e.lowCoverageStreak = make(map[string]*coverageStreak) + coverage, ok := e.getShardCoverage() + if !ok { + return nil + } + if uint64(coverage.ProverCount) <= e.coverageHaltThreshold { + e.lowCoverageStreak[string(e.appAddress)] = &coverageStreak{ + 
StartFrame: frameNumber, + LastFrame: frameNumber, + Count: 1, + } + } + return nil +} + +func (e *AppConsensusEngine) bumpCoverageStreak( + key string, + frame uint64, +) (*coverageStreak, error) { + if err := e.ensureCoverageStreakMap(frame); err != nil { + return nil, errors.Wrap(err, "bump coverage streak") + } + streak := e.lowCoverageStreak[key] + if streak == nil { + streak = &coverageStreak{ + StartFrame: frame, + LastFrame: frame, + Count: 1, + } + e.lowCoverageStreak[key] = streak + return streak, nil + } + if frame > streak.LastFrame { + streak.Count += (frame - streak.LastFrame) + streak.LastFrame = frame + } + return streak, nil +} + +func (e *AppConsensusEngine) clearCoverageStreak(key string) { + if e.lowCoverageStreak != nil { + delete(e.lowCoverageStreak, key) + } +} + +func (e *AppConsensusEngine) emitCoverageEvent( + eventType typesconsensus.ControlEventType, + data *typesconsensus.CoverageEventData, +) { + event := typesconsensus.ControlEvent{ + Type: eventType, + Data: data, + } + + go e.eventDistributor.Publish(event) + + e.logger.Info( + "emitted coverage event", + zap.String("type", fmt.Sprintf("%d", eventType)), + zap.String("shard_address", hex.EncodeToString(data.ShardAddress)), + zap.Int("prover_count", data.ProverCount), + zap.String("message", data.Message), + ) +} diff --git a/node/consensus/app/event_distributor.go b/node/consensus/app/event_distributor.go index 121508d..15d2ca5 100644 --- a/node/consensus/app/event_distributor.go +++ b/node/consensus/app/event_distributor.go @@ -64,6 +64,12 @@ func (e *AppConsensusEngine) eventDistributorLoop( zap.Error(err), ) } + + if err := e.checkShardCoverage( + data.Frame.Header.FrameNumber, + ); err != nil { + e.logger.Error("could not check shard coverage", zap.Error(err)) + } } case typesconsensus.ControlEventAppEquivocation: // Handle equivocation by constructing and publishing a ProverKick diff --git a/node/consensus/app/factory.go b/node/consensus/app/factory.go index e90309f..8887d7a 100644 --- a/node/consensus/app/factory.go +++ b/node/consensus/app/factory.go @@ -108,6 +108,7 @@ func NewAppConsensusEngineFactory( rewardIssuance: rewardIssuance, blsConstructor: blsConstructor, encryptedChannel: encryptedChannel, + peerInfoManager: peerInfoManager, } } diff --git a/node/consensus/app/frame_chain_checker.go b/node/consensus/app/frame_chain_checker.go index 65d916f..47c537d 100644 --- a/node/consensus/app/frame_chain_checker.go +++ b/node/consensus/app/frame_chain_checker.go @@ -43,7 +43,7 @@ func NewAppFrameChainChecker( } return &AppFrameChainChecker{ store: appFrameChainStoreAdapter{store: store}, - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static logger: logger, } } diff --git a/node/consensus/app/message_processors.go b/node/consensus/app/message_processors.go index 42a20ba..62d0310 100644 --- a/node/consensus/app/message_processors.go +++ b/node/consensus/app/message_processors.go @@ -3,23 +3,32 @@ package app import ( "bytes" "context" + "crypto/sha256" "encoding/binary" "encoding/hex" + "errors" + "fmt" "slices" + "time" "github.com/iden3/go-iden3-crypto/poseidon" + pcrypto "github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/peer" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "golang.org/x/crypto/sha3" + "google.golang.org/protobuf/proto" "source.quilibrium.com/quilibrium/monorepo/consensus/models" "source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb" "source.quilibrium.com/quilibrium/monorepo/lifecycle" 
"source.quilibrium.com/quilibrium/monorepo/protobufs" "source.quilibrium.com/quilibrium/monorepo/types/crypto" + "source.quilibrium.com/quilibrium/monorepo/types/store" "source.quilibrium.com/quilibrium/monorepo/types/tries" ) +var keyRegistryDomain = []byte("KEY_REGISTRY") + func (e *AppConsensusEngine) processConsensusMessageQueue( ctx lifecycle.SignalerContext, ) { @@ -310,6 +319,32 @@ func (e *AppConsensusEngine) handleAppShardProposal( func (e *AppConsensusEngine) processProposal( proposal *protobufs.AppShardProposal, ) bool { + return e.processProposalInternal(proposal, false) +} + +func (e *AppConsensusEngine) processProposalInternal( + proposal *protobufs.AppShardProposal, + skipAncestors bool, +) bool { + if proposal == nil || proposal.State == nil || proposal.State.Header == nil { + return false + } + + if !skipAncestors { + if ok, err := e.ensureShardAncestorStates(proposal); err != nil { + e.logger.Warn( + "failed to recover app shard ancestors", + zap.String("address", e.appAddressHex), + zap.Uint64("frame_number", proposal.State.Header.FrameNumber), + zap.Error(err), + ) + e.requestShardAncestorSync(proposal) + return false + } else if !ok { + return false + } + } + e.logger.Debug( "processing proposal", zap.String("id", hex.EncodeToString([]byte(proposal.State.Identity()))), @@ -397,6 +432,560 @@ func (e *AppConsensusEngine) processProposal( return true } +type shardAncestorDescriptor struct { + frameNumber uint64 + selector []byte +} + +func (e *AppConsensusEngine) ensureShardAncestorStates( + proposal *protobufs.AppShardProposal, +) (bool, error) { + ancestors, err := e.collectMissingShardAncestors(proposal) + if err != nil { + return false, err + } + + if len(ancestors) == 0 { + return true, nil + } + + for i := len(ancestors) - 1; i >= 0; i-- { + ancestor, err := e.buildStoredShardProposal(ancestors[i]) + if err != nil { + return false, err + } + if !e.processProposalInternal(ancestor, true) { + return false, fmt.Errorf( + "unable to process ancestor frame %d", + ancestors[i].frameNumber, + ) + } + } + + return true, nil +} + +func (e *AppConsensusEngine) collectMissingShardAncestors( + proposal *protobufs.AppShardProposal, +) ([]shardAncestorDescriptor, error) { + header := proposal.State.Header + if header == nil || header.FrameNumber == 0 { + return nil, nil + } + + finalized := e.forks.FinalizedState() + if finalized == nil || finalized.State == nil || + (*finalized.State).Header == nil { + return nil, errors.New("finalized state unavailable") + } + + finalizedFrame := (*finalized.State).Header.FrameNumber + finalizedSelector := []byte(finalized.Identifier) + + parentFrame := header.FrameNumber - 1 + parentSelector := slices.Clone(header.ParentSelector) + if len(parentSelector) == 0 { + return nil, nil + } + + var ancestors []shardAncestorDescriptor + anchored := false + + for parentFrame > finalizedFrame && len(parentSelector) > 0 { + if _, found := e.forks.GetState( + models.Identity(string(parentSelector)), + ); found { + anchored = true + break + } + + ancestors = append(ancestors, shardAncestorDescriptor{ + frameNumber: parentFrame, + selector: slices.Clone(parentSelector), + }) + + frame, err := e.loadShardFrameFromStore(parentFrame, parentSelector) + if err != nil { + return nil, err + } + + parentFrame-- + parentSelector = slices.Clone(frame.Header.ParentSelector) + } + + if !anchored { + switch { + case parentFrame == finalizedFrame: + if !bytes.Equal(parentSelector, finalizedSelector) { + return nil, fmt.Errorf( + "ancestor chain not rooted at finalized 
frame %d", + finalizedFrame, + ) + } + anchored = true + case parentFrame < finalizedFrame: + return nil, fmt.Errorf( + "ancestor chain crossed finalized boundary (frame %d < %d)", + parentFrame, + finalizedFrame, + ) + case len(parentSelector) == 0: + return nil, errors.New( + "ancestor selector missing before reaching finalized state", + ) + } + } + + if !anchored { + return nil, errors.New("ancestor chain could not be anchored in forks") + } + + return ancestors, nil +} + +func (e *AppConsensusEngine) loadShardFrameFromStore( + frameNumber uint64, + selector []byte, +) (*protobufs.AppShardFrame, error) { + frame, err := e.clockStore.GetStagedShardClockFrame( + e.appAddress, + frameNumber, + selector, + false, + ) + if err != nil { + if !errors.Is(err, store.ErrNotFound) { + return nil, err + } + frame, _, err = e.clockStore.GetShardClockFrame( + e.appAddress, + frameNumber, + false, + ) + if err != nil { + return nil, err + } + if frame == nil || frame.Header == nil || + !bytes.Equal([]byte(frame.Identity()), selector) { + return nil, fmt.Errorf( + "sealed shard frame mismatch at %d", + frameNumber, + ) + } + } + + if frame == nil || frame.Header == nil { + return nil, errors.New("stored shard frame missing header") + } + + return frame, nil +} + +func (e *AppConsensusEngine) buildStoredShardProposal( + desc shardAncestorDescriptor, +) (*protobufs.AppShardProposal, error) { + frame, err := e.loadShardFrameFromStore(desc.frameNumber, desc.selector) + if err != nil { + return nil, err + } + + var parentQC *protobufs.QuorumCertificate + if frame.GetRank() > 0 { + parentQC, err = e.clockStore.GetQuorumCertificate( + e.appAddress, + frame.GetRank()-1, + ) + if err != nil { + return nil, err + } + } + + var priorTC *protobufs.TimeoutCertificate + if frame.GetRank() > 0 { + priorTC, err = e.clockStore.GetTimeoutCertificate( + e.appAddress, + frame.GetRank()-1, + ) + if err != nil && !errors.Is(err, store.ErrNotFound) { + return nil, err + } + if errors.Is(err, store.ErrNotFound) { + priorTC = nil + } + } + + vote, err := e.clockStore.GetProposalVote( + e.appAddress, + frame.GetRank(), + []byte(frame.Identity()), + ) + if err != nil { + return nil, err + } + + return &protobufs.AppShardProposal{ + State: frame, + ParentQuorumCertificate: parentQC, + PriorRankTimeoutCertificate: priorTC, + Vote: vote, + }, nil +} + +func (e *AppConsensusEngine) requestShardAncestorSync( + proposal *protobufs.AppShardProposal, +) { + if proposal == nil || proposal.State == nil || proposal.State.Header == nil { + return + } + if e.syncProvider == nil { + return + } + + peerID, err := e.getPeerIDOfProver(proposal.State.Header.Prover) + if err != nil { + peerID, err = e.getRandomProverPeerId() + if err != nil { + return + } + } + + head, _, err := e.clockStore.GetLatestShardClockFrame(e.appAddress) + if err != nil || head == nil || head.Header == nil { + e.logger.Debug("could not obtain shard head for sync", zap.Error(err)) + return + } + + e.syncProvider.AddState( + []byte(peerID), + head.Header.FrameNumber, + []byte(head.Identity()), + ) +} + +type keyRegistryValidationResult struct { + identityPeerID []byte + proverAddress []byte +} + +func (e *AppConsensusEngine) isDuplicatePeerInfo( + peerInfo *protobufs.PeerInfo, +) bool { + digest, err := hashPeerInfo(peerInfo) + if err != nil { + e.logger.Warn("failed to hash peer info", zap.Error(err)) + return false + } + + e.peerInfoDigestCacheMu.Lock() + defer e.peerInfoDigestCacheMu.Unlock() + + if _, ok := e.peerInfoDigestCache[digest]; ok { + return true + } + + 
e.peerInfoDigestCache[digest] = struct{}{} + return false +} + +func (e *AppConsensusEngine) isDuplicateKeyRegistry( + keyRegistry *protobufs.KeyRegistry, +) bool { + digest, err := hashKeyRegistry(keyRegistry) + if err != nil { + e.logger.Warn("failed to hash key registry", zap.Error(err)) + return false + } + + e.keyRegistryDigestCacheMu.Lock() + defer e.keyRegistryDigestCacheMu.Unlock() + + if _, ok := e.keyRegistryDigestCache[digest]; ok { + return true + } + + e.keyRegistryDigestCache[digest] = struct{}{} + return false +} + +func hashPeerInfo(peerInfo *protobufs.PeerInfo) (string, error) { + cloned := proto.Clone(peerInfo).(*protobufs.PeerInfo) + cloned.Timestamp = 0 + + data, err := cloned.ToCanonicalBytes() + if err != nil { + return "", err + } + + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]), nil +} + +func hashKeyRegistry(keyRegistry *protobufs.KeyRegistry) (string, error) { + cloned := proto.Clone(keyRegistry).(*protobufs.KeyRegistry) + cloned.LastUpdated = 0 + + data, err := cloned.ToCanonicalBytes() + if err != nil { + return "", err + } + + sum := sha256.Sum256(data) + return hex.EncodeToString(sum[:]), nil +} + +func (e *AppConsensusEngine) validateKeyRegistry( + keyRegistry *protobufs.KeyRegistry, +) (*keyRegistryValidationResult, error) { + if keyRegistry.IdentityKey == nil || + len(keyRegistry.IdentityKey.KeyValue) == 0 { + return nil, fmt.Errorf("key registry missing identity key") + } + if err := keyRegistry.IdentityKey.Validate(); err != nil { + return nil, fmt.Errorf("invalid identity key: %w", err) + } + + pubKey, err := pcrypto.UnmarshalEd448PublicKey( + keyRegistry.IdentityKey.KeyValue, + ) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal identity key: %w", err) + } + peerID, err := peer.IDFromPublicKey(pubKey) + if err != nil { + return nil, fmt.Errorf("failed to derive identity peer id: %w", err) + } + identityPeerID := []byte(peerID) + + if keyRegistry.ProverKey == nil || + len(keyRegistry.ProverKey.KeyValue) == 0 { + return nil, fmt.Errorf("key registry missing prover key") + } + if err := keyRegistry.ProverKey.Validate(); err != nil { + return nil, fmt.Errorf("invalid prover key: %w", err) + } + + if keyRegistry.IdentityToProver == nil || + len(keyRegistry.IdentityToProver.Signature) == 0 { + return nil, fmt.Errorf("missing identity-to-prover signature") + } + + identityMsg := slices.Concat( + keyRegistryDomain, + keyRegistry.ProverKey.KeyValue, + ) + valid, err := e.keyManager.ValidateSignature( + crypto.KeyTypeEd448, + keyRegistry.IdentityKey.KeyValue, + identityMsg, + keyRegistry.IdentityToProver.Signature, + nil, + ) + if err != nil { + return nil, fmt.Errorf( + "identity-to-prover signature validation failed: %w", + err, + ) + } + if !valid { + return nil, fmt.Errorf("identity-to-prover signature invalid") + } + + if keyRegistry.ProverToIdentity == nil || + len(keyRegistry.ProverToIdentity.Signature) == 0 { + return nil, fmt.Errorf("missing prover-to-identity signature") + } + + valid, err = e.keyManager.ValidateSignature( + crypto.KeyTypeBLS48581G1, + keyRegistry.ProverKey.KeyValue, + keyRegistry.IdentityKey.KeyValue, + keyRegistry.ProverToIdentity.Signature, + keyRegistryDomain, + ) + if err != nil { + return nil, fmt.Errorf( + "prover-to-identity signature validation failed: %w", + err, + ) + } + if !valid { + return nil, fmt.Errorf("prover-to-identity signature invalid") + } + + addrBI, err := poseidon.HashBytes(keyRegistry.ProverKey.KeyValue) + if err != nil { + return nil, fmt.Errorf("failed to derive prover key 
address: %w", err) + } + proverAddress := addrBI.FillBytes(make([]byte, 32)) + + for purpose, collection := range keyRegistry.KeysByPurpose { + if collection == nil { + continue + } + for _, key := range collection.X448Keys { + if err := e.validateSignedX448Key( + key, + identityPeerID, + proverAddress, + keyRegistry, + ); err != nil { + return nil, fmt.Errorf( + "invalid x448 key (purpose %s): %w", + purpose, + err, + ) + } + } + for _, key := range collection.Decaf448Keys { + if err := e.validateSignedDecaf448Key( + key, + identityPeerID, + proverAddress, + keyRegistry, + ); err != nil { + return nil, fmt.Errorf( + "invalid decaf448 key (purpose %s): %w", + purpose, + err, + ) + } + } + } + + return &keyRegistryValidationResult{ + identityPeerID: identityPeerID, + proverAddress: proverAddress, + }, nil +} + +func (e *AppConsensusEngine) validateSignedX448Key( + key *protobufs.SignedX448Key, + identityPeerID []byte, + proverAddress []byte, + keyRegistry *protobufs.KeyRegistry, +) error { + if key == nil || key.Key == nil || len(key.Key.KeyValue) == 0 { + return nil + } + + msg := slices.Concat(keyRegistryDomain, key.Key.KeyValue) + switch sig := key.Signature.(type) { + case *protobufs.SignedX448Key_Ed448Signature: + if sig.Ed448Signature == nil || + len(sig.Ed448Signature.Signature) == 0 { + return fmt.Errorf("missing ed448 signature") + } + if !bytes.Equal(key.ParentKeyAddress, identityPeerID) { + return fmt.Errorf("unexpected parent for ed448 signed x448 key") + } + valid, err := e.keyManager.ValidateSignature( + crypto.KeyTypeEd448, + keyRegistry.IdentityKey.KeyValue, + msg, + sig.Ed448Signature.Signature, + nil, + ) + if err != nil { + return fmt.Errorf("failed to validate ed448 signature: %w", err) + } + if !valid { + return fmt.Errorf("ed448 signature invalid") + } + case *protobufs.SignedX448Key_BlsSignature: + if sig.BlsSignature == nil || + len(sig.BlsSignature.Signature) == 0 { + return fmt.Errorf("missing bls signature") + } + if len(proverAddress) != 0 && + !bytes.Equal(key.ParentKeyAddress, proverAddress) { + return fmt.Errorf("unexpected parent for bls signed x448 key") + } + valid, err := e.keyManager.ValidateSignature( + crypto.KeyTypeBLS48581G1, + keyRegistry.ProverKey.KeyValue, + key.Key.KeyValue, + sig.BlsSignature.Signature, + keyRegistryDomain, + ) + if err != nil { + return fmt.Errorf("failed to validate bls signature: %w", err) + } + if !valid { + return fmt.Errorf("bls signature invalid") + } + case *protobufs.SignedX448Key_DecafSignature: + return fmt.Errorf("decaf signature not supported for x448 key") + default: + return fmt.Errorf("missing signature for x448 key") + } + + return nil +} + +func (e *AppConsensusEngine) validateSignedDecaf448Key( + key *protobufs.SignedDecaf448Key, + identityPeerID []byte, + proverAddress []byte, + keyRegistry *protobufs.KeyRegistry, +) error { + if key == nil || key.Key == nil || len(key.Key.KeyValue) == 0 { + return nil + } + + msg := slices.Concat(keyRegistryDomain, key.Key.KeyValue) + switch sig := key.Signature.(type) { + case *protobufs.SignedDecaf448Key_Ed448Signature: + if sig.Ed448Signature == nil || + len(sig.Ed448Signature.Signature) == 0 { + return fmt.Errorf("missing ed448 signature") + } + if !bytes.Equal(key.ParentKeyAddress, identityPeerID) { + return fmt.Errorf("unexpected parent for ed448 signed decaf key") + } + valid, err := e.keyManager.ValidateSignature( + crypto.KeyTypeEd448, + keyRegistry.IdentityKey.KeyValue, + msg, + sig.Ed448Signature.Signature, + nil, + ) + if err != nil { + return fmt.Errorf("failed 
to validate ed448 signature: %w", err) + } + if !valid { + return fmt.Errorf("ed448 signature invalid") + } + case *protobufs.SignedDecaf448Key_BlsSignature: + if sig.BlsSignature == nil || + len(sig.BlsSignature.Signature) == 0 { + return fmt.Errorf("missing bls signature") + } + if len(proverAddress) != 0 && + !bytes.Equal(key.ParentKeyAddress, proverAddress) { + return fmt.Errorf("unexpected parent for bls signed decaf key") + } + valid, err := e.keyManager.ValidateSignature( + crypto.KeyTypeBLS48581G1, + keyRegistry.ProverKey.KeyValue, + key.Key.KeyValue, + sig.BlsSignature.Signature, + keyRegistryDomain, + ) + if err != nil { + return fmt.Errorf("failed to validate bls signature: %w", err) + } + if !valid { + return fmt.Errorf("bls signature invalid") + } + case *protobufs.SignedDecaf448Key_DecafSignature: + return fmt.Errorf("decaf signature validation not supported") + default: + return fmt.Errorf("missing signature for decaf key") + } + + return nil +} + func (e *AppConsensusEngine) cacheProposal( proposal *protobufs.AppShardProposal, ) { @@ -625,6 +1214,10 @@ func (e *AppConsensusEngine) addCertifiedState( txn.Abort() return } + + if err := e.checkShardCoverage(parent.State.Header.FrameNumber); err != nil { + e.logger.Error("could not check shard coverage", zap.Error(err)) + } } func (e *AppConsensusEngine) handleConsensusMessage(message *pb.Message) { @@ -781,6 +1374,8 @@ func (e *AppConsensusEngine) handleGlobalFrameMessage(message *pb.Message) { return } + e.handleGlobalProverRoot(frame) + // Success metric recorded at the end of processing globalFramesProcessedTotal.WithLabelValues("success").Inc() default: @@ -842,6 +1437,23 @@ func (e *AppConsensusEngine) handlePeerInfoMessage(message *pb.Message) { return } + if e.peerInfoManager == nil { + e.logger.Warn( + "peer info manager unavailable; dropping peer info", + zap.ByteString("peer_id", peerInfo.PeerId), + ) + return + } + + if e.isDuplicatePeerInfo(peerInfo) { + if existing := e.peerInfoManager.GetPeerInfo( + peerInfo.PeerId, + ); existing != nil { + existing.LastSeen = time.Now().UnixMilli() + return + } + } + // Validate signature if !e.validatePeerInfoSignature(peerInfo) { e.logger.Debug("invalid peer info signature", @@ -851,6 +1463,141 @@ func (e *AppConsensusEngine) handlePeerInfoMessage(message *pb.Message) { // Also add to the existing peer info manager e.peerInfoManager.AddPeerInfo(peerInfo) + case protobufs.KeyRegistryType: + keyRegistry := &protobufs.KeyRegistry{} + if err := keyRegistry.FromCanonicalBytes(message.Data); err != nil { + e.logger.Debug("failed to unmarshal key registry", zap.Error(err)) + return + } + + if err := keyRegistry.Validate(); err != nil { + e.logger.Debug("invalid key registry", zap.Error(err)) + return + } + + validation, err := e.validateKeyRegistry(keyRegistry) + if err != nil { + e.logger.Debug("invalid key registry signatures", zap.Error(err)) + return + } + + if e.isDuplicateKeyRegistry(keyRegistry) { + _, err := e.keyStore.GetKeyRegistry(validation.identityPeerID) + if err == nil { + return + } + } + + txn, err := e.keyStore.NewTransaction() + if err != nil { + e.logger.Error("failed to create keystore txn", zap.Error(err)) + return + } + + commit := false + defer func() { + if !commit { + if abortErr := txn.Abort(); abortErr != nil { + e.logger.Warn("failed to abort keystore txn", zap.Error(abortErr)) + } + } + }() + + var identityAddress []byte + if keyRegistry.IdentityKey != nil && + len(keyRegistry.IdentityKey.KeyValue) != 0 { + if err := e.keyStore.PutIdentityKey( + txn, + 
validation.identityPeerID, + keyRegistry.IdentityKey, + ); err != nil { + e.logger.Error("failed to store identity key", zap.Error(err)) + return + } + identityAddress = validation.identityPeerID + } + + var proverAddress []byte + if keyRegistry.ProverKey != nil && + len(keyRegistry.ProverKey.KeyValue) != 0 { + if err := e.keyStore.PutProvingKey( + txn, + validation.proverAddress, + &protobufs.BLS48581SignatureWithProofOfPossession{ + PublicKey: keyRegistry.ProverKey, + }, + ); err != nil { + e.logger.Error("failed to store prover key", zap.Error(err)) + return + } + proverAddress = validation.proverAddress + } + + if len(identityAddress) != 0 && len(proverAddress) == 32 && + keyRegistry.IdentityToProver != nil && + len(keyRegistry.IdentityToProver.Signature) != 0 && + keyRegistry.ProverToIdentity != nil && + len(keyRegistry.ProverToIdentity.Signature) != 0 { + if err := e.keyStore.PutCrossSignature( + txn, + identityAddress, + proverAddress, + keyRegistry.IdentityToProver.Signature, + keyRegistry.ProverToIdentity.Signature, + ); err != nil { + e.logger.Error("failed to store cross signatures", zap.Error(err)) + return + } + } + + for _, collection := range keyRegistry.KeysByPurpose { + for _, key := range collection.X448Keys { + if key == nil || key.Key == nil || + len(key.Key.KeyValue) == 0 { + continue + } + addrBI, err := poseidon.HashBytes(key.Key.KeyValue) + if err != nil { + e.logger.Error("failed to derive x448 key address", zap.Error(err)) + return + } + address := addrBI.FillBytes(make([]byte, 32)) + if err := e.keyStore.PutSignedX448Key(txn, address, key); err != nil { + e.logger.Error("failed to store signed x448 key", zap.Error(err)) + return + } + } + + for _, key := range collection.Decaf448Keys { + if key == nil || key.Key == nil || + len(key.Key.KeyValue) == 0 { + continue + } + addrBI, err := poseidon.HashBytes(key.Key.KeyValue) + if err != nil { + e.logger.Error( + "failed to derive decaf448 key address", + zap.Error(err), + ) + return + } + address := addrBI.FillBytes(make([]byte, 32)) + if err := e.keyStore.PutSignedDecaf448Key( + txn, + address, + key, + ); err != nil { + e.logger.Error("failed to store signed decaf448 key", zap.Error(err)) + return + } + } + } + + if err := txn.Commit(); err != nil { + e.logger.Error("failed to commit key registry txn", zap.Error(err)) + return + } + commit = true default: e.logger.Debug( diff --git a/node/consensus/app/message_validation.go b/node/consensus/app/message_validation.go index ece7524..ed1e3a9 100644 --- a/node/consensus/app/message_validation.go +++ b/node/consensus/app/message_validation.go @@ -62,12 +62,6 @@ func (e *AppConsensusEngine) validateConsensusMessage( return p2p.ValidationResultIgnore } - if proposal.State.Header.PublicKeySignatureBls48581 != nil { - e.logger.Debug("frame validation has signature") - proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() - return p2p.ValidationResultReject - } - valid, err := e.frameValidator.Validate(proposal.State) if err != nil { e.logger.Debug("frame validation error", zap.Error(err)) @@ -341,7 +335,7 @@ func (e *AppConsensusEngine) validateGlobalFrameMessage( // Check if data is long enough to contain type prefix if len(message.Data) < 4 { e.logger.Debug("message too short", zap.Int("data_length", len(message.Data))) - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() + globalFrameValidationTotal.WithLabelValues("reject").Inc() return p2p.ValidationResultReject } @@ -353,7 +347,7 @@ func (e *AppConsensusEngine) 
validateGlobalFrameMessage( frame := &protobufs.GlobalFrame{} if err := frame.FromCanonicalBytes(message.Data); err != nil { e.logger.Debug("failed to unmarshal frame", zap.Error(err)) - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() + globalFrameValidationTotal.WithLabelValues("reject").Inc() return p2p.ValidationResultReject } @@ -361,20 +355,20 @@ func (e *AppConsensusEngine) validateGlobalFrameMessage( frame.Header.PublicKeySignatureBls48581.PublicKey == nil || frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue == nil { e.logger.Debug("frame validation missing signature") - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() + globalFrameValidationTotal.WithLabelValues("reject").Inc() return p2p.ValidationResultReject } valid, err := e.globalFrameValidator.Validate(frame) if err != nil { e.logger.Debug("frame validation error", zap.Error(err)) - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() + globalFrameValidationTotal.WithLabelValues("reject").Inc() return p2p.ValidationResultReject } if !valid { e.logger.Debug("invalid frame") - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc() + globalFrameValidationTotal.WithLabelValues("reject").Inc() return p2p.ValidationResultReject } @@ -382,7 +376,7 @@ func (e *AppConsensusEngine) validateGlobalFrameMessage( return p2p.ValidationResultIgnore } - globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc() + globalFrameValidationTotal.WithLabelValues("accept").Inc() default: return p2p.ValidationResultReject @@ -492,6 +486,27 @@ func (e *AppConsensusEngine) validatePeerInfoMessage( ) return p2p.ValidationResultIgnore } + case protobufs.KeyRegistryType: + keyRegistry := &protobufs.KeyRegistry{} + if err := keyRegistry.FromCanonicalBytes(message.Data); err != nil { + e.logger.Debug("failed to unmarshal key registry", zap.Error(err)) + return p2p.ValidationResultReject + } + + if err := keyRegistry.Validate(); err != nil { + e.logger.Debug("key registry validation error", zap.Error(err)) + return p2p.ValidationResultReject + } + + now := time.Now().UnixMilli() + if int64(keyRegistry.LastUpdated) < now-1000 { + e.logger.Debug("key registry timestamp too old") + return p2p.ValidationResultIgnore + } + if int64(keyRegistry.LastUpdated) > now+5000 { + e.logger.Debug("key registry timestamp too far in future") + return p2p.ValidationResultIgnore + } default: e.logger.Debug("received unknown type", zap.Uint32("type", typePrefix)) diff --git a/node/consensus/global/consensus_leader_provider.go b/node/consensus/global/consensus_leader_provider.go index c562520..8179500 100644 --- a/node/consensus/global/consensus_leader_provider.go +++ b/node/consensus/global/consensus_leader_provider.go @@ -71,6 +71,15 @@ func (p *GlobalLeaderProvider) ProveNextState( filter []byte, priorState models.Identity, ) (**protobufs.GlobalFrame, error) { + if !p.engine.tryBeginProvingRank(rank) { + frameProvingTotal.WithLabelValues("error").Inc() + return nil, models.NewNoVoteErrorf( + "in-progress proving for rank %d", + rank, + ) + } + defer p.engine.endProvingRank(rank) + latestQC, qcErr := p.engine.clockStore.GetLatestQuorumCertificate(nil) if qcErr != nil { p.engine.logger.Debug( diff --git a/node/consensus/global/coverage_events.go b/node/consensus/global/coverage_events.go index 995acf1..c0bd224 100644 --- a/node/consensus/global/coverage_events.go +++ b/node/consensus/global/coverage_events.go @@ -115,7 +115,7 @@ func (e 
*GlobalConsensusEngine) checkShardCoverage(frameNumber uint64) error { } else { remaining = int(haltGraceFrames - streak.Count) } - if remaining <= 0 { + if remaining <= 0 && e.config.P2P.Network == 0 { e.logger.Error( "CRITICAL: Shard has insufficient coverage - triggering network halt", zap.String("shard_address", hex.EncodeToString([]byte(shardAddress))), @@ -212,7 +212,7 @@ func (e *GlobalConsensusEngine) handleLowCoverage( e.emitCoverageEvent( typesconsensus.ControlEventCoverageWarn, &typesconsensus.CoverageEventData{ - ShardAddress: shardAddress, + ShardAddress: shardAddress, // buildutils:allow-slice-alias slice is static ProverCount: coverage.ProverCount, RequiredProvers: int(minProvers), AttestedStorage: coverage.AttestedStorage, @@ -277,7 +277,7 @@ func (e *GlobalConsensusEngine) handleLowCoverage( e.emitCoverageEvent( typesconsensus.ControlEventCoverageWarn, &typesconsensus.CoverageEventData{ - ShardAddress: shardAddress, + ShardAddress: shardAddress, // buildutils:allow-slice-alias slice is static ProverCount: coverage.ProverCount, RequiredProvers: int(minProvers), AttestedStorage: coverage.AttestedStorage, @@ -291,7 +291,7 @@ func (e *GlobalConsensusEngine) handleLowCoverage( e.emitCoverageEvent( typesconsensus.ControlEventCoverageWarn, &typesconsensus.CoverageEventData{ - ShardAddress: shardAddress, + ShardAddress: shardAddress, // buildutils:allow-slice-alias slice is static ProverCount: coverage.ProverCount, RequiredProvers: int(minProvers), AttestedStorage: coverage.AttestedStorage, @@ -328,7 +328,7 @@ func (e *GlobalConsensusEngine) handleHighCoverage( // Emit split eligible event e.emitSplitEvent(&typesconsensus.ShardSplitEventData{ - ShardAddress: shardAddress, + ShardAddress: shardAddress, // buildutils:allow-slice-alias slice is static ProverCount: coverage.ProverCount, AttestedStorage: coverage.AttestedStorage, ProposedShards: proposedShards, diff --git a/node/consensus/global/event_distributor.go b/node/consensus/global/event_distributor.go index 8fb0b47..73ab8c2 100644 --- a/node/consensus/global/event_distributor.go +++ b/node/consensus/global/event_distributor.go @@ -276,6 +276,8 @@ func (e *GlobalConsensusEngine) eventDistributorLoop( } } +const pendingFilterGraceFrames = 720 + func (e *GlobalConsensusEngine) emitCoverageEvent( eventType typesconsensus.ControlEventType, data *typesconsensus.CoverageEventData, @@ -390,6 +392,7 @@ func (e *GlobalConsensusEngine) evaluateForProposals( allowProposals bool, ) { self, effectiveSeniority := e.allocationContext() + e.reconcileWorkerAllocations(data.Frame.Header.FrameNumber, self) e.checkExcessPendingJoins(self, data.Frame.Header.FrameNumber) canPropose, skipReason := e.joinProposalReady(data.Frame.Header.FrameNumber) @@ -416,6 +419,7 @@ func (e *GlobalConsensusEngine) evaluateForProposals( proposalDescriptors, 100, worldBytes, + data.Frame.Header.FrameNumber, ) if err != nil { e.logger.Error("could not plan shard allocations", zap.Error(err)) @@ -518,6 +522,114 @@ func (s *allocationSnapshot) proposalSnapshotFields() []zap.Field { } } +func (e *GlobalConsensusEngine) reconcileWorkerAllocations( + frameNumber uint64, + self *typesconsensus.ProverInfo, +) { + if e.workerManager == nil { + return + } + + workers, err := e.workerManager.RangeWorkers() + if err != nil { + e.logger.Warn("could not load workers for reconciliation", zap.Error(err)) + return + } + + filtersToWorkers := make(map[string]*store.WorkerInfo, len(workers)) + freeWorkers := make([]*store.WorkerInfo, 0, len(workers)) + for _, worker := range workers { + 
if worker == nil { + continue + } + if len(worker.Filter) == 0 { + freeWorkers = append(freeWorkers, worker) + continue + } + filtersToWorkers[string(worker.Filter)] = worker + } + + seenFilters := make(map[string]struct{}) + if self != nil { + for _, alloc := range self.Allocations { + if len(alloc.ConfirmationFilter) == 0 { + continue + } + + key := string(alloc.ConfirmationFilter) + worker, ok := filtersToWorkers[key] + if !ok { + if len(freeWorkers) == 0 { + e.logger.Warn( + "no free worker available for registry allocation", + zap.String("filter", hex.EncodeToString(alloc.ConfirmationFilter)), + ) + continue + } + worker = freeWorkers[0] + freeWorkers = freeWorkers[1:] + worker.Filter = slices.Clone(alloc.ConfirmationFilter) + } + + seenFilters[key] = struct{}{} + + desiredAllocated := alloc.Status == typesconsensus.ProverStatusActive || + alloc.Status == typesconsensus.ProverStatusPaused + + pendingFrame := alloc.JoinFrameNumber + if desiredAllocated { + pendingFrame = 0 + } + + if worker.Allocated != desiredAllocated || + worker.PendingFilterFrame != pendingFrame { + worker.Allocated = desiredAllocated + worker.PendingFilterFrame = pendingFrame + if err := e.workerManager.RegisterWorker(worker); err != nil { + e.logger.Warn( + "failed to update worker allocation state", + zap.Uint("core_id", worker.CoreId), + zap.Error(err), + ) + } + } + } + } + + for _, worker := range workers { + if worker == nil || len(worker.Filter) == 0 { + continue + } + if _, ok := seenFilters[string(worker.Filter)]; ok { + continue + } + + if worker.PendingFilterFrame != 0 { + if frameNumber <= worker.PendingFilterFrame { + continue + } + if frameNumber-worker.PendingFilterFrame < pendingFilterGraceFrames { + continue + } + } + + if worker.PendingFilterFrame == 0 && self == nil { + continue + } + + worker.Filter = nil + worker.Allocated = false + worker.PendingFilterFrame = 0 + if err := e.workerManager.RegisterWorker(worker); err != nil { + e.logger.Warn( + "failed to clear stale worker filter", + zap.Uint("core_id", worker.CoreId), + zap.Error(err), + ) + } + } +} + func (e *GlobalConsensusEngine) collectAllocationSnapshot( ctx context.Context, data *consensustime.GlobalEvent, @@ -719,8 +831,17 @@ func (e *GlobalConsensusEngine) collectAllocationSnapshot( above := []*typesconsensus.ProverInfo{} for _, i := range prs { - if i.Seniority >= effectiveSeniority { - above = append(above, i) + for _, a := range i.Allocations { + if !bytes.Equal(a.ConfirmationFilter, bp) { + continue + } + if a.Status == typesconsensus.ProverStatusActive || + a.Status == typesconsensus.ProverStatusJoining { + if i.Seniority >= effectiveSeniority { + above = append(above, i) + } + break + } } } @@ -1027,7 +1148,7 @@ func (e *GlobalConsensusEngine) getAppShardsFromProver( response, err := client.GetAppShards( getCtx, &protobufs.GetAppShardsRequest{ - ShardKey: shardKey, + ShardKey: shardKey, // buildutils:allow-slice-alias slice is static }, // The message size limits are swapped because the server is the one // sending the data. 
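// NOTE (editorial sketch, not part of the patch): the block below illustrates
// the stale-filter grace logic that reconcileWorkerAllocations introduces in
// event_distributor.go above. The field names (Filter, Allocated,
// PendingFilterFrame, CoreId) mirror the store.WorkerInfo usage in the diff,
// but the struct here is a simplified stand-in; the real code additionally
// skips clearing a filter when no pending frame is recorded and the local
// prover record is unavailable.
package main

import "fmt"

const pendingFilterGraceFrames = 720

type workerInfo struct {
	CoreId             uint
	Filter             []byte
	Allocated          bool
	PendingFilterFrame uint64
}

// shouldClearStaleFilter reports whether a worker whose filter no longer
// matches any registry allocation may be released at the given frame. A
// worker with a recorded pending join keeps its filter until the grace
// window of pendingFilterGraceFrames has fully elapsed, so a join that has
// not yet materialized is not torn down early.
func shouldClearStaleFilter(w *workerInfo, frameNumber uint64) bool {
	if len(w.Filter) == 0 {
		return false // nothing assigned, nothing to clear
	}
	if w.PendingFilterFrame == 0 {
		return true // no pending join recorded; the filter is simply stale
	}
	if frameNumber <= w.PendingFilterFrame {
		return false // pending frame not reached yet
	}
	return frameNumber-w.PendingFilterFrame >= pendingFilterGraceFrames
}

func main() {
	w := &workerInfo{CoreId: 1, Filter: []byte{0xaa}, PendingFilterFrame: 100}
	fmt.Println(shouldClearStaleFilter(w, 500)) // false: 400 frames elapsed, still inside the grace window
	fmt.Println(shouldClearStaleFilter(w, 900)) // true: 800 frames elapsed, past the 720-frame grace
}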
diff --git a/node/consensus/global/genesis.go b/node/consensus/global/genesis.go index 62d7dcd..d25d10c 100644 --- a/node/consensus/global/genesis.go +++ b/node/consensus/global/genesis.go @@ -7,6 +7,7 @@ import ( "encoding/binary" "encoding/hex" "encoding/json" + "errors" "math/big" "slices" "time" @@ -16,6 +17,8 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/mr-tron/base58" "go.uber.org/zap" + "source.quilibrium.com/quilibrium/monorepo/config" + "source.quilibrium.com/quilibrium/monorepo/consensus" "source.quilibrium.com/quilibrium/monorepo/consensus/models" hgcrdt "source.quilibrium.com/quilibrium/monorepo/hypergraph" globalintrinsics "source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/global" @@ -23,7 +26,11 @@ import ( "source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/token" hgstate "source.quilibrium.com/quilibrium/monorepo/node/execution/state/hypergraph" "source.quilibrium.com/quilibrium/monorepo/protobufs" + typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus" + "source.quilibrium.com/quilibrium/monorepo/types/crypto" "source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics" + "source.quilibrium.com/quilibrium/monorepo/types/hypergraph" + typeskeys "source.quilibrium.com/quilibrium/monorepo/types/keys" "source.quilibrium.com/quilibrium/monorepo/types/schema" "source.quilibrium.com/quilibrium/monorepo/types/store" "source.quilibrium.com/quilibrium/monorepo/types/tries" @@ -55,6 +62,30 @@ func (e *GlobalConsensusEngine) getMainnetGenesisJSON() *GenesisJson { return genesisData } +// ExpectedGenesisFrameNumber returns the frame number the node should treat as +// genesis for the provided configuration (mainnet vs. dev/test). +func ExpectedGenesisFrameNumber( + cfg *config.Config, + logger *zap.Logger, +) uint64 { + if cfg != nil && cfg.P2P.Network == 0 { + genesisData := &GenesisJson{} + if err := json.Unmarshal(mainnetGenesisJSON, genesisData); err != nil { + if logger != nil { + logger.Error( + "failed to parse embedded genesis data", + zap.Error(err), + ) + } + return 0 + } + if genesisData.FrameNumber > 0 { + return genesisData.FrameNumber + } + } + return 0 +} + // TODO[2.1.1+]: Refactor out direct hypergraph access func (e *GlobalConsensusEngine) initializeGenesis() ( *protobufs.GlobalFrame, @@ -695,6 +726,41 @@ func (e *GlobalConsensusEngine) createStubGenesis() *protobufs.GlobalFrame { return genesisFrame } +// InitializeGenesisState ensures the global genesis frame and QC exist using the +// provided stores and executors. It is primarily used by components that need +// the genesis state (like app consensus engines) without instantiating the full +// global consensus engine. 
+func InitializeGenesisState( + logger *zap.Logger, + cfg *config.Config, + clockStore store.ClockStore, + shardsStore store.ShardsStore, + hypergraph hypergraph.Hypergraph, + consensusStore consensus.ConsensusStore[*protobufs.ProposalVote], + inclusionProver crypto.InclusionProver, + keyManager typeskeys.KeyManager, + proverRegistry typesconsensus.ProverRegistry, +) (*protobufs.GlobalFrame, *protobufs.QuorumCertificate, error) { + engine := &GlobalConsensusEngine{ + logger: logger, + config: cfg, + clockStore: clockStore, + shardsStore: shardsStore, + hypergraph: hypergraph, + consensusStore: consensusStore, + inclusionProver: inclusionProver, + keyManager: keyManager, + proverRegistry: proverRegistry, + frameStore: make(map[string]*protobufs.GlobalFrame), + } + + frame, qc := engine.initializeGenesis() + if frame == nil || qc == nil { + return nil, nil, errors.New("failed to initialize global genesis") + } + return frame, qc, nil +} + func (e *GlobalConsensusEngine) establishMainnetGenesisProvers( state *hgstate.HypergraphState, genesisData *GenesisJson, diff --git a/node/consensus/global/global_consensus_engine.go b/node/consensus/global/global_consensus_engine.go index 4305204..fbb0607 100644 --- a/node/consensus/global/global_consensus_engine.go +++ b/node/consensus/global/global_consensus_engine.go @@ -194,6 +194,8 @@ type GlobalConsensusEngine struct { proposalCacheMu sync.RWMutex pendingCertifiedParents map[uint64]*protobufs.GlobalProposal pendingCertifiedParentsMu sync.RWMutex + activeProveRanks map[uint64]struct{} + activeProveRanksMu sync.Mutex appFrameStore map[string]*protobufs.AppShardFrame appFrameStoreMu sync.RWMutex lowCoverageStreak map[string]*coverageStreak @@ -231,10 +233,13 @@ type GlobalConsensusEngine struct { livenessProvider *GlobalLivenessProvider // Cross-provider state - collectedMessages [][]byte - shardCommitments [][]byte - proverRoot []byte - commitmentHash []byte + collectedMessages [][]byte + shardCommitments [][]byte + proverRoot []byte + commitmentHash []byte + shardCommitmentTrees []*tries.VectorCommitmentTree + shardCommitmentKeySets []map[string]struct{} + shardCommitmentMu sync.Mutex // Authentication provider authProvider channel.AuthenticationProvider @@ -313,6 +318,9 @@ func NewGlobalConsensusEngine( appFrameStore: make(map[string]*protobufs.AppShardFrame), proposalCache: make(map[uint64]*protobufs.GlobalProposal), pendingCertifiedParents: make(map[uint64]*protobufs.GlobalProposal), + activeProveRanks: make(map[uint64]struct{}), + shardCommitmentTrees: make([]*tries.VectorCommitmentTree, 256), + shardCommitmentKeySets: make([]map[string]struct{}, 256), globalConsensusMessageQueue: make(chan *pb.Message, 1000), globalFrameMessageQueue: make(chan *pb.Message, 100), globalProverMessageQueue: make(chan *pb.Message, 1000), @@ -577,6 +585,12 @@ func NewGlobalConsensusEngine( *protobufs.GlobalFrame, *protobufs.ProposalVote, ]{} + if _, rebuildErr := engine.rebuildShardCommitments( + frame.Header.FrameNumber+1, + frame.Header.Rank+1, + ); rebuildErr != nil { + panic(rebuildErr) + } } if err != nil { establishGenesis() @@ -739,6 +753,7 @@ func NewGlobalConsensusEngine( config, nil, engine.proverAddress, + nil, ) // Add sync provider @@ -784,6 +799,7 @@ func NewGlobalConsensusEngine( config, nil, engine.proverAddress, + nil, ) } @@ -974,6 +990,24 @@ func NewGlobalConsensusEngine( return engine, nil } +func (e *GlobalConsensusEngine) tryBeginProvingRank(rank uint64) bool { + e.activeProveRanksMu.Lock() + defer e.activeProveRanksMu.Unlock() + + if _, exists 
:= e.activeProveRanks[rank]; exists { + return false + } + + e.activeProveRanks[rank] = struct{}{} + return true +} + +func (e *GlobalConsensusEngine) endProvingRank(rank uint64) { + e.activeProveRanksMu.Lock() + delete(e.activeProveRanks, rank) + e.activeProveRanksMu.Unlock() +} + func (e *GlobalConsensusEngine) setupGRPCServer() error { // Parse the StreamListenMultiaddr to get the listen address listenAddr := "0.0.0.0:8340" // Default @@ -1533,6 +1567,10 @@ func (e *GlobalConsensusEngine) materialize( requests := frame.Requests expectedProverRoot := frame.Header.ProverTreeCommitment proposer := frame.Header.Prover + start := time.Now() + var appliedCount atomic.Int64 + var skippedCount atomic.Int64 + _, err := e.hypergraph.Commit(frameNumber) if err != nil { e.logger.Error("error committing hypergraph", zap.Error(err)) @@ -1555,13 +1593,15 @@ func (e *GlobalConsensusEngine) materialize( eg.SetLimit(len(requests)) for i, request := range requests { + idx := i + req := request eg.Go(func() error { - requestBytes, err := request.ToCanonicalBytes() + requestBytes, err := req.ToCanonicalBytes() if err != nil { e.logger.Error( "error serializing request", - zap.Int("message_index", i), + zap.Int("message_index", idx), zap.Error(err), ) return errors.Wrap(err, "materialize") @@ -1570,7 +1610,7 @@ func (e *GlobalConsensusEngine) materialize( if len(requestBytes) == 0 { e.logger.Error( "empty request bytes", - zap.Int("message_index", i), + zap.Int("message_index", idx), ) return errors.Wrap(errors.New("empty request"), "materialize") } @@ -1579,9 +1619,10 @@ func (e *GlobalConsensusEngine) materialize( if err != nil { e.logger.Error( "invalid message", - zap.Int("message_index", i), + zap.Int("message_index", idx), zap.Error(err), ) + skippedCount.Add(1) return nil } @@ -1608,11 +1649,13 @@ func (e *GlobalConsensusEngine) materialize( if err != nil { e.logger.Error( "error processing message", - zap.Int("message_index", i), + zap.Int("message_index", idx), zap.Error(err), ) + skippedCount.Add(1) return nil } + appliedCount.Add(1) return nil }) @@ -1622,6 +1665,11 @@ func (e *GlobalConsensusEngine) materialize( return err } + err = e.proverRegistry.PruneOrphanJoins(frameNumber) + if err != nil { + return errors.Wrap(err, "materialize") + } + if err := state.Commit(); err != nil { return errors.Wrap(err, "materialize") } @@ -1631,51 +1679,102 @@ func (e *GlobalConsensusEngine) materialize( return errors.Wrap(err, "materialize") } - if !e.config.Engine.ArchiveMode || e.config.P2P.Network == 99 { - if e.verifyProverRoot(frameNumber, expectedProverRoot, proposer) { + shouldVerifyRoot := !e.config.Engine.ArchiveMode || e.config.P2P.Network == 99 + localProverRoot, localRootErr := e.computeLocalProverRoot(frameNumber) + if localRootErr != nil { + logMsg := "failed to compute local prover root" + if shouldVerifyRoot { + e.logger.Warn( + logMsg, + zap.Uint64("frame_number", frameNumber), + zap.Error(localRootErr), + ) + } else { + e.logger.Debug( + logMsg, + zap.Uint64("frame_number", frameNumber), + zap.Error(localRootErr), + ) + } + } + + if len(localProverRoot) > 0 && shouldVerifyRoot { + if e.verifyProverRoot( + frameNumber, + expectedProverRoot, + localProverRoot, + proposer, + ) { e.reconcileLocalWorkerAllocations() } } + var expectedRootHex string + if len(expectedProverRoot) > 0 { + expectedRootHex = hex.EncodeToString(expectedProverRoot) + } + localRootHex := "" + if len(localProverRoot) > 0 { + localRootHex = hex.EncodeToString(localProverRoot) + } + + e.logger.Info( + "materialized global 
frame", + zap.Uint64("frame_number", frameNumber), + zap.Int("request_count", len(requests)), + zap.Int("applied_requests", int(appliedCount.Load())), + zap.Int("skipped_requests", int(skippedCount.Load())), + zap.String("expected_root", expectedRootHex), + zap.String("local_root", localRootHex), + zap.String("proposer", hex.EncodeToString(proposer)), + zap.Duration("duration", time.Since(start)), + ) + return nil } +func (e *GlobalConsensusEngine) computeLocalProverRoot( + frameNumber uint64, +) ([]byte, error) { + if e.hypergraph == nil { + return nil, errors.New("hypergraph unavailable") + } + + commitSet, err := e.hypergraph.Commit(frameNumber) + if err != nil { + return nil, errors.Wrap(err, "compute local prover root") + } + + var zeroShardKey tries.ShardKey + for shardKey, phaseCommits := range commitSet { + if shardKey.L1 == zeroShardKey.L1 { + if len(phaseCommits) == 0 || len(phaseCommits[0]) == 0 { + return nil, errors.New("empty prover root commitment") + } + return slices.Clone(phaseCommits[0]), nil + } + } + + return nil, errors.New("prover root shard missing") +} + func (e *GlobalConsensusEngine) verifyProverRoot( frameNumber uint64, expected []byte, + localRoot []byte, proposer []byte, ) bool { - if len(expected) == 0 || e.hypergraph == nil { + if len(expected) == 0 || len(localRoot) == 0 { return true } - roots, err := e.hypergraph.GetShardCommits( - frameNumber, - intrinsics.GLOBAL_INTRINSIC_ADDRESS[:], - ) - if err != nil || len(roots) == 0 || len(roots[0]) == 0 { - if err != nil { - e.logger.Warn( - "failed to load local prover root", - zap.Uint64("frame_number", frameNumber), - zap.Error(err), - ) - } else { - e.logger.Warn( - "local prover root missing", - zap.Uint64("frame_number", frameNumber), - ) - } - return false - } - - localRoot := roots[0] if !bytes.Equal(localRoot, expected) { - e.logger.Debug( + e.logger.Warn( "prover root mismatch", zap.Uint64("frame_number", frameNumber), zap.String("expected_root", hex.EncodeToString(expected)), zap.String("local_root", hex.EncodeToString(localRoot)), + zap.String("proposer", hex.EncodeToString(proposer)), ) e.proverRootSynced.Store(false) e.proverRootVerifiedFrame.Store(0) @@ -1683,6 +1782,13 @@ func (e *GlobalConsensusEngine) verifyProverRoot( return false } + e.logger.Debug( + "prover root verified", + zap.Uint64("frame_number", frameNumber), + zap.String("root", hex.EncodeToString(localRoot)), + zap.String("proposer", hex.EncodeToString(proposer)), + ) + e.proverRootSynced.Store(true) e.proverRootVerifiedFrame.Store(frameNumber) return true @@ -1710,7 +1816,7 @@ func (e *GlobalConsensusEngine) triggerProverHypersync(proposer []byte) { L1: [3]byte{0x00, 0x00, 0x00}, L2: intrinsics.GLOBAL_INTRINSIC_ADDRESS, } - e.syncProvider.HyperSync(ctx, proposer, shardKey) + e.syncProvider.HyperSync(ctx, proposer, shardKey, nil) if err := e.proverRegistry.Refresh(); err != nil { e.logger.Warn( "failed to refresh prover registry after hypersync", @@ -3411,6 +3517,19 @@ func (e *GlobalConsensusEngine) OnOwnProposal( PriorRankTimeoutCertificate: priorTC, Vote: *proposal.Vote, } + frame := pbProposal.State + var proverRootHex string + if frame.Header != nil { + proverRootHex = hex.EncodeToString(frame.Header.ProverTreeCommitment) + } + e.logger.Info( + "publishing own global proposal", + zap.Uint64("rank", frame.GetRank()), + zap.Uint64("frame_number", frame.GetFrameNumber()), + zap.Int("request_count", len(frame.GetRequests())), + zap.String("prover_root", proverRootHex), + zap.String("proposer", 
hex.EncodeToString([]byte(frame.Source()))), + ) data, err := pbProposal.ToCanonicalBytes() if err != nil { e.logger.Error("could not serialize proposal", zap.Error(err)) @@ -3712,11 +3831,24 @@ func (e *GlobalConsensusEngine) OnQuorumCertificateTriggeredRankChange( // OnRankChange implements consensus.Consumer. func (e *GlobalConsensusEngine) OnRankChange(oldRank uint64, newRank uint64) { + if e.currentRank == newRank { + return + } + e.currentRank = newRank - prior, err := e.clockStore.GetLatestGlobalClockFrame() + qc, err := e.clockStore.GetLatestQuorumCertificate(nil) if err != nil { - e.logger.Error("new rank, no latest global clock frame") + e.logger.Error("new rank, no latest QC") + frameProvingTotal.WithLabelValues("error").Inc() + return + } + prior, err := e.clockStore.GetGlobalClockFrameCandidate( + qc.FrameNumber, + []byte(qc.Identity()), + ) + if err != nil { + e.logger.Error("new rank, no global clock frame candidate") frameProvingTotal.WithLabelValues("error").Inc() return } @@ -3734,11 +3866,6 @@ func (e *GlobalConsensusEngine) rebuildShardCommitments( frameNumber uint64, rank uint64, ) ([]byte, error) { - commitments := make([]*tries.VectorCommitmentTree, 256) - for i := range commitments { - commitments[i] = &tries.VectorCommitmentTree{} - } - commitSet, err := e.hypergraph.Commit(frameNumber) if err != nil { e.logger.Error("could not commit", zap.Error(err)) @@ -3753,48 +3880,141 @@ func (e *GlobalConsensusEngine) rebuildShardCommitments( ) } + e.shardCommitmentMu.Lock() + defer e.shardCommitmentMu.Unlock() + + if e.shardCommitmentTrees == nil { + e.shardCommitmentTrees = make([]*tries.VectorCommitmentTree, 256) + } + if e.shardCommitmentKeySets == nil { + e.shardCommitmentKeySets = make([]map[string]struct{}, 256) + } + if e.shardCommitments == nil { + e.shardCommitments = make([][]byte, 256) + } + + currentKeySets := make([]map[string]struct{}, 256) + changedTrees := make([]bool, 256) + proverRoot := make([]byte, 64) collected := 0 + var zeroShardKeyL1 [3]byte - for sk, s := range commitSet { - if !bytes.Equal(sk.L1[:], []byte{0x00, 0x00, 0x00}) { - collected++ - - for phaseSet := 0; phaseSet < 4; phaseSet++ { - commit := s[phaseSet] - foldedShardKey := make([]byte, 32) - copy(foldedShardKey, sk.L2[:]) - - foldedShardKey[0] |= byte(phaseSet << 6) - for l1Idx := 0; l1Idx < 3; l1Idx++ { - if err := commitments[sk.L1[l1Idx]].Insert( - foldedShardKey, - commit, - nil, - big.NewInt(int64(len(commit))), - ); err != nil { - return nil, errors.Wrap(err, "rebuild shard commitments") - } - } + for sk, phaseCommits := range commitSet { + if sk.L1 == zeroShardKeyL1 { + if len(phaseCommits) > 0 { + proverRoot = slices.Clone(phaseCommits[0]) + } + continue + } + + collected++ + + for phaseSet := 0; phaseSet < len(phaseCommits); phaseSet++ { + commit := phaseCommits[phaseSet] + foldedShardKey := make([]byte, 32) + copy(foldedShardKey, sk.L2[:]) + + foldedShardKey[0] |= byte(phaseSet << 6) + keyStr := string(foldedShardKey) + var valueCopy []byte + + for l1Idx := 0; l1Idx < len(sk.L1); l1Idx++ { + index := int(sk.L1[l1Idx]) + if index >= len(e.shardCommitmentTrees) { + e.logger.Warn( + "shard commitment index out of range", + zap.Int("index", index), + ) + continue + } + + if e.shardCommitmentTrees[index] == nil { + e.shardCommitmentTrees[index] = &tries.VectorCommitmentTree{} + } + + if currentKeySets[index] == nil { + currentKeySets[index] = make(map[string]struct{}) + } + currentKeySets[index][keyStr] = struct{}{} + + tree := e.shardCommitmentTrees[index] + if existing, err := 
tree.Get(foldedShardKey); err == nil && + bytes.Equal(existing, commit) { + continue + } + + if valueCopy == nil { + valueCopy = slices.Clone(commit) + } + + if err := tree.Insert( + foldedShardKey, + valueCopy, + nil, + big.NewInt(int64(len(commit))), + ); err != nil { + return nil, errors.Wrap(err, "rebuild shard commitments") + } + + changedTrees[index] = true } - } else { - proverRoot = s[0] } } - shardCommitments := make([][]byte, 256) - for i := 0; i < 256; i++ { - shardCommitments[i] = commitments[i].Commit(e.inclusionProver, false) + for idx := 0; idx < len(e.shardCommitmentTrees); idx++ { + prevKeys := e.shardCommitmentKeySets[idx] + currKeys := currentKeySets[idx] + + if len(prevKeys) > 0 { + for key := range prevKeys { + if currKeys != nil { + if _, ok := currKeys[key]; ok { + continue + } + } + + tree := e.shardCommitmentTrees[idx] + if tree == nil { + continue + } + + if err := tree.Delete([]byte(key)); err != nil { + e.logger.Debug( + "failed to delete shard commitment leaf", + zap.Int("shard_index", idx), + zap.Error(err), + ) + continue + } + + changedTrees[idx] = true + } + } + + e.shardCommitmentKeySets[idx] = currKeys + } + + for i := 0; i < len(e.shardCommitmentTrees); i++ { + if e.shardCommitmentTrees[i] == nil { + e.shardCommitmentTrees[i] = &tries.VectorCommitmentTree{} + } + + if changedTrees[i] || e.shardCommitments[i] == nil { + e.shardCommitments[i] = e.shardCommitmentTrees[i].Commit( + e.inclusionProver, + false, + ) + } } preimage := slices.Concat( - slices.Concat(shardCommitments...), + slices.Concat(e.shardCommitments...), proverRoot, ) commitmentHash := sha3.Sum256(preimage) - e.shardCommitments = shardCommitments e.proverRoot = proverRoot e.commitmentHash = commitmentHash[:] @@ -4117,7 +4337,7 @@ func (e *GlobalConsensusEngine) getPendingProposals( parent, err := e.clockStore.GetQuorumCertificate(nil, startRank) if err != nil { - panic(err) + return result } for rank := startRank + 1; rank <= endRank; rank++ { diff --git a/node/consensus/global/global_consensus_engine_integration_test.go b/node/consensus/global/global_consensus_engine_integration_test.go index 67a13cf..5b437a4 100644 --- a/node/consensus/global/global_consensus_engine_integration_test.go +++ b/node/consensus/global/global_consensus_engine_integration_test.go @@ -466,7 +466,7 @@ func createIntegrationTestGlobalConsensusEngineWithHypergraphAndKey( hg = sharedHypergraph } else { hypergraphStore := store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/global"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg = hgcrdt.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg = hgcrdt.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) } // Create key store @@ -655,7 +655,7 @@ func TestGlobalConsensusEngine_Integration_MultiNodeConsensus(t *testing.T) { // Create and register 6 provers (one for each node) for i := 0; i < 6; i++ { hypergraphStores[i] = store.NewPebbleHypergraphStore(&config.DBConfig{InMemoryDONOTUSE: true, Path: ".test/global_shared"}, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hypergraphs[i] = hgcrdt.NewHypergraph(logger, hypergraphStores[i], inclusionProver, []int{}, &tests.Nopthenticator{}) + hypergraphs[i] = hgcrdt.NewHypergraph(logger, hypergraphStores[i], inclusionProver, []int{}, &tests.Nopthenticator{}, 1) } for i := 0; i < 6; i++ { tempKeyManager := keys.NewInMemoryKeyManager(bc, dc) @@ -864,7 +864,7 @@ func 
TestGlobalConsensusEngine_Integration_ShardCoverage(t *testing.T) { InMemoryDONOTUSE: true, Path: ".test/global", }, pebbleDB, zap.L(), &verenc.MPCitHVerifiableEncryptor{}, inclusionProver) - hg := hgcrdt.NewHypergraph(zap.NewNop(), hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hgcrdt.NewHypergraph(zap.NewNop(), hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) for i := range 6 { k := make([]byte, 585) k[1] = byte(i) @@ -972,7 +972,7 @@ func TestGlobalConsensusEngine_Integration_NoProversStaysInVerifying(t *testing. InMemoryDONOTUSE: true, Path: fmt.Sprintf(".test/global_no_provers_%d", nodeID), }, pebbleDB, logger, verifiableEncryptor, inclusionProver) - hg := hgcrdt.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}) + hg := hgcrdt.NewHypergraph(logger, hypergraphStore, inclusionProver, []int{}, &tests.Nopthenticator{}, 1) // Create prover registry - but don't register any provers proverRegistry, err := provers.NewProverRegistry(logger, hg) diff --git a/node/consensus/global/message_collector.go b/node/consensus/global/message_collector.go index 8fa3539..338e1d3 100644 --- a/node/consensus/global/message_collector.go +++ b/node/consensus/global/message_collector.go @@ -219,7 +219,7 @@ func (e *GlobalConsensusEngine) addGlobalMessage(data []byte) { return } - payload := data + payload := data // buildutils:allow-slice-alias slice is static if len(data) >= 4 { typePrefix := binary.BigEndian.Uint32(data[:4]) if typePrefix == protobufs.MessageBundleType { diff --git a/node/consensus/global/message_processors.go b/node/consensus/global/message_processors.go index 5971fbe..f011f77 100644 --- a/node/consensus/global/message_processors.go +++ b/node/consensus/global/message_processors.go @@ -6,6 +6,7 @@ import ( "crypto/sha256" "encoding/binary" "encoding/hex" + "errors" "fmt" "slices" "time" @@ -22,6 +23,7 @@ import ( "source.quilibrium.com/quilibrium/monorepo/lifecycle" "source.quilibrium.com/quilibrium/monorepo/protobufs" "source.quilibrium.com/quilibrium/monorepo/types/crypto" + "source.quilibrium.com/quilibrium/monorepo/types/store" "source.quilibrium.com/quilibrium/monorepo/types/tries" ) @@ -914,11 +916,23 @@ func (e *GlobalConsensusEngine) handleGlobalProposal( } }() + frame := proposal.State + var proverRootHex string + if frame.Header != nil { + proverRootHex = hex.EncodeToString(frame.Header.ProverTreeCommitment) + } + proposerHex := "" + if proposal.Vote != nil { + proposerHex = hex.EncodeToString([]byte(proposal.Vote.Identity())) + } e.logger.Debug( "handling global proposal", zap.Uint64("rank", proposal.GetRank()), - zap.Uint64("frame_number", proposal.State.GetFrameNumber()), - zap.String("id", hex.EncodeToString([]byte(proposal.State.Identity()))), + zap.Uint64("frame_number", frame.GetFrameNumber()), + zap.Int("request_count", len(frame.GetRequests())), + zap.String("id", hex.EncodeToString([]byte(frame.Identity()))), + zap.String("prover_root", proverRootHex), + zap.String("proposer", proposerHex), ) // Small gotcha: the proposal structure uses interfaces, so we can't assign @@ -1095,14 +1109,68 @@ func (e *GlobalConsensusEngine) tryRecoverFinalizedFrame( func (e *GlobalConsensusEngine) processProposal( proposal *protobufs.GlobalProposal, ) bool { + return e.processProposalInternal(proposal, false) +} + +func (e *GlobalConsensusEngine) processProposalInternal( + proposal *protobufs.GlobalProposal, + skipAncestors bool, +) bool { + if proposal == nil || proposal.State == nil || 
proposal.State.Header == nil { + return false + } + + if !skipAncestors { + if ok, err := e.ensureAncestorStates(proposal); err != nil { + e.logger.Warn( + "failed to recover ancestor states for proposal", + zap.Uint64("frame_number", proposal.State.Header.FrameNumber), + zap.Uint64("rank", proposal.State.Header.Rank), + zap.Error(err), + ) + e.requestAncestorSync(proposal) + return false + } else if !ok { + return false + } + } + + frame := proposal.State + var proverRootHex string + if frame.Header != nil { + proverRootHex = hex.EncodeToString(frame.Header.ProverTreeCommitment) + } + proposerHex := "" + if proposal.Vote != nil { + proposerHex = hex.EncodeToString([]byte(proposal.Vote.Identity())) + } e.logger.Debug( "processing proposal", zap.Uint64("rank", proposal.GetRank()), - zap.Uint64("frame_number", proposal.State.GetFrameNumber()), - zap.String("id", hex.EncodeToString([]byte(proposal.State.Identity()))), + zap.Uint64("frame_number", frame.GetFrameNumber()), + zap.Int("request_count", len(frame.GetRequests())), + zap.String("id", hex.EncodeToString([]byte(frame.Identity()))), + zap.String("prover_root", proverRootHex), + zap.String("proposer", proposerHex), ) - err := e.VerifyQuorumCertificate(proposal.ParentQuorumCertificate) + txn, err := e.clockStore.NewTransaction(false) + if err != nil { + return false + } + + err = e.clockStore.PutGlobalClockFrameCandidate(proposal.State, txn) + if err != nil { + txn.Abort() + return false + } + + if err = txn.Commit(); err != nil { + txn.Abort() + return false + } + + err = e.VerifyQuorumCertificate(proposal.ParentQuorumCertificate) if err != nil { e.logger.Debug( "proposal has invalid qc", @@ -1202,6 +1270,211 @@ func (e *GlobalConsensusEngine) processProposal( return true } +type ancestorDescriptor struct { + frameNumber uint64 + selector []byte +} + +func (e *GlobalConsensusEngine) ensureAncestorStates( + proposal *protobufs.GlobalProposal, +) (bool, error) { + ancestors, err := e.collectMissingAncestors(proposal) + if err != nil { + return false, err + } + + if len(ancestors) == 0 { + return true, nil + } + + for i := len(ancestors) - 1; i >= 0; i-- { + ancestor, err := e.buildStoredProposal(ancestors[i]) + if err != nil { + return false, err + } + if !e.processProposalInternal(ancestor, true) { + return false, fmt.Errorf( + "unable to process ancestor frame %d", + ancestors[i].frameNumber, + ) + } + } + + return true, nil +} + +func (e *GlobalConsensusEngine) collectMissingAncestors( + proposal *protobufs.GlobalProposal, +) ([]ancestorDescriptor, error) { + header := proposal.State.Header + if header == nil || header.FrameNumber == 0 { + return nil, nil + } + + finalized := e.forks.FinalizedState() + if finalized == nil || finalized.State == nil || (*finalized.State).Header == nil { + return nil, errors.New("finalized state unavailable") + } + finalizedFrame := (*finalized.State).Header.FrameNumber + finalizedSelector := []byte(finalized.Identifier) + + parentFrame := header.FrameNumber - 1 + parentSelector := slices.Clone(header.ParentSelector) + if len(parentSelector) == 0 { + return nil, nil + } + + var ancestors []ancestorDescriptor + anchored := false + for parentFrame > finalizedFrame && len(parentSelector) > 0 { + if _, found := e.forks.GetState( + models.Identity(string(parentSelector)), + ); found { + anchored = true + break + } + ancestors = append(ancestors, ancestorDescriptor{ + frameNumber: parentFrame, + selector: slices.Clone(parentSelector), + }) + + frame, err := e.clockStore.GetGlobalClockFrameCandidate( + 
parentFrame, + parentSelector, + ) + if err != nil { + return nil, err + } + if frame == nil || frame.Header == nil { + return nil, errors.New("ancestor frame missing header") + } + + parentFrame-- + parentSelector = slices.Clone(frame.Header.ParentSelector) + } + + if !anchored { + switch { + case parentFrame == finalizedFrame: + if !bytes.Equal(parentSelector, finalizedSelector) { + return nil, fmt.Errorf( + "ancestor chain not rooted at finalized frame %d", + finalizedFrame, + ) + } + anchored = true + case parentFrame < finalizedFrame: + return nil, fmt.Errorf( + "ancestor chain crossed finalized boundary (frame %d < %d)", + parentFrame, + finalizedFrame, + ) + case len(parentSelector) == 0: + return nil, errors.New( + "ancestor selector missing before reaching finalized state", + ) + } + } + + if !anchored { + return nil, errors.New("ancestor chain could not be anchored in forks") + } + + return ancestors, nil +} + +func (e *GlobalConsensusEngine) buildStoredProposal( + desc ancestorDescriptor, +) (*protobufs.GlobalProposal, error) { + frame, err := e.clockStore.GetGlobalClockFrameCandidate( + desc.frameNumber, + desc.selector, + ) + if err != nil { + return nil, err + } + if frame == nil || frame.Header == nil { + return nil, errors.New("stored ancestor missing header") + } + + var parentQC *protobufs.QuorumCertificate + if frame.GetRank() > 0 { + parentQC, err = e.clockStore.GetQuorumCertificate( + nil, + frame.GetRank()-1, + ) + if err != nil { + return nil, err + } + if parentQC == nil { + return nil, fmt.Errorf( + "missing parent qc for frame %d", + frame.GetRank()-1, + ) + } + } + + var priorTC *protobufs.TimeoutCertificate + if frame.GetRank() > 0 { + priorTC, err = e.clockStore.GetTimeoutCertificate( + nil, + frame.GetRank()-1, + ) + if err != nil && !errors.Is(err, store.ErrNotFound) { + return nil, err + } + if errors.Is(err, store.ErrNotFound) { + priorTC = nil + } + } + + vote, err := e.clockStore.GetProposalVote( + nil, + frame.GetRank(), + []byte(frame.Identity()), + ) + if err != nil { + return nil, err + } + + return &protobufs.GlobalProposal{ + State: frame, + ParentQuorumCertificate: parentQC, + PriorRankTimeoutCertificate: priorTC, + Vote: vote, + }, nil +} + +func (e *GlobalConsensusEngine) requestAncestorSync( + proposal *protobufs.GlobalProposal, +) { + if proposal == nil || proposal.State == nil || proposal.State.Header == nil { + return + } + if e.syncProvider == nil { + return + } + + peerID, err := e.getPeerIDOfProver(proposal.State.Header.Prover) + if err != nil { + peerID, err = e.getRandomProverPeerId() + if err != nil { + return + } + } + + head := e.forks.FinalizedState() + if head == nil || head.State == nil { + return + } + + e.syncProvider.AddState( + []byte(peerID), + (*head.State).Header.FrameNumber, + []byte(head.Identifier), + ) +} + func (e *GlobalConsensusEngine) cacheProposal( proposal *protobufs.GlobalProposal, ) { @@ -1345,16 +1618,7 @@ func (e *GlobalConsensusEngine) trySealParentWithChild( zap.Uint64("child_frame", header.FrameNumber), ) - head, err := e.clockStore.GetLatestGlobalClockFrame() - if err != nil { - e.logger.Error("error fetching time reel head", zap.Error(err)) - return - } - - if head.Header.FrameNumber+1 == parent.State.Header.FrameNumber { - e.addCertifiedState(parent, child) - } - + e.addCertifiedState(parent, child) e.pendingCertifiedParentsMu.Lock() delete(e.pendingCertifiedParents, parentFrame) e.pendingCertifiedParentsMu.Unlock() diff --git a/node/consensus/provers/proposer.go b/node/consensus/provers/proposer.go index 
2212002..4718a1b 100644 --- a/node/consensus/provers/proposer.go +++ b/node/consensus/provers/proposer.go @@ -89,11 +89,14 @@ func NewManager( // PlanAndAllocate picks up to maxAllocations of the best shard filters and // updates the filter in the worker manager for each selected free worker. // If maxAllocations == 0, it will use as many free workers as available. +// frameNumber is recorded so pending joins survive restarts while the network +// processes the request. func (m *Manager) PlanAndAllocate( difficulty uint64, shards []ShardDescriptor, maxAllocations int, worldBytes *big.Int, + frameNumber uint64, ) ([]Proposal, error) { m.mu.Lock() isPlanning := m.isPlanning @@ -237,7 +240,7 @@ func (m *Manager) PlanAndAllocate( } if len(proposals) > 0 { - m.persistPlannedFilters(proposals, workerLookup) + m.persistPlannedFilters(proposals, workerLookup, frameNumber) } // Perform allocations @@ -263,6 +266,7 @@ func (m *Manager) PlanAndAllocate( func (m *Manager) persistPlannedFilters( proposals []Proposal, workers map[uint]*store.WorkerInfo, + frameNumber uint64, ) { for _, proposal := range proposals { info, ok := workers[proposal.WorkerId] @@ -288,6 +292,7 @@ func (m *Manager) persistPlannedFilters( copy(filterCopy, proposal.Filter) info.Filter = filterCopy info.Allocated = false + info.PendingFilterFrame = frameNumber if err := m.workerMgr.RegisterWorker(info); err != nil { m.logger.Warn( diff --git a/node/consensus/provers/proposer_test.go b/node/consensus/provers/proposer_test.go index 0a22ef4..a5aba0a 100644 --- a/node/consensus/provers/proposer_test.go +++ b/node/consensus/provers/proposer_test.go @@ -94,8 +94,9 @@ func createWorkers(n int) []*store.WorkerInfo { ws := make([]*store.WorkerInfo, n) for i := 0; i < n; i++ { ws[i] = &store.WorkerInfo{ - CoreId: uint(i + 1), - Allocated: false, + CoreId: uint(i + 1), + Allocated: false, + PendingFilterFrame: 0, } } return ws @@ -132,7 +133,7 @@ func TestPlanAndAllocate_EqualScores_RandomizedWhenNotDataGreedy(t *testing.T) { time.Sleep(5 * time.Millisecond) wm.lastFiltersHex = nil - _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(40000)) + _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(40000), uint64(i+1)) if err != nil { t.Fatalf("PlanAndAllocate failed: %v", err) } @@ -144,6 +145,7 @@ func TestPlanAndAllocate_EqualScores_RandomizedWhenNotDataGreedy(t *testing.T) { // Reset worker filter to simulate completion for _, worker := range wm.workers { worker.Filter = nil + worker.PendingFilterFrame = 0 } } @@ -172,7 +174,7 @@ func TestPlanAndAllocate_EqualSizes_DeterministicWhenDataGreedy(t *testing.T) { const runs = 16 for i := 0; i < runs; i++ { wm.lastFiltersHex = nil - _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(40000)) + _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(40000), uint64(i+1)) if err != nil { t.Fatalf("PlanAndAllocate failed: %v", err) } @@ -195,7 +197,7 @@ func TestPlanAndAllocate_UnequalScores_PicksMax(t *testing.T) { other2 := createShard([]byte{0x02}, 50_000, 0, 1) shards := []ShardDescriptor{other1, other2, best} - _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(300000)) + _, err := m.PlanAndAllocate(100, shards, 1, big.NewInt(300000), 1) if err != nil { t.Fatalf("PlanAndAllocate failed: %v", err) } diff --git a/node/consensus/provers/prover_registry.go b/node/consensus/provers/prover_registry.go index 00cc4ed..d5ffbc8 100644 --- a/node/consensus/provers/prover_registry.go +++ b/node/consensus/provers/prover_registry.go @@ -5,12 +5,14 @@ import ( "encoding/binary" "encoding/hex" "fmt" 
+ "slices" "sort" "sync" + "github.com/iden3/go-iden3-crypto/poseidon" "github.com/pkg/errors" "go.uber.org/zap" - "golang.org/x/exp/slices" + hgcrdt "source.quilibrium.com/quilibrium/monorepo/hypergraph" "source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/global" hgstate "source.quilibrium.com/quilibrium/monorepo/node/execution/state/hypergraph" "source.quilibrium.com/quilibrium/monorepo/types/consensus" @@ -402,6 +404,195 @@ func (r *ProverRegistry) UpdateProverActivity( return nil } +// PruneOrphanJoins implements ProverRegistry +func (r *ProverRegistry) PruneOrphanJoins(frameNumber uint64) error { + r.mu.Lock() + defer r.mu.Unlock() + + if frameNumber <= 760 { + return nil + } + + cutoff := frameNumber - 760 + var pruned int + + set := r.hypergraph.(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(tries.ShardKey{ + L1: [3]byte{0x00, 0x00, 0x00}, + L2: [32]byte(bytes.Repeat([]byte{0xff}, 32)), + }) + + txn, err := set.GetTree().Store.NewTransaction(false) + if err != nil { + return errors.Wrap(err, "prune orphan joins") + } + + for _, info := range r.proverCache { + if info == nil || len(info.Allocations) == 0 { + continue + } + + updated := info.Allocations[:0] + var removedFilters map[string]struct{} + + for _, allocation := range info.Allocations { + if allocation.Status == consensus.ProverStatusJoining && + allocation.JoinFrameNumber < cutoff { + if err := r.pruneAllocationVertex(txn, info, allocation); err != nil { + txn.Abort() + return errors.Wrap(err, "prune orphan joins") + } + + if removedFilters == nil { + removedFilters = make(map[string]struct{}) + } + removedFilters[string(allocation.ConfirmationFilter)] = struct{}{} + pruned++ + continue + } + + updated = append(updated, allocation) + } + + if len(updated) != len(info.Allocations) { + info.Allocations = updated + r.cleanupFilterCache(info, removedFilters) + } + } + + if pruned > 0 { + if err := txn.Commit(); err != nil { + return errors.Wrap(err, "prune orphan joins") + } + + r.logger.Info( + "pruned orphan prover allocations", + zap.Int("allocations_pruned", pruned), + zap.Uint64("frame_cutoff", cutoff), + ) + } else { + txn.Abort() + } + + return nil +} + +func (r *ProverRegistry) pruneAllocationVertex( + txn tries.TreeBackingStoreTransaction, + info *consensus.ProverInfo, + allocation consensus.ProverAllocationInfo, +) error { + if info == nil { + return errors.New("missing info") + } + if len(info.PublicKey) == 0 { + r.logger.Warn( + "unable to prune allocation without public key", + zap.String("address", hex.EncodeToString(info.Address)), + ) + return errors.New("invalid record") + } + + allocationHash, err := poseidon.HashBytes( + slices.Concat( + []byte("PROVER_ALLOCATION"), + info.PublicKey, + allocation.ConfirmationFilter, + ), + ) + if err != nil { + return errors.Wrap(err, "prune allocation hash") + } + + var vertexID [64]byte + copy(vertexID[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:]) + copy( + vertexID[32:], + allocationHash.FillBytes(make([]byte, 32)), + ) + + _, err = r.hypergraph.GetVertex(vertexID) + if err != nil { + if errors.Cause(err) == hypergraph.ErrRemoved { + return nil + } + r.logger.Debug( + "allocation vertex missing during prune", + zap.String("address", hex.EncodeToString(info.Address)), + zap.String( + "filter", + hex.EncodeToString(allocation.ConfirmationFilter), + ), + zap.Error(err), + ) + return nil + } + + set := r.hypergraph.(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(tries.ShardKey{ + L1: [3]byte{0x00, 0x00, 0x00}, + L2: [32]byte(bytes.Repeat([]byte{0xff}, 32)), + }) + + 
vtree := set.GetTree() + if err := vtree.Delete(txn, vertexID[:]); err != nil { + return errors.Wrap(err, "prune allocation remove vertex") + } + + return nil +} + +func (r *ProverRegistry) cleanupFilterCache( + info *consensus.ProverInfo, + filters map[string]struct{}, +) { + if len(filters) == 0 { + return + } + + for filterKey := range filters { + if r.proverHasFilter(info, filterKey) { + continue + } + r.removeFilterCacheEntry(filterKey, info) + } +} + +func (r *ProverRegistry) proverHasFilter( + info *consensus.ProverInfo, + filterKey string, +) bool { + if info == nil { + return false + } + for _, allocation := range info.Allocations { + if string(allocation.ConfirmationFilter) == filterKey { + return true + } + } + return false +} + +func (r *ProverRegistry) removeFilterCacheEntry( + filterKey string, + info *consensus.ProverInfo, +) { + provers, ok := r.filterCache[filterKey] + if !ok { + return + } + for i, candidate := range provers { + if candidate == info { + r.filterCache[filterKey] = append( + provers[:i], + provers[i+1:]..., + ) + break + } + } + if len(r.filterCache[filterKey]) == 0 { + delete(r.filterCache, filterKey) + } +} + // Helper method to get provers by status, returns lexicographic order func (r *ProverRegistry) getProversByStatusInternal( filter []byte, @@ -1664,7 +1855,7 @@ func (r *ProverRegistry) extractProverFromAddress( // Create ProverInfo proverInfo := &consensus.ProverInfo{ PublicKey: publicKey, - Address: proverAddress, + Address: proverAddress, // buildutils:allow-slice-alias slice is static Status: mappedStatus, AvailableStorage: availableStorage, Seniority: seniority, diff --git a/node/consensus/sync/app_sync_hooks.go b/node/consensus/sync/app_sync_hooks.go new file mode 100644 index 0000000..be4df87 --- /dev/null +++ b/node/consensus/sync/app_sync_hooks.go @@ -0,0 +1,303 @@ +package sync + +import ( + "bufio" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "math/big" + "net/http" + "os" + "path" + "path/filepath" + "strings" + "time" + + "github.com/pkg/errors" + "go.uber.org/zap" + "source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/token" + "source.quilibrium.com/quilibrium/monorepo/protobufs" + "source.quilibrium.com/quilibrium/monorepo/types/tries" + up2p "source.quilibrium.com/quilibrium/monorepo/utils/p2p" +) + +type AppSyncHooks struct { + shardAddress []byte + shardKey tries.ShardKey + snapshotPath string + network uint8 + tokenShard bool +} + +func NewAppSyncHooks( + shardAddress []byte, + snapshotPath string, + network uint8, +) *AppSyncHooks { + var shardKey tries.ShardKey + if len(shardAddress) > 0 { + l1 := up2p.GetBloomFilterIndices(shardAddress, 256, 3) + copy(shardKey.L1[:], l1) + copy(shardKey.L2[:], shardAddress[:min(len(shardAddress), 32)]) + } + + tokenShard := len(shardAddress) >= 32 && + bytes.Equal(shardAddress[:32], token.QUIL_TOKEN_ADDRESS[:]) + + return &AppSyncHooks{ + shardAddress: append([]byte(nil), shardAddress...), + shardKey: shardKey, + snapshotPath: snapshotPath, + network: network, + tokenShard: tokenShard, + } +} + +func (h *AppSyncHooks) BeforeMeshSync( + ctx context.Context, + p *SyncProvider[*protobufs.AppShardFrame, *protobufs.AppShardProposal], +) { + h.ensureHyperSync(ctx, p) + h.ensureSnapshot(p) +} + +func (h *AppSyncHooks) ensureHyperSync( + ctx context.Context, + p *SyncProvider[*protobufs.AppShardFrame, *protobufs.AppShardProposal], +) { + if p.forks == nil || len(h.shardAddress) == 0 { + return + } + + head := p.forks.FinalizedState() + if head == nil || 
head.State == nil { + return + } + + frame := *head.State + if frame == nil || frame.Header == nil { + return + } + + stateRoots, err := p.hypergraph.CommitShard( + frame.Header.FrameNumber, + h.shardAddress, + ) + if err != nil { + p.logger.Debug( + "could not compute shard commitments for hypersync check", + zap.Error(err), + ) + return + } + + mismatch := len(stateRoots) != len(frame.Header.StateRoots) + if !mismatch { + for i := range frame.Header.StateRoots { + if !bytes.Equal(stateRoots[i], frame.Header.StateRoots[i]) { + mismatch = true + break + } + } + } + + if mismatch { + p.logger.Info( + "detected divergence between local hypergraph and frame roots, initiating hypersync", + zap.Uint64("frame_number", frame.Header.FrameNumber), + ) + p.HyperSync(ctx, frame.Header.Prover, h.shardKey, frame.Header.Address) + } +} + +func (h *AppSyncHooks) ensureSnapshot( + p *SyncProvider[*protobufs.AppShardFrame, *protobufs.AppShardProposal], +) { + if !h.shouldAttemptSnapshot(p) { + return + } + + if err := downloadSnapshot(h.snapshotPath, h.network, h.shardAddress); err != nil { + p.logger.Warn("could not perform snapshot reload", zap.Error(err)) + return + } + + p.logger.Info( + "snapshot reload completed", + zap.String("path", h.snapshotPath), + ) +} + +func (h *AppSyncHooks) shouldAttemptSnapshot( + p *SyncProvider[*protobufs.AppShardFrame, *protobufs.AppShardProposal], +) bool { + if h.snapshotPath == "" || !h.tokenShard || h.network != 0 { + return false + } + + size := p.hypergraph.GetSize(nil, nil) + return size != nil && size.Cmp(big.NewInt(0)) == 0 +} + +func downloadSnapshot( + dbPath string, + network uint8, + lookupKey []byte, +) error { + if dbPath == "" { + return errors.New("snapshot path not configured") + } + + base := "https://frame-snapshots.quilibrium.com" + keyHex := fmt.Sprintf("%x", lookupKey) + + manifestURL := fmt.Sprintf("%s/%d/%s/manifest", base, network, keyHex) + resp, err := http.Get(manifestURL) + if err != nil { + return errors.Wrap(err, "download snapshot") + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return errors.Wrap( + fmt.Errorf("manifest http status %d", resp.StatusCode), + "download snapshot", + ) + } + + type mfLine struct { + Name string + Hash string + } + + var lines []mfLine + sc := bufio.NewScanner(resp.Body) + sc.Buffer(make([]byte, 0, 64*1024), 10*1024*1024) + for sc.Scan() { + raw := strings.TrimSpace(sc.Text()) + if raw == "" || strings.HasPrefix(raw, "#") { + continue + } + fields := strings.Fields(raw) + if len(fields) != 2 { + return errors.Wrap( + fmt.Errorf("invalid manifest line: %q", raw), + "download snapshot", + ) + } + name := fields[0] + hash := strings.ToLower(fields[1]) + if _, err := hex.DecodeString(hash); err != nil || len(hash) != 64 { + return errors.Wrap( + fmt.Errorf("invalid sha256 hex in manifest for %s: %q", name, hash), + "download snapshot", + ) + } + lines = append(lines, mfLine{Name: name, Hash: hash}) + } + if err := sc.Err(); err != nil { + return errors.Wrap(err, "download snapshot") + } + if len(lines) == 0 { + return errors.Wrap(errors.New("manifest is empty"), "download snapshot") + } + + snapDir := path.Join(dbPath, "snapshot") + _ = os.RemoveAll(snapDir) + if err := os.MkdirAll(snapDir, 0o755); err != nil { + return errors.Wrap(err, "download snapshot") + } + + for _, entry := range lines { + srcURL := fmt.Sprintf("%s/%d/%s/%s", base, network, keyHex, entry.Name) + dstPath := filepath.Join(snapDir, entry.Name) + + if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil { + 
return errors.Wrap( + fmt.Errorf("mkdir for %s: %w", dstPath, err), + "download snapshot", + ) + } + + if err := downloadWithRetryAndHash( + srcURL, + dstPath, + entry.Hash, + 5, + ); err != nil { + return errors.Wrap( + fmt.Errorf("downloading %s failed: %w", entry.Name, err), + "download snapshot", + ) + } + } + + return nil +} + +func downloadWithRetryAndHash( + url, dstPath, expectedHex string, + retries int, +) error { + var lastErr error + for attempt := 1; attempt <= retries; attempt++ { + if err := func() error { + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("http status %d", resp.StatusCode) + } + + tmp, err := os.CreateTemp(filepath.Dir(dstPath), ".part-*") + if err != nil { + return err + } + defer func() { + tmp.Close() + _ = os.Remove(tmp.Name()) + }() + + h := sha256.New() + if _, err := io.Copy(io.MultiWriter(tmp, h), resp.Body); err != nil { + return err + } + + sumHex := hex.EncodeToString(h.Sum(nil)) + if !strings.EqualFold(sumHex, expectedHex) { + return fmt.Errorf( + "hash mismatch for %s: expected %s, got %s", + url, + expectedHex, + sumHex, + ) + } + + if err := tmp.Sync(); err != nil { + return err + } + + if err := os.Rename(tmp.Name(), dstPath); err != nil { + return err + } + return nil + }(); err != nil { + lastErr = err + time.Sleep(time.Duration(200*attempt) * time.Millisecond) + continue + } + return nil + } + return lastErr +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/node/consensus/sync/sync_client.go b/node/consensus/sync/sync_client.go index dca0461..2c14476 100644 --- a/node/consensus/sync/sync_client.go +++ b/node/consensus/sync/sync_client.go @@ -187,7 +187,7 @@ func NewAppSyncClient( config: config, blsConstructor: blsConstructor, proposalProcessor: proposalProcessor, - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static } } diff --git a/node/consensus/sync/sync_provider.go b/node/consensus/sync/sync_provider.go index 8c9bbd3..4b502d8 100644 --- a/node/consensus/sync/sync_provider.go +++ b/node/consensus/sync/sync_provider.go @@ -60,10 +60,19 @@ type SyncProvider[StateT UniqueFrame, ProposalT any] struct { filter []byte proverAddress []byte + filterLabel string + hooks SyncProviderHooks[StateT, ProposalT] } var _ consensus.SyncProvider[*protobufs.GlobalFrame] = (*SyncProvider[*protobufs.GlobalFrame, *protobufs.GlobalProposal])(nil) +type SyncProviderHooks[StateT UniqueFrame, ProposalT any] interface { + BeforeMeshSync( + ctx context.Context, + provider *SyncProvider[StateT, ProposalT], + ) +} + func NewSyncProvider[StateT UniqueFrame, ProposalT any]( logger *zap.Logger, forks consensus.Forks[StateT], @@ -75,19 +84,26 @@ func NewSyncProvider[StateT UniqueFrame, ProposalT any]( config *config.Config, filter []byte, proverAddress []byte, + hooks SyncProviderHooks[StateT, ProposalT], ) *SyncProvider[StateT, ProposalT] { + label := "global" + if len(filter) > 0 { + label = hex.EncodeToString(filter) + } return &SyncProvider[StateT, ProposalT]{ logger: logger, - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static forks: forks, proverRegistry: proverRegistry, signerRegistry: signerRegistry, peerInfoManager: peerInfoManager, proposalSynchronizer: proposalSynchronizer, hypergraph: hypergraph, - proverAddress: proverAddress, + proverAddress: proverAddress, // buildutils:allow-slice-alias slice is static config: config, queuedStates: make(chan syncRequest, 
defaultStateQueueCapacity), + filterLabel: label, + hooks: hooks, } } @@ -190,7 +206,7 @@ func (p *SyncProvider[StateT, ProposalT]) Synchronize( dataCh <- head.State } - syncStatusCheck.WithLabelValues("synced").Inc() + syncStatusCheck.WithLabelValues(p.filterLabel, "synced").Inc() errCh <- nil }() @@ -202,6 +218,10 @@ func (p *SyncProvider[StateT, ProposalT]) syncWithMesh( ) error { p.logger.Info("synchronizing with peers") + if p.hooks != nil { + p.hooks.BeforeMeshSync(ctx, p) + } + head := p.forks.FinalizedState() peers, err := p.proverRegistry.GetActiveProvers(p.filter) @@ -343,6 +363,7 @@ func (p *SyncProvider[StateT, ProposalT]) HyperSync( ctx context.Context, prover []byte, shardKey tries.ShardKey, + filter []byte, ) { registry, err := p.signerRegistry.GetKeyRegistryByProver(prover) if err != nil || registry == nil || registry.IdentityKey == nil { @@ -392,7 +413,7 @@ func (p *SyncProvider[StateT, ProposalT]) HyperSync( } for _, reachability := range info.Reachability { - if !bytes.Equal(reachability.Filter, p.filter) { + if !bytes.Equal(reachability.Filter, filter) { continue } for _, s := range reachability.StreamMultiaddrs { diff --git a/node/consensus/time/app_time_reel.go b/node/consensus/time/app_time_reel.go index 9715bea..7c91abb 100644 --- a/node/consensus/time/app_time_reel.go +++ b/node/consensus/time/app_time_reel.go @@ -127,7 +127,7 @@ func NewAppTimeReel( return &AppTimeReel{ logger: logger, - address: address, + address: address, // buildutils:allow-slice-alias slice is static proverRegistry: proverRegistry, nodes: make(map[string]*FrameNode), framesByNumber: make(map[uint64][]*FrameNode), diff --git a/node/datarpc/data_worker_ipc_server.go b/node/datarpc/data_worker_ipc_server.go index d0d3d10..edf365d 100644 --- a/node/datarpc/data_worker_ipc_server.go +++ b/node/datarpc/data_worker_ipc_server.go @@ -3,6 +3,7 @@ package datarpc import ( "context" "encoding/hex" + "time" pcrypto "github.com/libp2p/go-libp2p/core/crypto" "github.com/multiformats/go-multiaddr" @@ -45,6 +46,8 @@ type DataWorkerIPCServer struct { server *grpc.Server frameProver crypto.FrameProver quit chan struct{} + peerInfoCtx lifecycle.SignalerContext + peerInfoCancel context.CancelFunc } func NewDataWorkerIPCServer( @@ -103,6 +106,24 @@ func NewDataWorkerIPCServer( } func (r *DataWorkerIPCServer) Start() error { + peerInfoCtx, peerInfoCancel, _ := lifecycle.WithSignallerAndCancel( + context.Background(), + ) + peerInfoReady := make(chan struct{}) + go r.peerInfoManager.Start( + peerInfoCtx, + func() { + close(peerInfoReady) + }, + ) + select { + case <-peerInfoReady: + case <-time.After(5 * time.Second): + r.logger.Warn("peer info manager did not start before timeout") + } + r.peerInfoCtx = peerInfoCtx + r.peerInfoCancel = peerInfoCancel + r.RespawnServer(nil) <-r.quit @@ -115,6 +136,10 @@ func (r *DataWorkerIPCServer) Stop() error { if r.server != nil { r.server.GracefulStop() } + if r.peerInfoCancel != nil { + r.peerInfoCancel() + r.peerInfoCancel = nil + } go func() { r.quit <- struct{}{} }() diff --git a/node/execution/engines/compute_execution_engine.go b/node/execution/engines/compute_execution_engine.go index eec711e..cb6e8f2 100644 --- a/node/execution/engines/compute_execution_engine.go +++ b/node/execution/engines/compute_execution_engine.go @@ -726,7 +726,7 @@ func (e *ComputeExecutionEngine) handleBundle( responses := &execution.ProcessMessageResult{} - movingAddress := address + movingAddress := address // buildutils:allow-slice-alias assigned slice will not mutate, reassignment will // 
Validate fees distribute correctly feeQueue := fees.CollectBundleFees(bundle, DefaultFeeMarket) diff --git a/node/execution/engines/token_execution_engine.go b/node/execution/engines/token_execution_engine.go index 70f90ab..8613d57 100644 --- a/node/execution/engines/token_execution_engine.go +++ b/node/execution/engines/token_execution_engine.go @@ -663,7 +663,7 @@ func (e *TokenExecutionEngine) processIndividualMessage( ) (*execution.ProcessMessageResult, []byte, error) { payload := []byte{} var err error - domain := address + domain := address // buildutils:allow-slice-alias assigned slice will not mutate, reassignment will switch message.Request.(type) { case *protobufs.MessageRequest_TokenDeploy: payload, err = message.GetTokenDeploy().ToCanonicalBytes() diff --git a/node/execution/intrinsics/compute/compute_conversions.go b/node/execution/intrinsics/compute/compute_conversions.go index 7115ebc..cc69154 100644 --- a/node/execution/intrinsics/compute/compute_conversions.go +++ b/node/execution/intrinsics/compute/compute_conversions.go @@ -406,7 +406,7 @@ func CodeFinalizeFromProtobuf( verEnc: verEnc, keyManager: keyManager, config: config, - privateKey: privateKey, + privateKey: privateKey, // buildutils:allow-slice-alias slice is static }, nil } diff --git a/node/execution/intrinsics/compute/compute_intrinsic_code_deployment.go b/node/execution/intrinsics/compute/compute_intrinsic_code_deployment.go index 7c3f98d..4c426e9 100644 --- a/node/execution/intrinsics/compute/compute_intrinsic_code_deployment.go +++ b/node/execution/intrinsics/compute/compute_intrinsic_code_deployment.go @@ -40,10 +40,10 @@ func NewCodeDeployment( compiler compiler.CircuitCompiler, ) (*CodeDeployment, error) { return &CodeDeployment{ - inputQCLSource: sourceCode, + inputQCLSource: sourceCode, // buildutils:allow-slice-alias slice is static Domain: domain, InputTypes: inputTypes, - OutputTypes: outputTypes, + OutputTypes: outputTypes, // buildutils:allow-slice-alias slice is static inputSizes: inputSizes, compiler: compiler, }, nil diff --git a/node/execution/intrinsics/compute/compute_intrinsic_code_execute.go b/node/execution/intrinsics/compute/compute_intrinsic_code_execute.go index acc9079..5d321ad 100644 --- a/node/execution/intrinsics/compute/compute_intrinsic_code_execute.go +++ b/node/execution/intrinsics/compute/compute_intrinsic_code_execute.go @@ -99,15 +99,15 @@ func NewCodeExecute( Domain: domain, ProofOfPayment: [2][]byte{}, Rendezvous: rendezvous, - ExecuteOperations: operations, + ExecuteOperations: operations, // buildutils:allow-slice-alias slice is static hypergraph: hypergraph, bulletproofProver: bulletproofProver, inclusionProver: inclusionProver, verEnc: verEnc, decafConstructor: decafConstructor, keyManager: keyManager, - payerPublicKey: payerPublicKey, - secretKey: secretKey, + payerPublicKey: payerPublicKey, // buildutils:allow-slice-alias slice is static + secretKey: secretKey, // buildutils:allow-slice-alias slice is static rdfMultiprover: schema.NewRDFMultiprover( &schema.TurtleRDFParser{}, inclusionProver, diff --git a/node/execution/intrinsics/compute/compute_intrinsic_code_finalize.go b/node/execution/intrinsics/compute/compute_intrinsic_code_finalize.go index 102e160..1d7f724 100644 --- a/node/execution/intrinsics/compute/compute_intrinsic_code_finalize.go +++ b/node/execution/intrinsics/compute/compute_intrinsic_code_finalize.go @@ -71,11 +71,11 @@ func NewCodeFinalize( ) *CodeFinalize { return &CodeFinalize{ Rendezvous: rendezvous, - Results: results, - StateChanges: 
stateChanges, - MessageOutput: messageOutput, + Results: results, // buildutils:allow-slice-alias slice is static + StateChanges: stateChanges, // buildutils:allow-slice-alias slice is static + MessageOutput: messageOutput, // buildutils:allow-slice-alias slice is static domain: domain, - privateKey: privateKey, + privateKey: privateKey, // buildutils:allow-slice-alias slice is static config: config, hypergraph: hypergraph, bulletproofProver: bulletproofProver, diff --git a/node/execution/intrinsics/global/global_conversions.go b/node/execution/intrinsics/global/global_conversions.go index fd82e62..8669be2 100644 --- a/node/execution/intrinsics/global/global_conversions.go +++ b/node/execution/intrinsics/global/global_conversions.go @@ -29,7 +29,7 @@ func BLS48581G2PublicKeyToProtobuf( return nil } return &protobufs.BLS48581G2PublicKey{ - KeyValue: keyValue, + KeyValue: keyValue, // buildutils:allow-slice-alias slice is static } } diff --git a/node/execution/intrinsics/global/global_prover_confirm.go b/node/execution/intrinsics/global/global_prover_confirm.go index 6195340..eb338ae 100644 --- a/node/execution/intrinsics/global/global_prover_confirm.go +++ b/node/execution/intrinsics/global/global_prover_confirm.go @@ -49,7 +49,7 @@ func NewProverConfirm( rdfMultiprover *schema.RDFMultiprover, ) (*ProverConfirm, error) { return &ProverConfirm{ - Filters: filters, + Filters: filters, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, keyManager: keyManager, hypergraph: hypergraph, diff --git a/node/execution/intrinsics/global/global_prover_join.go b/node/execution/intrinsics/global/global_prover_join.go index 31677b5..6937250 100644 --- a/node/execution/intrinsics/global/global_prover_join.go +++ b/node/execution/intrinsics/global/global_prover_join.go @@ -93,10 +93,10 @@ func NewProverJoin( frameStore store.ClockStore, ) (*ProverJoin, error) { return &ProverJoin{ - Filters: filters, + Filters: filters, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, - MergeTargets: mergeTargets, - DelegateAddress: delegateAddress, + MergeTargets: mergeTargets, // buildutils:allow-slice-alias slice is static + DelegateAddress: delegateAddress, // buildutils:allow-slice-alias slice is static keyManager: keyManager, hypergraph: hypergraph, rdfMultiprover: rdfMultiprover, @@ -710,8 +710,8 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) { fmt.Sprintf("frame number: %d", p.FrameNumber), ), "verify") } - frames.Close() frame, err = frames.Value() + frames.Close() if err != nil { return false, errors.Wrap(errors.Wrap( err, diff --git a/node/execution/intrinsics/global/global_prover_kick.go b/node/execution/intrinsics/global/global_prover_kick.go index 57548c8..d8bf5b7 100644 --- a/node/execution/intrinsics/global/global_prover_kick.go +++ b/node/execution/intrinsics/global/global_prover_kick.go @@ -60,9 +60,9 @@ func NewProverKick( ) (*ProverKick, error) { return &ProverKick{ FrameNumber: frameNumber, - KickedProverPublicKey: kickedProverPublicKey, - ConflictingFrame1: conflictingFrame1, - ConflictingFrame2: conflictingFrame2, + KickedProverPublicKey: kickedProverPublicKey, // buildutils:allow-slice-alias slice is static + ConflictingFrame1: conflictingFrame1, // buildutils:allow-slice-alias slice is static + ConflictingFrame2: conflictingFrame2, // buildutils:allow-slice-alias slice is static blsConstructor: blsConstructor, frameProver: frameProver, hypergraph: hypergraph, @@ -415,8 +415,8 @@ func (p *ProverKick) Verify(frameNumber uint64) 
(bool, error) { fmt.Sprintf("frame number: %d", p.FrameNumber), ), "verify") } - frames.Close() frame, err = frames.Value() + frames.Close() if err != nil { return false, errors.Wrap(errors.Wrap( err, diff --git a/node/execution/intrinsics/global/global_prover_leave.go b/node/execution/intrinsics/global/global_prover_leave.go index d347e81..f2fb9b6 100644 --- a/node/execution/intrinsics/global/global_prover_leave.go +++ b/node/execution/intrinsics/global/global_prover_leave.go @@ -40,7 +40,7 @@ func NewProverLeave( rdfMultiprover *schema.RDFMultiprover, ) (*ProverLeave, error) { return &ProverLeave{ - Filters: filters, + Filters: filters, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, keyManager: keyManager, hypergraph: hypergraph, diff --git a/node/execution/intrinsics/global/global_prover_pause.go b/node/execution/intrinsics/global/global_prover_pause.go index ae0ac3e..de322c3 100644 --- a/node/execution/intrinsics/global/global_prover_pause.go +++ b/node/execution/intrinsics/global/global_prover_pause.go @@ -40,7 +40,7 @@ func NewProverPause( rdfMultiprover *schema.RDFMultiprover, ) (*ProverPause, error) { return &ProverPause{ - Filter: filter, + Filter: filter, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, keyManager: keyManager, hypergraph: hypergraph, diff --git a/node/execution/intrinsics/global/global_prover_reject.go b/node/execution/intrinsics/global/global_prover_reject.go index c235818..2091e68 100644 --- a/node/execution/intrinsics/global/global_prover_reject.go +++ b/node/execution/intrinsics/global/global_prover_reject.go @@ -41,7 +41,7 @@ func NewProverReject( rdfMultiprover *schema.RDFMultiprover, ) (*ProverReject, error) { return &ProverReject{ - Filters: filters, + Filters: filters, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, keyManager: keyManager, hypergraph: hypergraph, diff --git a/node/execution/intrinsics/global/global_prover_resume.go b/node/execution/intrinsics/global/global_prover_resume.go index 56a710a..8c5b480 100644 --- a/node/execution/intrinsics/global/global_prover_resume.go +++ b/node/execution/intrinsics/global/global_prover_resume.go @@ -40,7 +40,7 @@ func NewProverResume( rdfMultiprover *schema.RDFMultiprover, ) (*ProverResume, error) { return &ProverResume{ - Filter: filter, + Filter: filter, // buildutils:allow-slice-alias slice is static FrameNumber: frameNumber, keyManager: keyManager, hypergraph: hypergraph, diff --git a/node/execution/intrinsics/global/global_prover_update.go b/node/execution/intrinsics/global/global_prover_update.go index 88f5a0c..cf15c10 100644 --- a/node/execution/intrinsics/global/global_prover_update.go +++ b/node/execution/intrinsics/global/global_prover_update.go @@ -42,7 +42,7 @@ func NewProverUpdate( keyManager keys.KeyManager, ) *ProverUpdate { return &ProverUpdate{ - DelegateAddress: delegateAddress, + DelegateAddress: delegateAddress, // buildutils:allow-slice-alias slice is static PublicKeySignatureBLS48581: publicKeySignatureBLS48581, hypergraph: hypergraph, signer: signer, diff --git a/node/execution/intrinsics/hypergraph/hypergraph_intrinsic.go b/node/execution/intrinsics/hypergraph/hypergraph_intrinsic.go index d0f8f8d..1f84218 100644 --- a/node/execution/intrinsics/hypergraph/hypergraph_intrinsic.go +++ b/node/execution/intrinsics/hypergraph/hypergraph_intrinsic.go @@ -125,7 +125,7 @@ func LoadHypergraphIntrinsic( lockedWrites: make(map[string]struct{}), lockedReads: make(map[string]int), hypergraph: hypergraph, - domain: 
appAddress, + domain: appAddress, // buildutils:allow-slice-alias slice is static config: config, consensusMetadata: consensusMetadata, sumcheckInfo: sumcheckInfo, diff --git a/node/execution/intrinsics/hypergraph/hypergraph_vertex_add.go b/node/execution/intrinsics/hypergraph/hypergraph_vertex_add.go index 436efdc..3d4ab0d 100644 --- a/node/execution/intrinsics/hypergraph/hypergraph_vertex_add.go +++ b/node/execution/intrinsics/hypergraph/hypergraph_vertex_add.go @@ -47,8 +47,8 @@ func NewVertexAdd( signer: signer, config: config, verenc: verenc, - rawData: rawData, - encryptionKey: encryptionKey, + rawData: rawData, // buildutils:allow-slice-alias slice is static + encryptionKey: encryptionKey, // buildutils:allow-slice-alias slice is static keyManager: keyManager, } } diff --git a/node/execution/intrinsics/token/token_intrinsic_mint_transaction.go b/node/execution/intrinsics/token/token_intrinsic_mint_transaction.go index 6a6cb55..3b9be3a 100644 --- a/node/execution/intrinsics/token/token_intrinsic_mint_transaction.go +++ b/node/execution/intrinsics/token/token_intrinsic_mint_transaction.go @@ -67,7 +67,7 @@ func NewMintTransactionInput( ) (*MintTransactionInput, error) { return &MintTransactionInput{ Value: value, - contextData: contextData, + contextData: contextData, // buildutils:allow-slice-alias slice is static }, nil } @@ -1766,8 +1766,8 @@ func (i *MintTransactionInput) verifyWithProofOfMeaningfulWork( fmt.Sprintf("frame number: %d", frameNumber), ), "verify with mint with proof of meaningful work") } - frames.Close() frame, err = frames.Value() + frames.Close() if err != nil { return errors.Wrap(errors.Wrap( err, @@ -1965,8 +1965,8 @@ func NewMintTransactionOutput( return &MintTransactionOutput{ value: value, RecipientOutput: RecipientBundle{ - recipientView: recipientViewPubkey, - recipientSpend: recipientSpendPubkey, + recipientView: recipientViewPubkey, // buildutils:allow-slice-alias slice is static + recipientSpend: recipientSpendPubkey, // buildutils:allow-slice-alias slice is static }, }, nil } @@ -2128,9 +2128,9 @@ func NewMintTransaction( ) *MintTransaction { return &MintTransaction{ Domain: domain, - Inputs: inputs, - Outputs: outputs, - Fees: fees, + Inputs: inputs, // buildutils:allow-slice-alias slice is static + Outputs: outputs, // buildutils:allow-slice-alias slice is static + Fees: fees, // buildutils:allow-slice-alias slice is static hypergraph: hypergraph, bulletproofProver: bulletproofProver, inclusionProver: inclusionProver, diff --git a/node/execution/intrinsics/token/token_intrinsic_pending_transaction.go b/node/execution/intrinsics/token/token_intrinsic_pending_transaction.go index 93e0349..5f864ff 100644 --- a/node/execution/intrinsics/token/token_intrinsic_pending_transaction.go +++ b/node/execution/intrinsics/token/token_intrinsic_pending_transaction.go @@ -59,7 +59,7 @@ func NewPendingTransactionInput(address []byte) ( error, ) { return &PendingTransactionInput{ - address: address, + address: address, // buildutils:allow-slice-alias slice is static }, nil } @@ -798,12 +798,12 @@ func NewPendingTransactionOutput( return &PendingTransactionOutput{ value: value, ToOutput: RecipientBundle{ - recipientView: toViewPubkey, - recipientSpend: toSpendPubkey, + recipientView: toViewPubkey, // buildutils:allow-slice-alias slice is static + recipientSpend: toSpendPubkey, // buildutils:allow-slice-alias slice is static }, RefundOutput: RecipientBundle{ - recipientView: refundViewPubkey, - recipientSpend: refundSpendPubkey, + recipientView: refundViewPubkey, // 
buildutils:allow-slice-alias slice is static + recipientSpend: refundSpendPubkey, // buildutils:allow-slice-alias slice is static }, Expiration: expiration, }, nil @@ -1067,9 +1067,9 @@ func NewPendingTransaction( ) *PendingTransaction { return &PendingTransaction{ Domain: domain, - Inputs: inputs, - Outputs: outputs, - Fees: fees, + Inputs: inputs, // buildutils:allow-slice-alias slice is static + Outputs: outputs, // buildutils:allow-slice-alias slice is static + Fees: fees, // buildutils:allow-slice-alias slice is static hypergraph: hypergraph, bulletproofProver: bulletproofProver, inclusionProver: inclusionProver, diff --git a/node/execution/intrinsics/token/token_intrinsic_transaction.go b/node/execution/intrinsics/token/token_intrinsic_transaction.go index 6b2c586..2c0507e 100644 --- a/node/execution/intrinsics/token/token_intrinsic_transaction.go +++ b/node/execution/intrinsics/token/token_intrinsic_transaction.go @@ -60,7 +60,7 @@ type TransactionInput struct { func NewTransactionInput(address []byte) (*TransactionInput, error) { return &TransactionInput{ - address: address, + address: address, // buildutils:allow-slice-alias slice is static }, nil } @@ -818,8 +818,8 @@ func NewTransactionOutput( return &TransactionOutput{ value: value, RecipientOutput: RecipientBundle{ - recipientView: recipientViewPubkey, - recipientSpend: recipientSpendPubkey, + recipientView: recipientViewPubkey, // buildutils:allow-slice-alias slice is static + recipientSpend: recipientSpendPubkey, // buildutils:allow-slice-alias slice is static }, }, nil } @@ -972,9 +972,9 @@ func NewTransaction( ) *Transaction { return &Transaction{ Domain: domain, - Inputs: inputs, - Outputs: outputs, - Fees: fees, + Inputs: inputs, // buildutils:allow-slice-alias slice is static + Outputs: outputs, // buildutils:allow-slice-alias slice is static + Fees: fees, // buildutils:allow-slice-alias slice is static hypergraph: hypergraph, bulletproofProver: bulletproofProver, inclusionProver: inclusionProver, diff --git a/node/execution/manager/execution_manager.go b/node/execution/manager/execution_manager.go index 862f82c..058077e 100644 --- a/node/execution/manager/execution_manager.go +++ b/node/execution/manager/execution_manager.go @@ -396,7 +396,7 @@ func (m *ExecutionEngineManager) ProcessMessage( } } - route := address + route := address // buildutils:allow-slice-alias slice reassigns itself if !(bytes.Equal(route, compute.COMPUTE_INTRINSIC_DOMAIN[:]) || bytes.Equal(route, hypergraphintrinsic.HYPERGRAPH_BASE_DOMAIN[:]) || diff --git a/node/execution/state/hypergraph/hypergraph_state.go b/node/execution/state/hypergraph/hypergraph_state.go index 2ff07c7..2b3a337 100644 --- a/node/execution/state/hypergraph/hypergraph_state.go +++ b/node/execution/state/hypergraph/hypergraph_state.go @@ -685,9 +685,9 @@ func (h *HypergraphState) Delete( h.mu.Lock() h.changeset = append(h.changeset, state.StateChange{ - Domain: domain, - Address: address, - Discriminator: discriminator, + Domain: domain, // buildutils:allow-slice-alias slice is static + Address: address, // buildutils:allow-slice-alias slice is static + Discriminator: discriminator, // buildutils:allow-slice-alias slice is static StateChange: state.DeleteStateChangeEvent, Value: value, }) @@ -787,9 +787,9 @@ func (h *HypergraphState) Set( h.mu.Lock() h.changeset = append(h.changeset, state.StateChange{ - Domain: domain, - Address: address, - Discriminator: discriminator, + Domain: domain, // buildutils:allow-slice-alias slice is static + Address: address, // 
buildutils:allow-slice-alias slice is static + Discriminator: discriminator, // buildutils:allow-slice-alias slice is static StateChange: stateChange, Value: value, }) diff --git a/node/go.mod b/node/go.mod index 1db293f..55534da 100644 --- a/node/go.mod +++ b/node/go.mod @@ -50,7 +50,8 @@ require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/cockroachdb/pebble v1.1.4 + github.com/cockroachdb/pebble v1.1.5 + github.com/cockroachdb/pebble/v2 v2.1.2 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 github.com/libp2p/go-libp2p v0.41.1 github.com/libp2p/go-libp2p-kad-dht v0.23.0 @@ -79,12 +80,16 @@ require ( require ( filippo.io/edwards25519 v1.0.0-rc.1 // indirect + github.com/RaduBerinde/axisds v0.0.0-20250419182453-5135a0650657 // indirect + github.com/RaduBerinde/btreemap v0.0.0-20250419174037-3d62b7205d54 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect github.com/charmbracelet/x/ansi v0.10.1 // indirect github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect github.com/charmbracelet/x/term v0.2.1 // indirect + github.com/cockroachdb/crlib v0.0.0-20241112164430-1264a2edc35b // indirect github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce // indirect + github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 // indirect github.com/deiu/gon3 v0.0.0-20241212124032-93153c038193 // indirect github.com/deiu/rdf2go v0.0.0-20241212211204-b661ba0dfd25 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect @@ -97,6 +102,7 @@ require ( github.com/markkurossi/tabulate v0.0.0-20230223130100-d4965869b123 // indirect github.com/mattn/go-localereader v0.0.1 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/minio/minlz v1.0.1-0.20250507153514-87eb42fe8882 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect @@ -137,7 +143,7 @@ require ( ) require ( - github.com/DataDog/zstd v1.4.5 // indirect + github.com/DataDog/zstd v1.5.7 // indirect github.com/btcsuite/btcd v0.21.0-beta.0.20201114000516-e9c7a5ac6401 github.com/bwesterb/go-ristretto v1.2.3 // indirect github.com/cockroachdb/errors v1.11.3 // indirect @@ -147,7 +153,7 @@ require ( github.com/consensys/gnark-crypto v0.5.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/getsentry/sentry-go v0.27.0 // indirect - github.com/golang/snappy v0.0.4 // indirect + github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e // indirect github.com/gorilla/websocket v1.5.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 github.com/kr/pretty v0.3.1 // indirect @@ -238,7 +244,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 golang.org/x/crypto v0.39.0 - golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 + golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect golang.org/x/mod v0.25.0 // indirect golang.org/x/net v0.41.0 // indirect golang.org/x/sync v0.17.0 diff --git a/node/go.sum b/node/go.sum index e22e523..be20e3a 100644 --- a/node/go.sum +++ b/node/go.sum @@ -10,8 +10,14 @@ filippo.io/edwards25519 v1.0.0-rc.1 h1:m0VOOB23frXZvAOK44usCgLWvtsxIoMCTBGJZlpmG filippo.io/edwards25519 v1.0.0-rc.1/go.mod h1:N1IkdkCkiLB6tki+MYJoSx2JTY9NUlxZE7eHn5EwJns= 
git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= -github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= +github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/RaduBerinde/axisds v0.0.0-20250419182453-5135a0650657 h1:8XBWWQD+vFF+JqOsm16t0Kab1a7YWV8+GISVEP8AuZ8= +github.com/RaduBerinde/axisds v0.0.0-20250419182453-5135a0650657/go.mod h1:UHGJonU9z4YYGKJxSaC6/TNcLOBptpmM5m2Cksbnw0Y= +github.com/RaduBerinde/btreemap v0.0.0-20250419174037-3d62b7205d54 h1:bsU8Tzxr/PNz75ayvCnxKZWEYdLMPDkUgticP4a4Bvk= +github.com/RaduBerinde/btreemap v0.0.0-20250419174037-3d62b7205d54/go.mod h1:0tr7FllbE9gJkHq7CVeeDDFAFKQVy5RnCSSNBOvdqbc= +github.com/aclements/go-perfevent v0.0.0-20240301234650-f7843625020f h1:JjxwchlOepwsUWcQwD2mLUAGE9aCp0/ehy6yCHFBOvo= +github.com/aclements/go-perfevent v0.0.0-20240301234650-f7843625020f/go.mod h1:tMDTce/yLLN/SK8gMOxQfnyeMeCg8KGzp0D1cbECEeo= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= @@ -60,18 +66,26 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= -github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/crlib v0.0.0-20241112164430-1264a2edc35b h1:SHlYZ/bMx7frnmeqCu+xm0TCxXLzX3jQIVuFbnFGtFU= +github.com/cockroachdb/crlib v0.0.0-20241112164430-1264a2edc35b/go.mod h1:Gq51ZeKaFCXk6QwuGM0w1dnaOqc/F5zKT2zA9D6Xeac= +github.com/cockroachdb/datadriven v1.0.3-0.20250407164829-2945557346d5 h1:UycK/E0TkisVrQbSoxvU827FwgBBcZ95nRRmpj/12QI= +github.com/cockroachdb/datadriven v1.0.3-0.20250407164829-2945557346d5/go.mod h1:jsaKMvD3RBCATk1/jbUZM8C9idWBJME9+VRZ5+Liq1g= github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8= github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce h1:giXvy4KSc/6g/esnpM7Geqxka4WSqI1SZc7sMJFd3y4= github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce/go.mod h1:9/y3cnZ5GKakj/H4y9r9GTjCvAFta7KLgSHPJJYc52M= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= -github.com/cockroachdb/pebble v1.1.4 h1:5II1uEP4MyHLDnsrbv/EZ36arcb9Mxg3n+owhZ3GrG8= -github.com/cockroachdb/pebble v1.1.4/go.mod h1:4exszw1r40423ZsmkG/09AFEG83I0uDgfujJdbL6kYU= +github.com/cockroachdb/metamorphic 
v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= +github.com/cockroachdb/pebble v1.1.5 h1:5AAWCBWbat0uE0blr8qzufZP5tBjkRyy/jWe1QWLnvw= +github.com/cockroachdb/pebble v1.1.5/go.mod h1:17wO9el1YEigxkP/YtV8NtCivQDgoCyBg5c4VR/eOWo= +github.com/cockroachdb/pebble/v2 v2.1.2 h1:IwYt+Y2Cdw6egblwk1kWzdmJvD2680t5VK/3i0BJ6IA= +github.com/cockroachdb/pebble/v2 v2.1.2/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 h1:Nua446ru3juLHLZd4AwKNzClZgL1co3pUPGv3o8FlcA= +github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961/go.mod h1:yBRu/cnL4ks9bgy4vAASdjIW+/xMlFwuHKqtmh3GZQg= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI= @@ -116,6 +130,8 @@ github.com/gammazero/workerpool v1.1.3 h1:WixN4xzukFoN0XSeXF6puqEqFTl2mECI9S6W44 github.com/gammazero/workerpool v1.1.3/go.mod h1:wPjyBLDbyKnUn2XwwyD3EEwo9dHutia9/fwNmSHWACc= github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= +github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9 h1:r5GgOLGbza2wVHRzK7aAj6lWZjfbAwiu/RDCVOKjRyM= +github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9/go.mod h1:106OIgooyS7OzLDOpUGgm9fA3bQENb/cFSyyBmMoJDs= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= @@ -149,8 +165,8 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e h1:4bw4WeyTYPp0smaXiJZCNnLrvVBqirQVreixayXezGc= +github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -315,6 +331,8 @@ github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b/go.mod h1:lxPUiZwKo github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc h1:PTfri+PuQmWDqERdnNMiD9ZejrlswWrCpBEZgWOiTrc= github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc/go.mod 
h1:cGKTAVKx4SxOuR/czcZ/E2RSJ3sfHs8FpHhQ5CWMf9s= github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= +github.com/minio/minlz v1.0.1-0.20250507153514-87eb42fe8882 h1:0lgqHvJWHLGW5TuObJrfyEi6+ASTKDBWikGvPqy9Yiw= +github.com/minio/minlz v1.0.1-0.20250507153514-87eb42fe8882/go.mod h1:qT0aEB35q79LLornSzeDH75LBf3aH1MV+jB5w9Wasec= github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= diff --git a/node/main.go b/node/main.go index e957a96..8a7d9f8 100644 --- a/node/main.go +++ b/node/main.go @@ -604,11 +604,14 @@ func main() { ) logger.Warn("you are at risk of running out of memory during runtime") default: - if _, limit := os.LookupEnv("GOMEMLIMIT"); !limit { - rdebug.SetMemoryLimit(availableOverhead * 8 / 10) - } - if _, explicitGOGC := os.LookupEnv("GOGC"); !explicitGOGC { - rdebug.SetGCPercent(10) + // Use defaults if archive mode: + if !nodeConfig.Engine.ArchiveMode { + if _, limit := os.LookupEnv("GOMEMLIMIT"); !limit { + rdebug.SetMemoryLimit(availableOverhead * 8 / 10) + } + if _, explicitGOGC := os.LookupEnv("GOGC"); !explicitGOGC { + rdebug.SetGCPercent(10) + } } } } diff --git a/node/p2p/blossomsub.go b/node/p2p/blossomsub.go index bf98dc0..3dc1525 100644 --- a/node/p2p/blossomsub.go +++ b/node/p2p/blossomsub.go @@ -1153,6 +1153,10 @@ func (b *BlossomSub) initConnectivityServices( isBootstrapPeer bool, bootstrappers []peer.AddrInfo, ) { + if b.p2pConfig.Network != 0 { + return + } + if b.h == nil { return } @@ -1422,7 +1426,7 @@ func (s *connectivityService) TestConnectivity( continue } // Build UDP multiaddr with actual IP - newAddr, err := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/udp/%s", host, port)) + newAddr, err := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/udp/%s/quic-v1", host, port)) if err != nil { continue } diff --git a/node/p2p/internal/peer_connector.go b/node/p2p/internal/peer_connector.go index 46e0921..a5d32d7 100644 --- a/node/p2p/internal/peer_connector.go +++ b/node/p2p/internal/peer_connector.go @@ -238,7 +238,7 @@ func (cpc *chainedPeerConnector) run() { func NewChainedPeerConnector(ctx context.Context, connectors ...PeerConnector) PeerConnector { cpc := &chainedPeerConnector{ ctx: ctx, - connectors: connectors, + connectors: connectors, // buildutils:allow-slice-alias slice is static connectCh: make(chan (chan<- struct{})), } go cpc.run() diff --git a/node/p2p/internal/peer_monitor.go b/node/p2p/internal/peer_monitor.go index f741b2b..baa5392 100644 --- a/node/p2p/internal/peer_monitor.go +++ b/node/p2p/internal/peer_monitor.go @@ -143,7 +143,7 @@ func MonitorPeers( timeout: timeout, period: period, attempts: attempts, - direct: directPeers, + direct: directPeers, // buildutils:allow-slice-alias slice is static directPeriod: 10 * time.Second, } diff --git a/node/p2p/internal/peer_source.go b/node/p2p/internal/peer_source.go index fd33017..080f745 100644 --- a/node/p2p/internal/peer_source.go +++ b/node/p2p/internal/peer_source.go @@ -38,7 +38,7 @@ func (s *staticPeerSource) Peers(context.Context) ( // NewStaticPeerSource creates a new static peer source. 
func NewStaticPeerSource(peers []peer.AddrInfo, permute bool) PeerSource { - return &staticPeerSource{peers: peers, permute: permute} + return &staticPeerSource{peers: peers, permute: permute} // buildutils:allow-slice-alias slice is static } type routingDiscoveryPeerSource struct { diff --git a/node/p2p/onion/grpc_transport.go b/node/p2p/onion/grpc_transport.go index 1326e69..e863e11 100644 --- a/node/p2p/onion/grpc_transport.go +++ b/node/p2p/onion/grpc_transport.go @@ -45,7 +45,7 @@ func NewGRPCTransport( ) *GRPCTransport { return &GRPCTransport{ logger: logger, - peerID: peerID, + peerID: peerID, // buildutils:allow-slice-alias slice is static peers: peers, signers: signers, serverStreams: make(map[string]protobufs.OnionService_ConnectServer), @@ -80,9 +80,9 @@ func (g *GRPCTransport) Send( // Send via server stream (to connected client) or client stream (to remote server) if hasServer { msg := &protobufs.ReceiveMessage{ - SourcePeerId: peerID, + SourcePeerId: peerID, // buildutils:allow-slice-alias slice is static CircId: circID, - Cell: cell, + Cell: cell, // buildutils:allow-slice-alias slice is static } if err := serverStream.Send(msg); err != nil { g.logger.Warn( @@ -99,9 +99,9 @@ func (g *GRPCTransport) Send( } } else if hasClient { msg := &protobufs.SendMessage{ - PeerId: peerID, + PeerId: peerID, // buildutils:allow-slice-alias slice is static CircId: circID, - Cell: cell, + Cell: cell, // buildutils:allow-slice-alias slice is static } if err := clientStream.Send(msg); err != nil { g.logger.Warn( diff --git a/node/p2p/onion/router.go b/node/p2p/onion/router.go index d6c2e97..c940080 100644 --- a/node/p2p/onion/router.go +++ b/node/p2p/onion/router.go @@ -1022,7 +1022,7 @@ func nonceFrom(prefix [12]byte, ctr uint64) []byte { // applyForward encrypts for exit->...->entry (reverse hop order) func applyForward(c *Circuit, inner []byte) ([]byte, error) { - out := inner + out := inner // buildutils:allow-slice-alias slice is static for i := len(c.Hops) - 1; i >= 0; i-- { h := &c.Hops[i] nonce := nonceFrom(h.keys.kf.nonce, h.keys.fCtr) @@ -1036,7 +1036,7 @@ func applyForward(c *Circuit, inner []byte) ([]byte, error) { // peelBackward decrypts data coming back from entry (encrypting hop-by-hop with // Kb) func peelBackward(c *Circuit, outer []byte) ([]byte, error) { - in := outer + in := outer // buildutils:allow-slice-alias slice is static for i := 0; i < len(c.Hops); i++ { h := &c.Hops[i] nonce := nonceFrom(h.keys.kb.nonce, h.keys.bCtr) diff --git a/node/rpc/hypergraph_sync_rpc_server_test.go b/node/rpc/hypergraph_sync_rpc_server_test.go index faf3d9d..b63c936 100644 --- a/node/rpc/hypergraph_sync_rpc_server_test.go +++ b/node/rpc/hypergraph_sync_rpc_server_test.go @@ -176,6 +176,28 @@ func TestHypergraphSyncServer(t *testing.T) { } servertxn.Commit() clienttxn.Commit() + + // Seed an orphan vertex that only exists on the client so pruning can remove it. 
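+ // Reuse vertices1[0]'s app address so the orphan maps to the same shard key (checked by the require below).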
+ orphanData := make([]byte, 32) + _, _ = rand.Read(orphanData) + var orphanAddr [32]byte + copy(orphanAddr[:], orphanData) + orphanVertex := hgcrdt.NewVertex( + vertices1[0].GetAppAddress(), + orphanAddr, + dataTree1.Commit(inclusionProver, false), + dataTree1.GetSize(), + ) + orphanShard := application.GetShardKey(orphanVertex) + require.Equal(t, shardKey, orphanShard, "orphan vertex must share shard") + orphanTxn, err := clientHypergraphStore.NewTransaction(false) + require.NoError(t, err) + orphanID := orphanVertex.GetID() + require.NoError(t, clientHypergraphStore.SaveVertexTree(orphanTxn, orphanID[:], dataTree1)) + require.NoError(t, crdts[1].AddVertex(orphanTxn, orphanVertex)) + require.NoError(t, orphanTxn.Commit()) + clientSet := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey) + require.True(t, clientSet.Has(orphanID), "client must start with orphan leaf") logger.Info("saved") for _, op := range operations1 { @@ -276,14 +298,39 @@ func TestHypergraphSyncServer(t *testing.T) { if err != nil { log.Fatalf("Client: failed to sync 1: %v", err) } - time.Sleep(10 * time.Second) str.CloseSend() + require.False(t, clientSet.Has(orphanID), "orphan vertex should be pruned after sync") leaves := crypto.CompareLeaves( crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree(), crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree(), ) fmt.Println("pass completed, orphans:", len(leaves)) + // Ensure every leaf received during raw sync lies within the covered prefix path. + clientTree := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree() + coveredPrefixPath := clientTree.CoveredPrefix + if len(coveredPrefixPath) == 0 { + coveredPrefixPath = tries.GetFullPath(orphanID[:])[:0] + } + allLeaves := tries.GetAllLeaves( + clientTree.SetType, + clientTree.PhaseType, + clientTree.ShardKey, + clientTree.Root, + ) + for _, leaf := range allLeaves { + if leaf == nil { + continue + } + if len(coveredPrefixPath) > 0 { + require.True( + t, + isPrefix(coveredPrefixPath, tries.GetFullPath(leaf.Key)), + "raw sync leaf outside covered prefix", + ) + } + } + crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) @@ -296,7 +343,6 @@ func TestHypergraphSyncServer(t *testing.T) { if err != nil { log.Fatalf("Client: failed to sync 2: %v", err) } - time.Sleep(10 * time.Second) str.CloseSend() if !bytes.Equal( @@ -547,87 +593,6 @@ func TestHypergraphPartialSync(t *testing.T) { log.Fatalf("Client: failed to stream: %v", err) } - now := time.Now() - response, err := client.GetChildrenForPath(context.TODO(), &protobufs.GetChildrenForPathRequest{ - ShardKey: append(append([]byte{}, shardKey.L1[:]...), shardKey.L2[:]...), - Path: toUint32Slice(branchfork), - PhaseSet: protobufs.HypergraphPhaseSet_HYPERGRAPH_PHASE_SET_VERTEX_ADDS, - }) - fmt.Println(time.Since(now)) - - require.NoError(t, err) - - slices.Reverse(response.PathSegments) - sum := uint64(0) - size := big.NewInt(0) - longestBranch := uint32(0) - - for _, ps := range response.PathSegments { - for _, s := range ps.Segments { - switch seg := s.Segment.(type) { - case *protobufs.TreePathSegment_Branch: - if isPrefix(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - seg.Branch.Commitment = nil - branchSize := new(big.Int).SetBytes(seg.Branch.Size) - if sum == 0 { - sum = seg.Branch.LeafCount - size.Add(size, branchSize) - longestBranch = seg.Branch.LongestBranch - } - 
seg.Branch.LeafCount -= sum - seg.Branch.Size = branchSize.Sub(branchSize, size).Bytes() - seg.Branch.LongestBranch -= longestBranch - } - } - } - } - slices.Reverse(response.PathSegments) - for i, ps := range response.PathSegments { - for _, s := range ps.Segments { - switch seg := s.Segment.(type) { - case *protobufs.TreePathSegment_Leaf: - err := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().InsertLeafSkeleton( - nil, - &tries.LazyVectorCommitmentLeafNode{ - Key: seg.Leaf.Key, - Value: seg.Leaf.Value, - HashTarget: seg.Leaf.HashTarget, - Commitment: seg.Leaf.Commitment, - Size: new(big.Int).SetBytes(seg.Leaf.Size), - Store: crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Store, - }, - i == 0, - ) - if err != nil { - panic(err) - } - case *protobufs.TreePathSegment_Branch: - if isPrefix(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - seg.Branch.Commitment = nil - } - if !slices.Equal(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - err := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().InsertBranchSkeleton( - nil, - &tries.LazyVectorCommitmentBranchNode{ - Prefix: toIntSlice(seg.Branch.Prefix), - Commitment: seg.Branch.Commitment, - Size: new(big.Int).SetBytes(seg.Branch.Size), - LeafCount: int(seg.Branch.LeafCount), - LongestBranch: int(seg.Branch.LongestBranch), - FullPrefix: toIntSlice(seg.Branch.FullPrefix), - Store: crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Store, - }, - i == 0, - ) - if err != nil { - panic(err) - } - } - // } - } - } - } - err = crdts[1].Sync(str, shardKey, protobufs.HypergraphPhaseSet_HYPERGRAPH_PHASE_SET_VERTEX_ADDS) if err != nil { log.Fatalf("Client: failed to sync 1: %v", err) @@ -642,100 +607,6 @@ func TestHypergraphPartialSync(t *testing.T) { crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) - require.Equal(t, crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.GetSize().Int64(), crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.GetSize().Int64()) - require.Equal(t, crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.(*tries.LazyVectorCommitmentBranchNode).LeafCount, crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.(*tries.LazyVectorCommitmentBranchNode).LeafCount) - require.NoError(t, crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().PruneUncoveredBranches()) - - now = time.Now() - response, err = client.GetChildrenForPath(context.TODO(), &protobufs.GetChildrenForPathRequest{ - ShardKey: append(append([]byte{}, shardKey.L1[:]...), shardKey.L2[:]...), - Path: toUint32Slice(branchfork), - PhaseSet: protobufs.HypergraphPhaseSet_HYPERGRAPH_PHASE_SET_VERTEX_ADDS, - }) - fmt.Println(time.Since(now)) - - require.NoError(t, err) - - slices.Reverse(response.PathSegments) - sum = uint64(0xffffffffffffffff) - size = big.NewInt(0) - longest := uint32(0) - ourNode, err := clientHypergraphStore.GetNodeByPath( - crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().SetType, - crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().PhaseType, - crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().ShardKey, - toIntSlice(toUint32Slice(branchfork)), - ) - require.NoError(t, err) - for _, ps := range response.PathSegments { - for _, 
s := range ps.Segments { - switch seg := s.Segment.(type) { - case *protobufs.TreePathSegment_Branch: - if isPrefix(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - seg.Branch.Commitment = nil - branchSize := new(big.Int).SetBytes(seg.Branch.Size) - if sum == 0xffffffffffffffff { - sum = seg.Branch.LeafCount - uint64(ourNode.(*tries.LazyVectorCommitmentBranchNode).LeafCount) - size.Add(size, branchSize) - size.Sub(size, ourNode.GetSize()) - longest = seg.Branch.LongestBranch - } - seg.Branch.LeafCount -= sum - seg.Branch.Size = branchSize.Sub(branchSize, size).Bytes() - seg.Branch.LongestBranch = max(longest, seg.Branch.LongestBranch) - longest++ - } - } - } - } - slices.Reverse(response.PathSegments) - for i, ps := range response.PathSegments { - for _, s := range ps.Segments { - switch seg := s.Segment.(type) { - case *protobufs.TreePathSegment_Leaf: - err := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().InsertLeafSkeleton( - nil, - &tries.LazyVectorCommitmentLeafNode{ - Key: seg.Leaf.Key, - Value: seg.Leaf.Value, - HashTarget: seg.Leaf.HashTarget, - Commitment: seg.Leaf.Commitment, - Size: new(big.Int).SetBytes(seg.Leaf.Size), - Store: crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Store, - }, - i == 0, - ) - if err != nil { - panic(err) - } - case *protobufs.TreePathSegment_Branch: - if isPrefix(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - seg.Branch.Commitment = nil - } - if !slices.Equal(toIntSlice(seg.Branch.FullPrefix), toIntSlice(toUint32Slice(branchfork))) { - - err := crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().InsertBranchSkeleton( - nil, - &tries.LazyVectorCommitmentBranchNode{ - Prefix: toIntSlice(seg.Branch.Prefix), - Commitment: seg.Branch.Commitment, - Size: new(big.Int).SetBytes(seg.Branch.Size), - LeafCount: int(seg.Branch.LeafCount), - LongestBranch: int(seg.Branch.LongestBranch), - FullPrefix: toIntSlice(seg.Branch.FullPrefix), - Store: crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Store, - }, - i == 0, - ) - if err != nil { - panic(err) - } - } - } - } - } - - time.Sleep(10 * time.Second) str, err = client.HyperStream(context.TODO()) if err != nil { @@ -750,10 +621,10 @@ func TestHypergraphPartialSync(t *testing.T) { crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false) - require.Equal(t, crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.GetSize().Int64(), crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.GetSize().Int64()) - require.Equal(t, crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.(*tries.LazyVectorCommitmentBranchNode).LeafCount, crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Root.(*tries.LazyVectorCommitmentBranchNode).LeafCount) + desc, err := crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().GetByPath(toIntSlice(toUint32Slice(branchfork))) + require.NoError(t, err) if !bytes.Equal( - crdts[0].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false), + desc.(*crypto.LazyVectorCommitmentBranchNode).Commitment, crdts[1].(*hgcrdt.HypergraphCRDT).GetVertexAddsSet(shardKey).GetTree().Commit(false), ) { leaves := crypto.CompareLeaves( @@ -774,7 +645,7 @@ func TestHypergraphPartialSync(t *testing.T) { // Assume variable distribution, but roughly 
triple is a safe guess. If it fails, just bump it. assert.Greater(t, 40, clientHas, "mismatching vertex data entries") - assert.Greater(t, clientHas, 1, "mismatching vertex data entries") + // assert.Greater(t, clientHas, 1, "mismatching vertex data entries") } func TestHypergraphSyncWithConcurrentCommits(t *testing.T) { diff --git a/node/rpc/pubsub_proxy.go b/node/rpc/pubsub_proxy.go index 2128129..7cf3fbf 100644 --- a/node/rpc/pubsub_proxy.go +++ b/node/rpc/pubsub_proxy.go @@ -702,8 +702,8 @@ func (c *PubSubProxyClient) PublishToBitmask( _, err := c.client.PublishToBitmask( context.Background(), &protobufs.PublishToBitmaskRequest{ - Bitmask: bitmask, - Data: data, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static + Data: data, // buildutils:allow-slice-alias slice is static }, ) return err @@ -711,8 +711,8 @@ func (c *PubSubProxyClient) PublishToBitmask( func (c *PubSubProxyClient) Publish(address []byte, data []byte) error { _, err := c.client.Publish(context.Background(), &protobufs.PublishRequest{ - Address: address, - Data: data, + Address: address, // buildutils:allow-slice-alias slice is static + Data: data, // buildutils:allow-slice-alias slice is static }) return err } @@ -731,7 +731,7 @@ func (c *PubSubProxyClient) Subscribe( c.mu.Unlock() stream, err := c.client.Subscribe(ctx, &protobufs.SubscribeRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static SubscriptionId: subID, }) if err != nil { @@ -781,7 +781,7 @@ func (c *PubSubProxyClient) Unsubscribe(bitmask []byte, raw bool) { _, err := c.client.Unsubscribe( context.Background(), &protobufs.UnsubscribeRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static Raw: raw, }, ) @@ -809,7 +809,7 @@ func (c *PubSubProxyClient) RegisterValidator( unreg := &protobufs.ValidationStreamMessage{ Message: &protobufs.ValidationStreamMessage_Unregister{ Unregister: &protobufs.UnregisterValidatorRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static ValidatorId: existingID, }, }, @@ -834,7 +834,7 @@ func (c *PubSubProxyClient) RegisterValidator( req := &protobufs.ValidationStreamMessage{ Message: &protobufs.ValidationStreamMessage_Register{ Register: &protobufs.RegisterValidatorRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static ValidatorId: validatorID, Sync: sync, }, @@ -887,7 +887,7 @@ func (c *PubSubProxyClient) UnregisterValidator(bitmask []byte) error { req := &protobufs.ValidationStreamMessage{ Message: &protobufs.ValidationStreamMessage_Unregister{ Unregister: &protobufs.UnregisterValidatorRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static ValidatorId: validatorID, }, }, @@ -937,7 +937,7 @@ func (c *PubSubProxyClient) GetRandomPeer(bitmask []byte) ([]byte, error) { resp, err := c.client.GetRandomPeer( context.Background(), &protobufs.GetRandomPeerRequest{ - Bitmask: bitmask, + Bitmask: bitmask, // buildutils:allow-slice-alias slice is static }, ) if err != nil { @@ -958,7 +958,7 @@ func (c *PubSubProxyClient) GetMultiaddrOfPeerStream( stream, err := c.client.GetMultiaddrOfPeerStream( ctx, &protobufs.GetMultiaddrOfPeerRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static }, ) if err != nil { @@ -998,7 +998,7 @@ func (c *PubSubProxyClient) GetMultiaddrOfPeer(peerId []byte) string { resp, err := c.client.GetMultiaddrOfPeer( context.Background(), 
&protobufs.GetMultiaddrOfPeerRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static }, ) if err != nil { @@ -1077,7 +1077,7 @@ func (c *PubSubProxyClient) SignMessage(msg []byte) ([]byte, error) { resp, err := c.client.SignMessage( context.Background(), &protobufs.SignMessageRequest{ - Message: msg, + Message: msg, // buildutils:allow-slice-alias slice is static }, ) if err != nil { @@ -1097,7 +1097,7 @@ func (c *PubSubProxyClient) GetPublicKey() []byte { func (c *PubSubProxyClient) GetPeerScore(peerId []byte) int64 { resp, err := c.client.GetPeerScore(context.Background(), &protobufs.GetPeerScoreRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static }) if err != nil { c.logger.Error("GetPeerScore error", zap.Error(err)) @@ -1110,7 +1110,7 @@ func (c *PubSubProxyClient) SetPeerScore(peerId []byte, score int64) { _, err := c.client.SetPeerScore( context.Background(), &protobufs.SetPeerScoreRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static Score: score, }, ) @@ -1123,7 +1123,7 @@ func (c *PubSubProxyClient) AddPeerScore(peerId []byte, scoreDelta int64) { _, err := c.client.AddPeerScore( context.Background(), &protobufs.AddPeerScoreRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static ScoreDelta: scoreDelta, }, ) @@ -1136,7 +1136,7 @@ func (c *PubSubProxyClient) Reconnect(peerId []byte) error { _, err := c.client.Reconnect( context.Background(), &protobufs.ReconnectRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static }, ) return err @@ -1165,7 +1165,7 @@ func (c *PubSubProxyClient) IsPeerConnected(peerId []byte) bool { resp, err := c.client.IsPeerConnected( context.Background(), &protobufs.IsPeerConnectedRequest{ - PeerId: peerId, + PeerId: peerId, // buildutils:allow-slice-alias slice is static }, ) if err != nil { diff --git a/node/store/clock.go b/node/store/clock.go index 94ac976..96dfd41 100644 --- a/node/store/clock.go +++ b/node/store/clock.go @@ -7,7 +7,7 @@ import ( "math/big" "slices" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "google.golang.org/protobuf/proto" @@ -1545,7 +1545,7 @@ func (p *PebbleClockStore) RangeShardClockFrames( } return &PebbleClockIterator{ - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static start: startFrameNumber, end: endFrameNumber + 1, cur: startFrameNumber, @@ -1563,7 +1563,7 @@ func (p *PebbleClockStore) RangeStagedShardClockFrames( } return &PebbleStagedShardFrameIterator{ - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static start: startFrameNumber, end: endFrameNumber, cur: startFrameNumber, @@ -2284,7 +2284,7 @@ func (p *PebbleClockStore) RangeCertifiedAppShardStates( } return &PebbleAppShardStateIterator{ - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static start: startRank, end: endRank + 1, cur: startRank, @@ -2482,7 +2482,7 @@ func (p *PebbleClockStore) RangeQuorumCertificates( } return &PebbleQuorumCertificateIterator{ - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static start: startRank, end: endRank + 1, cur: startRank, @@ -2620,7 +2620,7 @@ func (p *PebbleClockStore) RangeTimeoutCertificates( } return &PebbleTimeoutCertificateIterator{ - filter: filter, + filter: filter, // buildutils:allow-slice-alias slice is static start: startRank, end: endRank + 1, cur: 
startRank, diff --git a/node/store/consensus.go b/node/store/consensus.go index 98030c3..e2283b0 100644 --- a/node/store/consensus.go +++ b/node/store/consensus.go @@ -5,7 +5,7 @@ import ( "encoding/binary" "slices" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/consensus" diff --git a/node/store/hypergraph.go b/node/store/hypergraph.go index 5ef10f4..d497f9c 100644 --- a/node/store/hypergraph.go +++ b/node/store/hypergraph.go @@ -9,7 +9,7 @@ import ( "path" "slices" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/config" @@ -55,29 +55,39 @@ func NewPebbleHypergraphStore( } } -func (p *PebbleHypergraphStore) NewSnapshot() ( +func (p *PebbleHypergraphStore) NewShardSnapshot( + shardKey tries.ShardKey, +) ( tries.TreeBackingStore, func(), error, ) { - if p.pebble == nil { - return nil, nil, errors.New("hypergraph store does not support snapshots") - } + memConfig := *p.config + memConfig.InMemoryDONOTUSE = true + memConfig.Path = fmt.Sprintf( + "memory-shard-%x", + shardKey.L2[:4], + ) - snapshot := p.pebble.NewSnapshot() - snapshotDB := &pebbleSnapshotDB{snap: snapshot} + memDB := NewPebbleDB(p.logger, &memConfig, 0) + managedDB := newManagedKVDB(memDB) snapshotStore := NewPebbleHypergraphStore( - p.config, - snapshotDB, + &memConfig, + managedDB, p.logger, p.verenc, p.prover, ) snapshotStore.pebble = nil + if err := p.copyShardData(managedDB, shardKey); err != nil { + _ = managedDB.Close() + return nil, nil, errors.Wrap(err, "copy shard snapshot") + } + release := func() { - if err := snapshotDB.Close(); err != nil { - p.logger.Warn("failed to close hypergraph snapshot", zap.Error(err)) + if err := managedDB.Close(); err != nil { + p.logger.Warn("failed to close shard snapshot", zap.Error(err)) } } @@ -391,6 +401,169 @@ func hypergraphCoveredPrefixKey() []byte { return key } +func (p *PebbleHypergraphStore) copyShardData( + dst store.KVDB, + shardKey tries.ShardKey, +) error { + prefixes := []byte{ + VERTEX_ADDS_TREE_NODE, + VERTEX_REMOVES_TREE_NODE, + HYPEREDGE_ADDS_TREE_NODE, + HYPEREDGE_REMOVES_TREE_NODE, + VERTEX_ADDS_TREE_NODE_BY_PATH, + VERTEX_REMOVES_TREE_NODE_BY_PATH, + HYPEREDGE_ADDS_TREE_NODE_BY_PATH, + HYPEREDGE_REMOVES_TREE_NODE_BY_PATH, + VERTEX_ADDS_TREE_ROOT, + VERTEX_REMOVES_TREE_ROOT, + HYPEREDGE_ADDS_TREE_ROOT, + HYPEREDGE_REMOVES_TREE_ROOT, + VERTEX_ADDS_CHANGE_RECORD, + VERTEX_REMOVES_CHANGE_RECORD, + HYPEREDGE_ADDS_CHANGE_RECORD, + HYPEREDGE_REMOVES_CHANGE_RECORD, + HYPERGRAPH_VERTEX_ADDS_SHARD_COMMIT, + HYPERGRAPH_VERTEX_REMOVES_SHARD_COMMIT, + HYPERGRAPH_HYPEREDGE_ADDS_SHARD_COMMIT, + HYPERGRAPH_HYPEREDGE_REMOVES_SHARD_COMMIT, + } + + for _, prefix := range prefixes { + if err := p.copyPrefixedRange(dst, prefix, shardKey); err != nil { + return err + } + } + + if err := p.copyVertexDataForShard(dst, shardKey); err != nil { + return err + } + + if err := p.copyCoveredPrefix(dst); err != nil { + return err + } + + return nil +} + +func (p *PebbleHypergraphStore) copyPrefixedRange( + dst store.KVDB, + prefix byte, + shardKey tries.ShardKey, +) error { + start, end := shardRangeBounds(prefix, shardKey) + iter, err := p.db.NewIter(start, end) + if err != nil { + return errors.Wrap(err, "snapshot: iter range") + } + defer iter.Close() + + for valid := iter.First(); valid; valid = iter.Next() { + key := append([]byte(nil), iter.Key()...) 
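+ // Detach copies from the iterator's reusable key/value buffers before handing them to dst.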
+ val := append([]byte(nil), iter.Value()...) + if err := dst.Set(key, val); err != nil { + return errors.Wrap(err, "snapshot: set range value") + } + } + + return nil +} + +func (p *PebbleHypergraphStore) copyVertexDataForShard( + dst store.KVDB, + shardKey tries.ShardKey, +) error { + sets := []struct { + setType string + phaseType string + }{ + {string(hypergraph.VertexAtomType), string(hypergraph.AddsPhaseType)}, + {string(hypergraph.VertexAtomType), string(hypergraph.RemovesPhaseType)}, + } + + vertexKeys := make(map[string]struct{}) + for _, cfg := range sets { + iter, err := p.IterateRawLeaves(cfg.setType, cfg.phaseType, shardKey) + if err != nil { + return errors.Wrap(err, "snapshot: iterate raw leaves") + } + for valid := iter.First(); valid; valid = iter.Next() { + leaf, err := iter.Leaf() + if err != nil || leaf == nil { + continue + } + if len(leaf.UnderlyingData) == 0 { + continue + } + keyStr := string(leaf.Key) + if _, ok := vertexKeys[keyStr]; ok { + continue + } + vertexKeys[keyStr] = struct{}{} + buf := append([]byte(nil), leaf.UnderlyingData...) + if err := dst.Set(hypergraphVertexDataKey(leaf.Key), buf); err != nil { + iter.Close() + return errors.Wrap(err, "snapshot: copy vertex data") + } + } + if err := iter.Close(); err != nil { + return errors.Wrap(err, "snapshot: close vertex iterator") + } + } + + return nil +} + +func (p *PebbleHypergraphStore) copyCoveredPrefix(dst store.KVDB) error { + value, closer, err := p.db.Get(hypergraphCoveredPrefixKey()) + if err != nil { + if errors.Is(err, pebble.ErrNotFound) { + return nil + } + return errors.Wrap(err, "snapshot: get covered prefix") + } + defer closer.Close() + buf := append([]byte(nil), value...) + return errors.Wrap( + dst.Set(hypergraphCoveredPrefixKey(), buf), + "snapshot: set covered prefix", + ) +} + +func shardRangeBounds( + prefix byte, + shardKey tries.ShardKey, +) ([]byte, []byte) { + shardBytes := shardKeyBytes(shardKey) + start := append([]byte{HYPERGRAPH_SHARD, prefix}, shardBytes...) + nextShardBytes, ok := incrementShardBytes(shardBytes) + if ok { + end := append([]byte{HYPERGRAPH_SHARD, prefix}, nextShardBytes...) + return start, end + } + if prefix < 0xFF { + return start, []byte{HYPERGRAPH_SHARD, prefix + 1} + } + return start, []byte{HYPERGRAPH_SHARD + 1} +} + +func shardKeyBytes(shardKey tries.ShardKey) []byte { + key := make([]byte, 0, len(shardKey.L1)+len(shardKey.L2)) + key = append(key, shardKey.L1[:]...) + key = append(key, shardKey.L2[:]...) + return key +} + +func incrementShardBytes(data []byte) ([]byte, bool) { + out := append([]byte(nil), data...) 
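+ // Treat the copy as a big-endian integer and add one, carrying right to left; an all-0xFF key overflows and reports false so the caller can widen the upper bound.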
+ for i := len(out) - 1; i >= 0; i-- { + out[i]++ + if out[i] != 0 { + return out, true + } + } + return nil, false +} + func shardKeyFromKey(key []byte) tries.ShardKey { return tries.ShardKey{ L1: [3]byte(key[2:5]), @@ -478,6 +651,23 @@ func (p *PebbleHypergraphStore) SaveVertexTreeRaw( ) } +func (p *PebbleHypergraphStore) DeleteVertexTree( + txn tries.TreeBackingStoreTransaction, + id []byte, +) error { + if txn == nil { + return errors.Wrap( + errors.New("requires transaction"), + "delete vertex tree", + ) + } + + return errors.Wrap( + txn.Delete(hypergraphVertexDataKey(id)), + "delete vertex tree", + ) +} + func (p *PebbleHypergraphStore) SetCoveredPrefix(coveredPrefix []int) error { buf := bytes.NewBuffer(nil) prefix := []int64{} diff --git a/node/store/key.go b/node/store/key.go index e376003..d2c6a53 100644 --- a/node/store/key.go +++ b/node/store/key.go @@ -6,7 +6,7 @@ import ( "slices" "time" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "google.golang.org/protobuf/proto" diff --git a/node/store/managed_kvdb.go b/node/store/managed_kvdb.go new file mode 100644 index 0000000..18dd67b --- /dev/null +++ b/node/store/managed_kvdb.go @@ -0,0 +1,259 @@ +package store + +import ( + "errors" + "io" + "sync" + "sync/atomic" + + "source.quilibrium.com/quilibrium/monorepo/types/store" +) + +var errManagedKVDBClosed = errors.New("managed kvdb closed") + +// managedKVDB wraps a KVDB and keeps track of in-flight operations so Close() +// waits until all references are released. This prevents panics when iterators +// or point-lookups race with a snapshot being torn down. +type managedKVDB struct { + inner store.KVDB + wg sync.WaitGroup + closed atomic.Bool +} + +func newManagedKVDB(inner store.KVDB) *managedKVDB { + return &managedKVDB{inner: inner} +} + +func (m *managedKVDB) ref() error { + if m.closed.Load() { + return errManagedKVDBClosed + } + m.wg.Add(1) + if m.closed.Load() { + m.wg.Done() + return errManagedKVDBClosed + } + return nil +} + +func (m *managedKVDB) deref() { + m.wg.Done() +} + +func (m *managedKVDB) Get(key []byte) ([]byte, io.Closer, error) { + if err := m.ref(); err != nil { + return nil, nil, err + } + + value, closer, err := m.inner.Get(key) + if err != nil || closer == nil { + m.deref() + return value, nil, err + } + + return value, &managedCloser{ + parent: m, + inner: closer, + }, nil +} + +func (m *managedKVDB) Set(key, value []byte) error { + if err := m.ref(); err != nil { + return err + } + defer m.deref() + return m.inner.Set(key, value) +} + +func (m *managedKVDB) Delete(key []byte) error { + if err := m.ref(); err != nil { + return err + } + defer m.deref() + return m.inner.Delete(key) +} + +func (m *managedKVDB) NewBatch(indexed bool) store.Transaction { + if err := m.ref(); err != nil { + return &closedTransaction{err: err} + } + return &managedTxn{ + parent: m, + inner: m.inner.NewBatch(indexed), + } +} + +func (m *managedKVDB) NewIter(lowerBound []byte, upperBound []byte) ( + store.Iterator, + error, +) { + if err := m.ref(); err != nil { + return nil, err + } + + iter, err := m.inner.NewIter(lowerBound, upperBound) + if err != nil { + m.deref() + return nil, err + } + + return &managedIterator{ + parent: m, + inner: iter, + }, nil +} + +func (m *managedKVDB) Compact(start, end []byte, parallelize bool) error { + if err := m.ref(); err != nil { + return err + } + defer m.deref() + return m.inner.Compact(start, end, parallelize) +} + +func (m *managedKVDB) CompactAll() error { + if err := 
m.ref(); err != nil { + return err + } + defer m.deref() + return m.inner.CompactAll() +} + +func (m *managedKVDB) DeleteRange(start, end []byte) error { + if err := m.ref(); err != nil { + return err + } + defer m.deref() + return m.inner.DeleteRange(start, end) +} + +func (m *managedKVDB) Close() error { + if !m.closed.CompareAndSwap(false, true) { + return nil + } + m.wg.Wait() + return m.inner.Close() +} + +type managedCloser struct { + parent *managedKVDB + inner io.Closer + once sync.Once +} + +func (c *managedCloser) Close() error { + var err error + c.once.Do(func() { + err = c.inner.Close() + c.parent.deref() + }) + return err +} + +type managedIterator struct { + parent *managedKVDB + inner store.Iterator + once sync.Once +} + +func (i *managedIterator) Close() error { + var err error + i.once.Do(func() { + err = i.inner.Close() + i.parent.deref() + }) + return err +} + +func (i *managedIterator) Key() []byte { return i.inner.Key() } +func (i *managedIterator) First() bool { return i.inner.First() } +func (i *managedIterator) Next() bool { return i.inner.Next() } +func (i *managedIterator) Prev() bool { return i.inner.Prev() } +func (i *managedIterator) Valid() bool { return i.inner.Valid() } +func (i *managedIterator) Value() []byte { return i.inner.Value() } +func (i *managedIterator) SeekLT(b []byte) bool { return i.inner.SeekLT(b) } +func (i *managedIterator) SeekGE(b []byte) bool { return i.inner.SeekGE(b) } +func (i *managedIterator) Last() bool { return i.inner.Last() } + +type managedTxn struct { + parent *managedKVDB + inner store.Transaction + once sync.Once +} + +func (t *managedTxn) finish() { + t.once.Do(func() { + t.parent.deref() + }) +} + +func (t *managedTxn) Get(key []byte) ([]byte, io.Closer, error) { + return t.inner.Get(key) +} + +func (t *managedTxn) Set(key []byte, value []byte) error { + return t.inner.Set(key, value) +} + +func (t *managedTxn) Commit() error { + defer t.finish() + return t.inner.Commit() +} + +func (t *managedTxn) Delete(key []byte) error { + return t.inner.Delete(key) +} + +func (t *managedTxn) Abort() error { + defer t.finish() + return t.inner.Abort() +} + +func (t *managedTxn) NewIter(lowerBound []byte, upperBound []byte) ( + store.Iterator, + error, +) { + return t.inner.NewIter(lowerBound, upperBound) +} + +func (t *managedTxn) DeleteRange(lowerBound []byte, upperBound []byte) error { + return t.inner.DeleteRange(lowerBound, upperBound) +} + +type closedTransaction struct { + err error +} + +func (c *closedTransaction) Get(key []byte) ([]byte, io.Closer, error) { + return nil, nil, c.err +} + +func (c *closedTransaction) Set(key []byte, value []byte) error { + return c.err +} + +func (c *closedTransaction) Commit() error { + return c.err +} + +func (c *closedTransaction) Delete(key []byte) error { + return c.err +} + +func (c *closedTransaction) Abort() error { + return c.err +} + +func (c *closedTransaction) NewIter(lowerBound []byte, upperBound []byte) ( + store.Iterator, + error, +) { + return nil, c.err +} + +func (c *closedTransaction) DeleteRange( + lowerBound []byte, + upperBound []byte, +) error { + return c.err +} diff --git a/node/store/pebble.go b/node/store/pebble.go index 362c90c..6cf4e83 100644 --- a/node/store/pebble.go +++ b/node/store/pebble.go @@ -1,14 +1,17 @@ package store import ( + "context" "encoding/binary" "encoding/hex" "fmt" "io" "os" + "strings" - "github.com/cockroachdb/pebble" - "github.com/cockroachdb/pebble/vfs" + pebblev1 "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" + 
"github.com/cockroachdb/pebble/v2/vfs" "github.com/pkg/errors" "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/config" @@ -16,7 +19,8 @@ import ( ) type PebbleDB struct { - db *pebble.DB + db *pebble.DB + config *config.DBConfig } func (p *PebbleDB) DB() *pebble.DB { @@ -45,6 +49,12 @@ var pebbleMigrations = []func(*pebble.Batch) error{ migration_2_1_0_149, migration_2_1_0_1410, migration_2_1_0_1411, + migration_2_1_0_15, + migration_2_1_0_151, + migration_2_1_0_152, + migration_2_1_0_153, + migration_2_1_0_154, + migration_2_1_0_155, } func NewPebbleDB( @@ -58,12 +68,10 @@ func NewPebbleDB( L0CompactionThreshold: 8, L0StopWritesThreshold: 32, LBaseMaxBytes: 64 << 20, + FormatMajorVersion: pebble.FormatNewest, } if config.InMemoryDONOTUSE { - logger.Warn( - "IN MEMORY DATABASE OPTION ENABLED - THIS WILL NOT SAVE TO DISK", - ) opts.FS = vfs.NewMem() } @@ -104,6 +112,40 @@ func NewPebbleDB( } db, err := pebble.Open(path, opts) + if err != nil && shouldAttemptLegacyOpen(err, config.InMemoryDONOTUSE) { + logger.Warn( + fmt.Sprintf( + "failed to open %s with pebble v2, trying legacy open", + storeType, + ), + zap.Error(err), + zap.String("path", path), + zap.Uint("core_id", coreId), + ) + if compatErr := ensurePebbleLegacyCompatibility( + path, + storeType, + coreId, + logger, + ); compatErr == nil { + logger.Info( + fmt.Sprintf( + "legacy pebble open succeeded, retrying %s with pebble v2", + storeType, + ), + zap.String("path", path), + zap.Uint("core_id", coreId), + ) + db, err = pebble.Open(path, opts) + } else { + logger.Error( + fmt.Sprintf("legacy pebble open failed for %s", storeType), + zap.Error(compatErr), + zap.String("path", path), + zap.Uint("core_id", coreId), + ) + } + } if err != nil { logger.Error( fmt.Sprintf("failed to open %s", storeType), @@ -114,7 +156,7 @@ func NewPebbleDB( os.Exit(1) } - pebbleDB := &PebbleDB{db} + pebbleDB := &PebbleDB{db, config} if err := pebbleDB.migrate(logger); err != nil { logger.Error( fmt.Sprintf("failed to migrate %s", storeType), @@ -129,7 +171,56 @@ func NewPebbleDB( return pebbleDB } +// shouldAttemptLegacyOpen determines whether the error from pebble.Open is due +// to an outdated on-disk format. Only those cases benefit from temporarily +// opening with the legacy Pebble version. +func shouldAttemptLegacyOpen(err error, inMemory bool) bool { + if err == nil || inMemory { + return false + } + msg := err.Error() + return strings.Contains(msg, "format major version") && + strings.Contains(msg, "no longer supported") +} + +// ensurePebbleLegacyCompatibility attempts to open the database with the +// previous Pebble v1.1.5 release. Older stores that have not yet been opened +// by Pebble v2 will be updated during this open/close cycle, allowing the +// subsequent Pebble v2 open to succeed without manual intervention. 
+func ensurePebbleLegacyCompatibility( + path string, + storeType string, + coreId uint, + logger *zap.Logger, +) error { + legacyOpts := &pebblev1.Options{ + MemTableSize: 64 << 20, + MaxOpenFiles: 1000, + L0CompactionThreshold: 8, + L0StopWritesThreshold: 32, + LBaseMaxBytes: 64 << 20, + FormatMajorVersion: pebblev1.FormatNewest, + } + legacyDB, err := pebblev1.Open(path, legacyOpts) + if err != nil { + return err + } + if err := legacyDB.Close(); err != nil { + return err + } + logger.Info( + fmt.Sprintf("legacy pebble open and close completed for %s", storeType), + zap.String("path", path), + zap.Uint("core_id", coreId), + ) + return nil +} + func (p *PebbleDB) migrate(logger *zap.Logger) error { + if p.config.InMemoryDONOTUSE { + return nil + } + currentVersion := uint64(len(pebbleMigrations)) var storedVersion uint64 @@ -251,13 +342,14 @@ func (p *PebbleDB) NewIter(lowerBound []byte, upperBound []byte) ( error, ) { return p.db.NewIter(&pebble.IterOptions{ - LowerBound: lowerBound, - UpperBound: upperBound, + LowerBound: lowerBound, // buildutils:allow-slice-alias slice is static + UpperBound: upperBound, // buildutils:allow-slice-alias slice is static }) } func (p *PebbleDB) Compact(start, end []byte, parallelize bool) error { - return p.db.Compact(start, end, parallelize) + return p.db.Compact(context.TODO(), start, end, parallelize) + // return p.db.Compact(start, end, parallelize) } func (p *PebbleDB) Close() error { @@ -323,8 +415,8 @@ func (t *PebbleTransaction) NewIter(lowerBound []byte, upperBound []byte) ( error, ) { return t.b.NewIter(&pebble.IterOptions{ - LowerBound: lowerBound, - UpperBound: upperBound, + LowerBound: lowerBound, // buildutils:allow-slice-alias slice is static + UpperBound: upperBound, // buildutils:allow-slice-alias slice is static }) } @@ -345,7 +437,7 @@ func rightAlign(data []byte, size int) []byte { l := len(data) if l == size { - return data + return data // buildutils:allow-slice-alias slice is static } if l > size { @@ -535,6 +627,30 @@ func migration_2_1_0_1411(b *pebble.Batch) error { return migration_2_1_0_149(b) } +func migration_2_1_0_15(b *pebble.Batch) error { + return nil +} + +func migration_2_1_0_151(b *pebble.Batch) error { + return migration_2_1_0_15(b) +} + +func migration_2_1_0_152(b *pebble.Batch) error { + return migration_2_1_0_15(b) +} + +func migration_2_1_0_153(b *pebble.Batch) error { + return migration_2_1_0_15(b) +} + +func migration_2_1_0_154(b *pebble.Batch) error { + return migration_2_1_0_15(b) +} + +func migration_2_1_0_155(b *pebble.Batch) error { + return migration_2_1_0_15(b) +} + type pebbleSnapshotDB struct { snap *pebble.Snapshot } @@ -560,8 +676,8 @@ func (p *pebbleSnapshotDB) NewIter(lowerBound []byte, upperBound []byte) ( error, ) { return p.snap.NewIter(&pebble.IterOptions{ - LowerBound: lowerBound, - UpperBound: upperBound, + LowerBound: lowerBound, // buildutils:allow-slice-alias slice is static + UpperBound: upperBound, // buildutils:allow-slice-alias slice is static }) } diff --git a/node/store/peerstore.go b/node/store/peerstore.go index c7468aa..8e2b61e 100644 --- a/node/store/peerstore.go +++ b/node/store/peerstore.go @@ -3,7 +3,7 @@ package store import ( "context" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" ds "github.com/ipfs/go-datastore" dsq "github.com/ipfs/go-datastore/query" "github.com/pkg/errors" diff --git a/node/store/shards.go b/node/store/shards.go index 85cf7b6..828ce5c 100644 --- a/node/store/shards.go +++ b/node/store/shards.go @@ -5,7 +5,7 @@ import ( 
"encoding/binary" "slices" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/types/store" diff --git a/node/store/token.go b/node/store/token.go index 26045c1..8799292 100644 --- a/node/store/token.go +++ b/node/store/token.go @@ -4,7 +4,7 @@ import ( "bytes" "encoding/binary" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "google.golang.org/protobuf/proto" diff --git a/node/store/worker.go b/node/store/worker.go index fe4dfec..06c6d12 100644 --- a/node/store/worker.go +++ b/node/store/worker.go @@ -5,7 +5,7 @@ import ( "fmt" "slices" - "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/v2" "github.com/pkg/errors" "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/types/store" @@ -191,7 +191,7 @@ func encodeWorkerInfo(worker *store.WorkerInfo) ([]byte, error) { // totalLen = coreId(8) + totalStorage(8) + automatic(1) + allocated(1) // + 2 + listen + 2 + stream + 2 + filter totalLen := 8 + 8 + 1 + 1 + 2 + int(listenMultiaddrLen) + 2 + - int(streamListenMultiaddrLen) + 2 + int(filterLen) + int(streamListenMultiaddrLen) + 2 + int(filterLen) + 8 data := make([]byte, totalLen) offset := 0 @@ -228,6 +228,9 @@ func encodeWorkerInfo(worker *store.WorkerInfo) ([]byte, error) { binary.BigEndian.PutUint16(data[offset:], filterLen) offset += 2 copy(data[offset:], worker.Filter) + offset += int(filterLen) + + binary.BigEndian.PutUint64(data[offset:], worker.PendingFilterFrame) return data, nil } @@ -303,6 +306,12 @@ func decodeWorkerInfo(data []byte) (*store.WorkerInfo, error) { filter := make([]byte, filterLen) copy(filter, data[offset:offset+filterLen]) + offset += filterLen + + var pendingFrame uint64 + if offset+8 <= len(data) { + pendingFrame = binary.BigEndian.Uint64(data[offset:]) + } return &store.WorkerInfo{ CoreId: uint(coreId), @@ -312,5 +321,6 @@ func decodeWorkerInfo(data []byte) (*store.WorkerInfo, error) { TotalStorage: uint(totalStorage), Automatic: automatic, Allocated: allocated, + PendingFilterFrame: pendingFrame, }, nil } diff --git a/node/worker/manager.go b/node/worker/manager.go index 11b6fda..862a938 100644 --- a/node/worker/manager.go +++ b/node/worker/manager.go @@ -114,7 +114,7 @@ func (w *WorkerManager) setWorkerFilterMapping( if len(filter) > 0 { w.workersByFilter[string(filter)] = coreID } - w.filtersByWorker[coreID] = filter + w.filtersByWorker[coreID] = filter // buildutils:allow-slice-alias slice is static w.mu.Unlock() } @@ -424,11 +424,12 @@ func (w *WorkerManager) AllocateWorker(coreId uint, filter []byte) error { // Update worker filter if provided if len(filter) > 0 && string(worker.Filter) != string(filter) { - worker.Filter = filter + worker.Filter = filter // buildutils:allow-slice-alias slice is static } // Update allocation status worker.Allocated = true + worker.PendingFilterFrame = 0 // Save to store txn, err := w.store.NewTransaction(false) @@ -503,6 +504,7 @@ func (w *WorkerManager) DeallocateWorker(coreId uint) error { // Update allocation status and clear filter worker.Allocated = false worker.Filter = nil + worker.PendingFilterFrame = 0 // Save to store txn, err := w.store.NewTransaction(false) @@ -1047,7 +1049,7 @@ func (w *WorkerManager) respawnWorker( } ctx, cancel := context.WithTimeout(managerCtx, respawnTimeout) - _, err = svc.Respawn(ctx, &protobufs.RespawnRequest{Filter: filter}) + _, err = svc.Respawn(ctx, &protobufs.RespawnRequest{Filter: filter}) // 
buildutils:allow-slice-alias slice is static cancel() if err == nil { return nil diff --git a/protobufs/global.go b/protobufs/global.go index 244a8c8..4fd02de 100644 --- a/protobufs/global.go +++ b/protobufs/global.go @@ -2642,6 +2642,11 @@ func (f *FrameHeader) ToCanonicalBytes() ([]byte, error) { return nil, errors.Wrap(err, "to canonical bytes") } + // Write rank + if err := binary.Write(buf, binary.BigEndian, f.Rank); err != nil { + return nil, errors.Wrap(err, "to canonical bytes") + } + // Write timestamp if err := binary.Write(buf, binary.BigEndian, f.Timestamp); err != nil { return nil, errors.Wrap(err, "to canonical bytes") @@ -2791,6 +2796,11 @@ func (f *FrameHeader) FromCanonicalBytes(data []byte) error { return errors.Wrap(err, "from canonical bytes") } + // Read rank + if err := binary.Read(buf, binary.BigEndian, &f.Rank); err != nil { + return errors.Wrap(err, "from canonical bytes") + } + // Read timestamp if err := binary.Read(buf, binary.BigEndian, &f.Timestamp); err != nil { return errors.Wrap(err, "from canonical bytes") @@ -5678,7 +5688,37 @@ func (f *QuorumCertificate) Validate() error { return errors.Wrap(errors.New("missing aggregate signature"), "validate") } - return f.AggregateSignature.Validate() + if len(f.Filter) == 0 { + return f.AggregateSignature.Validate() + } + + // Signature should be 74 bytes + if len(f.AggregateSignature.Signature) < 74 { + return errors.Wrap( + errors.Errorf( + "bls48581 signature must be at least 74 bytes, got %d", + len(f.AggregateSignature.Signature), + ), + "validate", + ) + } + + // Validate public key if present + if f.AggregateSignature.PublicKey != nil { + if err := f.AggregateSignature.PublicKey.Validate(); err != nil { + return errors.Wrap(err, "validate") + } + } + + // Bitmask can be variable length, but should not exceed 32 + if len(f.AggregateSignature.Bitmask) > 32 { + return errors.Wrap( + errors.New("invalid bitmask length"), + "validate", + ) + } + + return nil } var _ ValidatableMessage = (*TimeoutCertificate)(nil) @@ -5688,7 +5728,11 @@ func (f *TimeoutCertificate) Validate() error { return errors.Wrap(errors.New("nil frame confirmation"), "validate") } - // Rank and frame number is uint64, any value is valid + if f.LatestQuorumCertificate != nil { + if err := f.LatestQuorumCertificate.Validate(); err != nil { + return errors.Wrap(err, "validate") + } + } // Aggregate signature must be present if f.AggregateSignature == nil { diff --git a/types/consensus/prover_registry.go b/types/consensus/prover_registry.go index 8303fb4..97a51dc 100644 --- a/types/consensus/prover_registry.go +++ b/types/consensus/prover_registry.go @@ -128,4 +128,8 @@ type ProverRegistry interface { // GetProverShardSummaries returns all shard filters that currently have any // provers assigned (regardless of status) along with their counts. GetProverShardSummaries() ([]*ProverShardSummary, error) + + // PruneOrphanJoins performs pruning of vertexes in the prover trie for + // expired joins. + PruneOrphanJoins(frameNumber uint64) error } diff --git a/types/mocks/prover_registry.go b/types/mocks/prover_registry.go index 06c30b6..a4f8a94 100644 --- a/types/mocks/prover_registry.go +++ b/types/mocks/prover_registry.go @@ -12,6 +12,12 @@ type MockProverRegistry struct { var _ consensus.ProverRegistry = (*MockProverRegistry)(nil) +// PruneOrphanJoins implements consensus.ProverRegistry. 
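
[editor note] A minimal, self-contained sketch of the invariant behind the Rank addition to FrameHeader above: any field written in ToCanonicalBytes must be read by FromCanonicalBytes at the same position, otherwise every later field shifts and the round trip silently decodes garbage. The struct and field set here are hypothetical stand-ins, not the protobufs.FrameHeader definition.

// canonical_bytes_sketch.go (illustrative)
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

type header struct {
	FrameNumber uint64
	Rank        uint64 // newly inserted field: written and read in the same slot
	Timestamp   int64
}

func (h *header) toCanonicalBytes() ([]byte, error) {
	buf := new(bytes.Buffer)
	for _, v := range []any{h.FrameNumber, h.Rank, h.Timestamp} {
		if err := binary.Write(buf, binary.BigEndian, v); err != nil {
			return nil, err
		}
	}
	return buf.Bytes(), nil
}

func (h *header) fromCanonicalBytes(data []byte) error {
	buf := bytes.NewReader(data)
	for _, v := range []any{&h.FrameNumber, &h.Rank, &h.Timestamp} {
		if err := binary.Read(buf, binary.BigEndian, v); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	in := header{FrameNumber: 42, Rank: 3, Timestamp: 1700000000}
	b, _ := in.toCanonicalBytes()
	var out header
	_ = out.fromCanonicalBytes(b)
	fmt.Printf("%+v\n", out) // round-trips to the original values
}
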
+func (m *MockProverRegistry) PruneOrphanJoins(frameNumber uint64) error { + args := m.Called(frameNumber) + return args.Error(0) +} + // GetProvers implements consensus.ProverRegistry. func (m *MockProverRegistry) GetProvers(filter []byte) ( []*consensus.ProverInfo, diff --git a/types/store/worker.go b/types/store/worker.go index f91b328..739c875 100644 --- a/types/store/worker.go +++ b/types/store/worker.go @@ -8,6 +8,7 @@ type WorkerInfo struct { TotalStorage uint Automatic bool Allocated bool + PendingFilterFrame uint64 } type WorkerStore interface { diff --git a/types/tries/lazy_proof_tree.go b/types/tries/lazy_proof_tree.go index 0fba0e5..6885c39 100644 --- a/types/tries/lazy_proof_tree.go +++ b/types/tries/lazy_proof_tree.go @@ -550,7 +550,7 @@ type TreeBackingStore interface { shardAddress []byte, ) ([]byte, error) GetRootCommits(frameNumber uint64) (map[ShardKey][][]byte, error) - NewSnapshot() (TreeBackingStore, func(), error) + NewShardSnapshot(shardKey ShardKey) (TreeBackingStore, func(), error) // IterateRawLeaves returns an iterator over all leaf nodes for a given // shard and phase set. This bypasses in-memory tree caching and reads // directly from the database for raw sync operations. @@ -817,9 +817,9 @@ func (t *LazyVectorCommitmentTree) Insert( } if node == nil { newNode := &LazyVectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, Store: t.Store, } @@ -860,8 +860,8 @@ func (t *LazyVectorCommitmentTree) Insert( switch n := node.(type) { case *LazyVectorCommitmentLeafNode: if bytes.Equal(n.Key, key) { - n.Value = value - n.HashTarget = hashTarget + n.Value = slices.Clone(value) + n.HashTarget = slices.Clone(hashTarget) n.Commitment = nil n.Size = size @@ -900,9 +900,9 @@ func (t *LazyVectorCommitmentTree) Insert( finalNewNibble := getNextNibble(key, divergeDepth) branch.Children[finalOldNibble] = n branch.Children[finalNewNibble] = &LazyVectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, Store: t.Store, } @@ -972,9 +972,9 @@ func (t *LazyVectorCommitmentTree) Insert( newBranch.Children[expectedNibble] = n n.Prefix = n.Prefix[i+1:] // remove shared prefix from old branch newBranch.Children[actualNibble] = &LazyVectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, Store: t.Store, } @@ -1201,7 +1201,7 @@ func (t *LazyVectorCommitmentTree) Verify( false, ) } else { - rootCommit = root + rootCommit = slices.Clone(root) } for _, subProof := range proof.SubProofs { @@ -2154,7 +2154,7 @@ func DeserializeTree( PhaseType: phaseType, ShardKey: shardKey, Store: store, - CoveredPrefix: coveredPrefix, // Empty by default, must be set explicitly + CoveredPrefix: slices.Clone(coveredPrefix), // Empty by default, must be set explicitly }, nil } diff --git a/types/tries/proof_tree.go b/types/tries/proof_tree.go index a883df0..1518b65 100644 --- a/types/tries/proof_tree.go +++ b/types/tries/proof_tree.go @@ -285,9 +285,9 @@ func (t *VectorCommitmentTree) Insert( ) { if node == nil { return 1, &VectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, } } @@ -295,8 
+295,8 @@ func (t *VectorCommitmentTree) Insert( switch n := node.(type) { case *VectorCommitmentLeafNode: if bytes.Equal(n.Key, key) { - n.Value = value - n.HashTarget = hashTarget + n.Value = slices.Clone(value) + n.HashTarget = slices.Clone(hashTarget) n.Commitment = nil n.Size = size return 0, n @@ -318,9 +318,9 @@ func (t *VectorCommitmentTree) Insert( finalNewNibble := getNextNibble(key, divergeDepth) branch.Children[finalOldNibble] = n branch.Children[finalNewNibble] = &VectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, } @@ -343,9 +343,9 @@ func (t *VectorCommitmentTree) Insert( newBranch.Children[expectedNibble] = n n.Prefix = n.Prefix[i+1:] // remove shared prefix from old branch newBranch.Children[actualNibble] = &VectorCommitmentLeafNode{ - Key: key, - Value: value, - HashTarget: hashTarget, + Key: slices.Clone(key), + Value: slices.Clone(value), + HashTarget: slices.Clone(hashTarget), Size: size, } return 1, newBranch diff --git a/vdf/wesolowski_frame_prover.go b/vdf/wesolowski_frame_prover.go index e78b712..2b04bac 100644 --- a/vdf/wesolowski_frame_prover.go +++ b/vdf/wesolowski_frame_prover.go @@ -38,10 +38,9 @@ func SetBitAtIndex(mask []byte, index uint8) []byte { newMask := make([]byte, 32) copy(newMask, mask) - mask = newMask - mask[byteIndex] |= 1 << bitPos - return mask + newMask[byteIndex] |= 1 << bitPos + return newMask } // GetSetBitIndices returns a slice of indices where bits are set in the mask. @@ -110,7 +109,7 @@ func (w *WesolowskiFrameProver) ProveFrameHeaderGenesis( } header := &protobufs.FrameHeader{ - Address: address, + Address: address, // buildutils:allow-slice-alias (genesis address is constant) FrameNumber: 0, Timestamp: 0, Difficulty: difficulty, @@ -143,8 +142,6 @@ func (w *WesolowskiFrameProver) ProveFrameHeader( ) } - pubkeyType := provingKey.GetType() - previousSelectorBytes := [516]byte{} copy(previousSelectorBytes[:], previousFrame.Output[:516]) @@ -174,47 +171,27 @@ func (w *WesolowskiFrameProver) ProveFrameHeader( b := sha3.Sum256(input) o := WesolowskiSolve(b, difficulty) - domain := append([]byte("shard"), address...) 
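
[editor note] An illustrative sketch of why the Insert paths above switch to slices.Clone: if a tree node keeps the caller's backing array, a later mutation of that buffer silently rewrites what the tree believes it stored. The leaf type and function names below are stand-ins, not the VectorCommitmentTree API.

// slice_alias_sketch.go (illustrative)
package main

import (
	"fmt"
	"slices"
)

type leaf struct {
	key   []byte
	value []byte
}

func insertAliased(key, value []byte) *leaf {
	return &leaf{key: key, value: value} // aliases the caller's slices
}

func insertCloned(key, value []byte) *leaf {
	return &leaf{key: slices.Clone(key), value: slices.Clone(value)}
}

func main() {
	buf := []byte("value-1")
	aliased := insertAliased([]byte("k"), buf)
	cloned := insertCloned([]byte("k"), buf)

	copy(buf, []byte("mutated")) // caller reuses its buffer

	fmt.Println(string(aliased.value)) // "mutated" — node corrupted by the caller
	fmt.Println(string(cloned.value))  // "value-1" — isolated copy is unaffected
}
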
- signature, err := provingKey.SignWithDomain( - append(append([]byte{}, b[:]...), o[:]...), - domain, - ) - if err != nil { - return nil, errors.Wrap( - err, - "prove frame header", - ) + stateRootsClone := make([][]byte, len(stateRoots)) + for i, root := range stateRoots { + if root != nil { + stateRootsClone[i] = slices.Clone(root) + } } + requestsRootClone := slices.Clone(requestsRoot) + addressClone := slices.Clone(address) + proverClone := slices.Clone(prover) header := &protobufs.FrameHeader{ - Address: address, + Address: addressClone, FrameNumber: previousFrame.FrameNumber + 1, Timestamp: timestamp, Difficulty: difficulty, Output: o[:], ParentSelector: parent.FillBytes(make([]byte, 32)), FeeMultiplierVote: feeMultiplierVote, - RequestsRoot: requestsRoot, - StateRoots: stateRoots, - Prover: prover, - } - - switch pubkeyType { - case qcrypto.KeyTypeBLS48581G1: - fallthrough - case qcrypto.KeyTypeBLS48581G2: - header.PublicKeySignatureBls48581 = &protobufs.BLS48581AggregateSignature{ - Bitmask: SetBitAtIndex(make([]byte, 32), proverIndex), - Signature: signature, - PublicKey: &protobufs.BLS48581G2PublicKey{ - KeyValue: provingKey.Public().([]byte), - }, - } - default: - return nil, errors.Wrap( - errors.New("unsupported proving key"), - "prove frame header", - ) + RequestsRoot: requestsRootClone, + StateRoots: stateRootsClone, + Prover: proverClone, } return header, nil @@ -474,15 +451,24 @@ func (w *WesolowskiFrameProver) ProveGlobalFrameHeader( ) } + clonedCommitments := make([][]byte, len(commitments)) + for i, commitment := range commitments { + if commitment != nil { + clonedCommitments[i] = slices.Clone(commitment) + } + } + proverRootCopy := slices.Clone(proverRoot) + requestRootCopy := slices.Clone(requestRoot) + header := &protobufs.GlobalFrameHeader{ FrameNumber: previousFrame.FrameNumber + 1, Timestamp: timestamp, Difficulty: difficulty, Output: o[:], ParentSelector: parent.FillBytes(make([]byte, 32)), - GlobalCommitments: commitments, - ProverTreeCommitment: proverRoot, - RequestsRoot: requestRoot, + GlobalCommitments: clonedCommitments, + ProverTreeCommitment: proverRootCopy, + RequestsRoot: requestRootCopy, } switch pubkeyType {
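
[editor note] A short sketch of the deep-copy pattern used for commitments and stateRoots in the prover changes above: slices.Clone on a [][]byte only copies the outer slice, so each inner slice must be cloned explicitly, and nil entries are preserved rather than turned into empty slices. The helper name is illustrative.

// nested_clone_sketch.go (illustrative)
package main

import (
	"fmt"
	"slices"
)

// cloneByteSlices deep-copies a [][]byte, keeping nil entries nil.
func cloneByteSlices(in [][]byte) [][]byte {
	out := make([][]byte, len(in))
	for i, b := range in {
		if b != nil {
			out[i] = slices.Clone(b)
		}
	}
	return out
}

func main() {
	src := [][]byte{[]byte("root-a"), nil, []byte("root-b")}
	dst := cloneByteSlices(src)
	src[0][0] = 'X' // mutate the original after cloning
	fmt.Println(string(dst[0]), dst[1] == nil, string(dst[2])) // "root-a" true "root-b"
}
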