ceremonyclient/node/consensus/global/coverage_events.go

package global

import (
	"bytes"
	"encoding/hex"
	"fmt"
	"math/big"

	"go.uber.org/zap"
	typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus"
)

// checkShardCoverage verifies coverage levels for all active shards
func (e *GlobalConsensusEngine) checkShardCoverage() error {
	// Define coverage thresholds
	const (
		minProvers    = 5  // Minimum provers for safe operation
		maxProvers    = 32 // Maximum provers before split consideration
		haltThreshold = 3  // Network halt if <= 3 provers
	)

	// Get shard coverage information from prover registry
	shardCoverageMap := e.getShardCoverageMap()

	// Update state summaries metric
	stateSummariesAggregated.Set(float64(len(shardCoverageMap)))

	for shardAddress, coverage := range shardCoverageMap {
		addressLen := len(shardAddress)

		// Validate address length (must be 32-64 bytes)
		if addressLen < 32 || addressLen > 64 {
			e.logger.Error(
				"invalid shard address length",
				zap.Int("length", addressLen),
				zap.String("shard_address", hex.EncodeToString([]byte(shardAddress))),
			)
			continue
		}

		proverCount := coverage.ProverCount
		attestedStorage := coverage.AttestedStorage
		size := big.NewInt(0)
		for _, metadata := range coverage.TreeMetadata {
			size.Add(size, new(big.Int).SetUint64(metadata.TotalSize))
		}

		e.logger.Debug(
			"checking shard coverage",
			zap.String("shard_address", hex.EncodeToString([]byte(shardAddress))),
			zap.Int("prover_count", proverCount),
			zap.Uint64("attested_storage", attestedStorage),
			zap.Uint64("shard_size", size.Uint64()),
		)

		// Check for critical coverage (halt condition)
		if proverCount <= haltThreshold && size.Sign() > 0 {
			// Check if this address is blacklisted
			if e.isAddressBlacklisted([]byte(shardAddress)) {
				e.logger.Warn(
					"Shard has insufficient coverage but is blacklisted - skipping halt",
					zap.String("shard_address", hex.EncodeToString([]byte(shardAddress))),
					zap.Int("prover_count", proverCount),
					zap.Int("halt_threshold", haltThreshold),
				)
				continue
			}

			e.logger.Error(
				"CRITICAL: Shard has insufficient coverage - triggering network halt",
				zap.String("shard_address", hex.EncodeToString([]byte(shardAddress))),
				zap.Int("prover_count", proverCount),
				zap.Int("halt_threshold", haltThreshold),
			)

			// Emit halt event
			e.emitCoverageEvent(
				typesconsensus.ControlEventCoverageHalt,
				&typesconsensus.CoverageEventData{
					ShardAddress:    []byte(shardAddress),
					ProverCount:     proverCount,
					RequiredProvers: minProvers,
					AttestedStorage: attestedStorage,
					TreeMetadata:    coverage.TreeMetadata,
					Message: fmt.Sprintf(
						"Shard has only %d provers, network halt required",
						proverCount,
					),
				},
			)
			continue
		}

		// Check for low coverage
		if proverCount < minProvers {
			e.handleLowCoverage([]byte(shardAddress), coverage, minProvers)
		}

		// Check for high coverage (potential split)
		if proverCount > maxProvers {
			e.handleHighCoverage([]byte(shardAddress), coverage, maxProvers)
		}
	}

	return nil
}
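
// Illustrative reading of the thresholds above: a shard with 3 or fewer
// provers and a nonzero stored size triggers ControlEventCoverageHalt (unless
// blacklisted); a shard with 4 provers only gets the low-coverage handling
// (4 < minProvers); and a shard with 33 or more provers enters the split
// path. These numbers simply restate the constants; they are not additional
// policy.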

// ShardCoverage represents coverage information for a shard
type ShardCoverage struct {
	ProverCount     int
	AttestedStorage uint64
	TreeMetadata    []typesconsensus.TreeMetadata
}

// handleLowCoverage handles shards with insufficient provers
func (e *GlobalConsensusEngine) handleLowCoverage(
	shardAddress []byte,
	coverage *ShardCoverage,
	minProvers int,
) {
	addressLen := len(shardAddress)

	// Case 2.a: Full application address (32 bytes)
	if addressLen == 32 {
		e.logger.Warn(
			"shard has low coverage",
			zap.String("shard_address", hex.EncodeToString(shardAddress)),
			zap.Int("prover_count", coverage.ProverCount),
			zap.Int("min_provers", minProvers),
		)

		// Emit coverage warning event
		e.emitCoverageEvent(
			typesconsensus.ControlEventCoverageWarn,
			&typesconsensus.CoverageEventData{
				ShardAddress:    shardAddress,
				ProverCount:     coverage.ProverCount,
				RequiredProvers: minProvers,
				AttestedStorage: coverage.AttestedStorage,
				TreeMetadata:    coverage.TreeMetadata,
				Message:         "Application shard has low prover coverage",
			},
		)
		return
	}

	// Case 2.b: Longer than application address (> 32 bytes)
	// Check if merge is possible with sibling shards
	appPrefix := shardAddress[:32] // Application prefix
	siblingShards := e.findSiblingShards(appPrefix, shardAddress)

	if len(siblingShards) > 0 {
		// Calculate total storage across siblings
		totalStorage := coverage.AttestedStorage
		totalProvers := coverage.ProverCount
		allShards := append([][]byte{shardAddress}, siblingShards...)
		for _, sibling := range siblingShards {
			if sibCoverage, exists := e.getShardCoverage(sibling); exists {
				totalStorage += sibCoverage.AttestedStorage
				totalProvers += sibCoverage.ProverCount
			}
		}

		// Check if siblings have sufficient storage to handle merge
		requiredStorage := e.calculateRequiredStorage(allShards)
		if totalStorage >= requiredStorage {
			// Case 2.b.i: Merge is possible
			e.logger.Info(
				"shards eligible for merge",
				zap.String("shard_address", hex.EncodeToString(shardAddress)),
				zap.Int("sibling_count", len(siblingShards)),
				zap.Uint64("total_storage", totalStorage),
				zap.Uint64("required_storage", requiredStorage),
			)

			// Emit merge eligible event
			e.emitMergeEvent(
				&typesconsensus.ShardMergeEventData{
					ShardAddresses:  allShards,
					TotalProvers:    totalProvers,
					AttestedStorage: totalStorage,
					RequiredStorage: requiredStorage,
				},
			)
		} else {
			// Case 2.b.ii: Insufficient storage for merge
			e.logger.Warn(
				"shard has low coverage, merge not possible due to insufficient storage",
				zap.String("shard_address", hex.EncodeToString(shardAddress)),
				zap.Int("prover_count", coverage.ProverCount),
				zap.Uint64("total_storage", totalStorage),
				zap.Uint64("required_storage", requiredStorage),
			)

			// Emit coverage warning event
			e.emitCoverageEvent(
				typesconsensus.ControlEventCoverageWarn,
				&typesconsensus.CoverageEventData{
					ShardAddress:    shardAddress,
					ProverCount:     coverage.ProverCount,
					RequiredProvers: minProvers,
					AttestedStorage: coverage.AttestedStorage,
					TreeMetadata:    coverage.TreeMetadata,
					Message:         "shard has low coverage and cannot be merged due to insufficient storage",
				},
			)
		}
	} else {
		// No siblings found, emit warning
		e.emitCoverageEvent(
			typesconsensus.ControlEventCoverageWarn,
			&typesconsensus.CoverageEventData{
				ShardAddress:    shardAddress,
				ProverCount:     coverage.ProverCount,
				RequiredProvers: minProvers,
				AttestedStorage: coverage.AttestedStorage,
				TreeMetadata:    coverage.TreeMetadata,
				Message:         "Shard has low coverage and no siblings for merge",
			},
		)
	}
}
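
// Illustrative merge arithmetic for the check above (figures made up for the
// example): if a shard and its two siblings attest 10 GiB of storage in
// total, and the first tree of each of the three shards sums to 8 GiB of
// data, then totalStorage >= requiredStorage and a ShardMergeEventData is
// emitted for all three addresses; the code itself compares raw uint64
// values with no unit conversion.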

// handleHighCoverage handles shards with too many provers
func (e *GlobalConsensusEngine) handleHighCoverage(
	shardAddress []byte,
	coverage *ShardCoverage,
	maxProvers int,
) {
	addressLen := len(shardAddress)

	// Case 3.a: Not a full app+data address (< 64 bytes)
	if addressLen < 64 {
		// Check if there's space to split
		availableAddressSpace := e.calculateAvailableAddressSpace(shardAddress)
		if availableAddressSpace > 0 {
			// Case 3.a.i: Split is possible
			proposedShards := e.proposeShardSplit(shardAddress, coverage.ProverCount)
			e.logger.Info(
				"shard eligible for split",
				zap.String("shard_address", hex.EncodeToString(shardAddress)),
				zap.Int("prover_count", coverage.ProverCount),
				zap.Int("proposed_shard_count", len(proposedShards)),
			)

			// Emit split eligible event
			e.emitSplitEvent(&typesconsensus.ShardSplitEventData{
				ShardAddress:    shardAddress,
				ProverCount:     coverage.ProverCount,
				AttestedStorage: coverage.AttestedStorage,
				ProposedShards:  proposedShards,
			})
		} else {
			// Case 3.a.ii: No space to split, do nothing
			e.logger.Debug(
				"Shard has high prover count but cannot be split (no address space)",
				zap.String("shard_address", hex.EncodeToString(shardAddress)),
				zap.Int("prover_count", coverage.ProverCount),
			)
		}
	} else {
		// Already at maximum address length (64 bytes), cannot split further
		e.logger.Debug(
			"Shard has high prover count but cannot be split (max address length)",
			zap.String("shard_address", hex.EncodeToString(shardAddress)),
			zap.Int("prover_count", coverage.ProverCount),
		)
	}
}
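
// getShardCoverageMap builds a coverage snapshot for every shard that has at
// least one active prover allocation, keyed by the shard's confirmation
// filter bytes (as a string).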
func (e *GlobalConsensusEngine) getShardCoverageMap() map[string]*ShardCoverage {
	// Get all active app shard provers from the registry
	coverageMap := make(map[string]*ShardCoverage)

	// Get all app shard provers (provers with filters)
	allProvers, err := e.proverRegistry.GetAllActiveAppShardProvers()
	if err != nil {
		e.logger.Error("failed to get active app shard provers", zap.Error(err))
		return coverageMap
	}

	// Build a map of shards and their provers
	shardProvers := make(map[string][]*typesconsensus.ProverInfo)
	for _, prover := range allProvers {
		// Check which shards this prover is assigned to
		for _, allocation := range prover.Allocations {
			shardKey := string(allocation.ConfirmationFilter)
			shardProvers[shardKey] = append(shardProvers[shardKey], prover)
		}
	}

	// For each shard, build coverage information
	for shardAddress, provers := range shardProvers {
		proverCount := len(provers)

		// Calculate attested storage from prover data
		attestedStorage := uint64(0)
		for _, prover := range provers {
			attestedStorage += prover.AvailableStorage
		}

		// Get tree metadata from hypergraph
		var treeMetadata []typesconsensus.TreeMetadata
		metadataList, err := e.hypergraph.GetMetadataAtKey([]byte(shardAddress))
		if err != nil {
			// Skip this shard rather than abandoning the whole map: a single
			// failed metadata lookup should not disable coverage checks for
			// every other shard.
			e.logger.Error("could not obtain metadata for path", zap.Error(err))
			continue
		}
		for _, metadata := range metadataList {
			treeMetadata = append(treeMetadata, typesconsensus.TreeMetadata{
				CommitmentRoot: metadata.Commitment,
				TotalSize:      metadata.Size,
				TotalLeaves:    metadata.LeafCount,
			})
		}

		coverageMap[shardAddress] = &ShardCoverage{
			ProverCount:     proverCount,
			AttestedStorage: attestedStorage,
			TreeMetadata:    treeMetadata,
		}
	}

	return coverageMap
}
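
// getShardCoverage returns the coverage snapshot for a single shard, along
// with a boolean reporting whether the shard has any active provers.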
func (e *GlobalConsensusEngine) getShardCoverage(shardAddress []byte) (
	*ShardCoverage,
	bool,
) {
	// Query prover registry for specific shard coverage
	proverCount, err := e.proverRegistry.GetProverCount(shardAddress)
	if err != nil {
		e.logger.Debug(
			"failed to get prover count for shard",
			zap.String("shard_address", hex.EncodeToString(shardAddress)),
			zap.Error(err),
		)
		return nil, false
	}

	// If no provers, shard doesn't exist
	if proverCount == 0 {
		return nil, false
	}

	// Get active provers for this shard to calculate storage
	activeProvers, err := e.proverRegistry.GetActiveProvers(shardAddress)
	if err != nil {
		e.logger.Warn(
			"failed to get active provers for shard",
			zap.String("shard_address", hex.EncodeToString(shardAddress)),
			zap.Error(err),
		)
		return nil, false
	}

	// Calculate attested storage from prover data
	attestedStorage := uint64(0)
	for _, prover := range activeProvers {
		attestedStorage += prover.AvailableStorage
	}

	// Get tree metadata from hypergraph
	var treeMetadata []typesconsensus.TreeMetadata
	metadataList, err := e.hypergraph.GetMetadataAtKey(shardAddress)
	if err != nil {
		e.logger.Error("could not obtain metadata for path", zap.Error(err))
		return nil, false
	}
	for _, metadata := range metadataList {
		treeMetadata = append(treeMetadata, typesconsensus.TreeMetadata{
			CommitmentRoot: metadata.Commitment,
			TotalSize:      metadata.Size,
			TotalLeaves:    metadata.LeafCount,
		})
	}

	coverage := &ShardCoverage{
		ProverCount:     proverCount,
		AttestedStorage: attestedStorage,
		TreeMetadata:    treeMetadata,
	}
	return coverage, true
}
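
// findSiblingShards returns every other active shard whose first 32 bytes
// match appPrefix, i.e. shards under the same application address.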
func (e *GlobalConsensusEngine) findSiblingShards(
	appPrefix, shardAddress []byte,
) [][]byte {
	// Find shards with same app prefix but different suffixes
	var siblings [][]byte

	// Get all active shards from coverage map
	coverageMap := e.getShardCoverageMap()
	for shardKey := range coverageMap {
		shardBytes := []byte(shardKey)

		// Skip self
		if bytes.Equal(shardBytes, shardAddress) {
			continue
		}

		// Check if it has the same app prefix (first 32 bytes)
		if len(shardBytes) >= 32 && bytes.Equal(shardBytes[:32], appPrefix) {
			siblings = append(siblings, shardBytes)
		}
	}

	e.logger.Debug(
		"found sibling shards",
		zap.String("app_prefix", hex.EncodeToString(appPrefix)),
		zap.Int("sibling_count", len(siblings)),
	)
	return siblings
}
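
// calculateRequiredStorage sums the TotalSize of the first tree of each shard
// that currently has coverage, giving the amount of data a merged shard would
// have to hold.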
func (e *GlobalConsensusEngine) calculateRequiredStorage(
	shards [][]byte,
) uint64 {
	// Calculate total storage needed for these shards
	totalStorage := uint64(0)
	for _, shard := range shards {
		coverage, exists := e.getShardCoverage(shard)
		if exists && len(coverage.TreeMetadata) > 0 {
			totalStorage += coverage.TreeMetadata[0].TotalSize
		}
	}
	return totalStorage
}
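
// calculateAvailableAddressSpace returns the number of suffix bytes that can
// still be appended to shardAddress before it reaches the 64-byte maximum.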
func (e *GlobalConsensusEngine) calculateAvailableAddressSpace(
	shardAddress []byte,
) int {
	// Calculate how many more bytes can be added to address for splitting
	if len(shardAddress) >= 64 {
		return 0
	}
	return 64 - len(shardAddress)
}
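
// proposeShardSplit derives candidate child addresses for an overloaded shard
// by appending one- or two-byte suffixes that divide the next byte of address
// space into two, four, or eight even ranges, depending on prover count.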
func (e *GlobalConsensusEngine) proposeShardSplit(
	shardAddress []byte,
	proverCount int,
) [][]byte {
	// Propose how to split the shard address space
	availableSpace := e.calculateAvailableAddressSpace(shardAddress)
	if availableSpace == 0 {
		return nil
	}

	// Determine split factor based on prover count
	// For every 16 provers over 32, we can do another split
	splitFactor := 2
	if proverCount > 48 {
		splitFactor = 4
	}
	if proverCount > 64 && availableSpace >= 2 {
		splitFactor = 8
	}
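
	// Worked example (illustrative, restating the thresholds above): a shard
	// with 50 provers gets splitFactor = 4, so the loop below proposes four
	// children with suffixes 0x00, 0x40, 0x80, 0xC0, each covering one
	// quarter of the next byte's range. With 70 provers and at least two
	// bytes of address space left, splitFactor = 8 and the suffixes step by
	// 0x20 with a trailing 0x00 byte.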

	// Create proposed shards
	proposedShards := make([][]byte, 0, splitFactor)
	if splitFactor == 2 {
		// Binary split
		shard1 := append(append([]byte{}, shardAddress...), 0x00)
		shard2 := append(append([]byte{}, shardAddress...), 0x80)
		proposedShards = append(proposedShards, shard1, shard2)
	} else if splitFactor == 4 {
		// Quaternary split
		for i := 0; i < 4; i++ {
			shard := append(append([]byte{}, shardAddress...), byte(i*64))
			proposedShards = append(proposedShards, shard)
		}
	} else if splitFactor == 8 && availableSpace >= 2 {
		// Octal split with 2-byte suffix
		for i := 0; i < 8; i++ {
			shard := append(append([]byte{}, shardAddress...), byte(i*32), 0x00)
			proposedShards = append(proposedShards, shard)
		}
	}

	e.logger.Debug(
		"proposed shard split",
		zap.String("original_shard", hex.EncodeToString(shardAddress)),
		zap.Int("split_factor", splitFactor),
		zap.Int("proposed_count", len(proposedShards)),
	)
	return proposedShards
}