ceremonyclient/node/consensus/app/message_validation.go
Cassandra Heart ce4f77b140
v2.1.0.19 (#515)
* v2.1.0.19

* enhanced error logging, fix seniority marker join blocker, fix sync message size limit defaults

* resolve signature failure

* additional error logging for merge-related signatures

* fix: one-shot sync message size, app shard TC signature size, collector/hotstuff race condition, expired joins blocking new joins due to pruning disable

* remove compat with old 2.0.0 blossomsub

* fix: resolve abandoned prover joins

* reload prover registry

* fix stale worker proposal edge

* add full sanity check on join before submitting to identify bug

* resolve non-fallthrough condition that should be fallthrough

* fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating

* add reconnect fallback if no peers are found with variable reconnect time (#511)

Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>

* update base peer count to 1 (#513)

* fix: expired prover join frames, starting port ranges, proposer getting stuck, and seniority on joins

* fix: panic on shutdown, libp2p discovery picking inaccessible peers, coverage event check not in shutdown logic, amend app shard worker behavior to mirror global for prover root reconciliation

* fix: shutdown scenario quirks, reload hanging

* fix: do not bailout early on shutdown of coverage check

* fix: force registry refresh on worker waiting for registration

* add more logging to wait for prover

* fix: worker manager refreshes the filter on allocation, snapshots blocking close on shutdown

* tweak: force shutdown after five seconds for app worker

* fix: don't loop when shutting down

* fix: slight reordering, also added named workers to trace hanging shutdowns

* use deterministic key for peer id of workers to stop flagging workers as sybil attacks

* fix: remove pubsub stop from app consensus engine as it shouldn't manage pubsub lifecycle, integrate shutdown context to PerformSync to prevent stuck syncs from halting respawn

* fix: blossomsub pubsub interface does not properly track subscription status

* fix: subscribe order to avoid nil panic

* switch from dnsaddr to dns4

* add missing quic-v1

* additional logging to isolate respawn quirks

* fix: dnsaddr -> dns4 for blossomsub

* sort-of fix: apply sledgehammer to restart logic

* fix: restore proper respawn logic, fix frozen hypergraph post respawn, unsubscribe from bitmask previously missing

---------

Co-authored-by: winged-pegasus <55340199+winged-pegasus@users.noreply.github.com>
Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>
2026-02-26 04:20:13 -06:00

659 lines
19 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package app
import (
"bytes"
"encoding/binary"
"time"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
"source.quilibrium.com/quilibrium/monorepo/node/internal/frametime"
"source.quilibrium.com/quilibrium/monorepo/protobufs"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/p2p"
)
// validateConsensusMessage is the pubsub validator for consensus traffic on
// this app shard. It dispatches on the 4-byte big-endian type prefix of the
// payload and handles proposals, proposal votes, timeout states and prover
// liveness checks.
//
// It returns ValidationResultReject for payloads that are too short, fail to
// decode, fail validation, or carry an unknown type prefix;
// ValidationResultIgnore for well-formed messages this engine does not care
// about (address mismatch, stale rank, foreign filter); and
// ValidationResultAccept otherwise. The sending peer ID is not consulted.
func (e *AppConsensusEngine) validateConsensusMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		// NOTE(review): this increments the frame validation counter even though
		// this is the consensus validator — confirm that is intended.
		frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.AppShardProposalType:
		timer := prometheus.NewTimer(
			proposalValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer timer.ObserveDuration()

		proposal := &protobufs.AppShardProposal{}
		if err := proposal.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal proposal", zap.Error(err))
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if err := proposal.Validate(); err != nil {
			e.logger.Error("invalid proposal", zap.Error(err))
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		// NOTE(review): assumes Validate guarantees State and State.Header are
		// non-nil — confirm against the protobufs package.
		if !bytes.Equal(proposal.State.Header.Address, e.appAddress) {
			e.logger.Debug(
				"ignoring proposal",
				zap.String("reason", "address mismatch"),
			)
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		// Proposals below the finalized rank can no longer affect consensus.
		if e.forks.FinalizedRank() > proposal.GetRank() {
			e.logger.Debug(
				"ignoring proposal",
				zap.String("reason", "stale rank"),
				zap.Uint64("current_rank", e.forks.FinalizedRank()),
				zap.Uint64("proposal_rank", proposal.GetRank()),
			)
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		valid, err := e.frameValidator.Validate(proposal.State)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if !valid {
			e.logger.Debug(
				"invalid proposal",
				zap.String("reason", "frame validator returned false"),
			)
			proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		proposalValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	case protobufs.ProposalVoteType:
		timer := prometheus.NewTimer(
			voteValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer timer.ObserveDuration()

		vote := &protobufs.ProposalVote{}
		if err := vote.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal vote", zap.Error(err))
			voteValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		// We should still accept votes for the past rank either because a peer
		// needs it, or because we need it to trump a TC
		if e.currentRank > vote.Rank+1 {
			voteValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		if err := vote.Validate(); err != nil {
			e.logger.Debug("failed to validate vote", zap.Error(err))
			voteValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		voteValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	case protobufs.TimeoutStateType:
		timer := prometheus.NewTimer(
			timeoutStateValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer timer.ObserveDuration()

		timeoutState := &protobufs.TimeoutState{}
		if err := timeoutState.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal timeout state", zap.Error(err))
			timeoutStateValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		// The rank check below runs before Validate, so guard against a timeout
		// state that decoded without a vote instead of dereferencing nil on
		// peer-supplied data.
		if timeoutState.Vote == nil {
			e.logger.Debug(
				"invalid timeout state",
				zap.String("reason", "missing vote"),
			)
			timeoutStateValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		// We should still accept votes for the past rank in case a peer needs it
		if e.currentRank > timeoutState.Vote.Rank+1 {
			timeoutStateValidationTotal.WithLabelValues(
				e.appAddressHex,
				"ignore",
			).Inc()
			return p2p.ValidationResultIgnore
		}

		if err := timeoutState.Validate(); err != nil {
			e.logger.Debug("failed to validate timeout state", zap.Error(err))
			timeoutStateValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		timeoutStateValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	case protobufs.ProverLivenessCheckType:
		check := &protobufs.ProverLivenessCheck{}
		if err := check.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal liveness check", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := check.Validate(); err != nil {
			e.logger.Debug("invalid liveness check", zap.Error(err))
			return p2p.ValidationResultReject
		}

		// An empty filter is let through; a non-empty one must match this shard.
		if len(check.Filter) != 0 && !bytes.Equal(check.Filter, e.appAddress) {
			return p2p.ValidationResultIgnore
		}

	default:
		e.logger.Debug(
			"rejecting consensus message",
			zap.String("reason", "unknown type prefix"),
			zap.Uint32("type", typePrefix),
		)
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateProverMessage is the pubsub validator for prover messages addressed
// to this app shard. Only MessageBundle payloads are accepted: they must
// decode, pass Validate, and carry a timestamp within five seconds of the
// local clock in either direction.
//
// Payloads too short to hold the 4-byte type prefix, or that fail decoding or
// validation, are rejected. Unknown type prefixes and out-of-window
// timestamps are ignored.
func (e *AppConsensusEngine) validateProverMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	// The type prefix occupies the first four bytes.
	if len(payload) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		return p2p.ValidationResultReject
	}

	kind := binary.BigEndian.Uint32(payload[:4])
	if kind != protobufs.MessageBundleType {
		e.logger.Debug("received unknown type", zap.Uint32("type", kind))
		return p2p.ValidationResultIgnore
	}

	// Prover messages arrive wrapped in a MessageBundle.
	bundle := &protobufs.MessageBundle{}
	if err := bundle.FromCanonicalBytes(payload); err != nil {
		e.logger.Debug("failed to unmarshal message bundle", zap.Error(err))
		return p2p.ValidationResultReject
	}

	if err := bundle.Validate(); err != nil {
		e.logger.Debug("invalid request", zap.Error(err))
		return p2p.ValidationResultReject
	}

	// Accept only bundles stamped within +/- 5000ms of the local clock.
	now := time.Now().UnixMilli()
	earliest, latest := now-5000, now+5000
	if bundle.Timestamp < earliest || bundle.Timestamp > latest {
		e.logger.Debug(
			"ignoring prover message",
			zap.String("reason", "timestamp out of window"),
			zap.Int64("timestamp", bundle.Timestamp),
			zap.Int64("now", now),
		)
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateGlobalProverMessage is the pubsub validator for global prover
// messages. Only MessageBundle payloads are accepted: they must decode, pass
// Validate, and carry a timestamp within five seconds of the local clock in
// either direction.
//
// Payloads too short to hold the 4-byte type prefix, or that fail decoding or
// validation, are rejected. Unknown type prefixes and out-of-window
// timestamps are ignored.
func (e *AppConsensusEngine) validateGlobalProverMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	// The type prefix occupies the first four bytes.
	if len(payload) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		return p2p.ValidationResultReject
	}

	kind := binary.BigEndian.Uint32(payload[:4])
	if kind != protobufs.MessageBundleType {
		e.logger.Debug("received unknown type", zap.Uint32("type", kind))
		return p2p.ValidationResultIgnore
	}

	// Prover messages arrive wrapped in a MessageBundle.
	bundle := &protobufs.MessageBundle{}
	if err := bundle.FromCanonicalBytes(payload); err != nil {
		e.logger.Debug("failed to unmarshal message bundle", zap.Error(err))
		return p2p.ValidationResultReject
	}

	if err := bundle.Validate(); err != nil {
		e.logger.Debug("invalid request", zap.Error(err))
		return p2p.ValidationResultReject
	}

	// Accept only bundles stamped within +/- 5000ms of the local clock.
	now := time.Now().UnixMilli()
	earliest, latest := now-5000, now+5000
	if bundle.Timestamp < earliest || bundle.Timestamp > latest {
		e.logger.Debug(
			"ignoring global prover message",
			zap.String("reason", "timestamp out of window"),
			zap.Int64("timestamp", bundle.Timestamp),
			zap.Int64("now", now),
		)
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateFrameMessage is the pubsub validator for app shard frames. The
// payload must carry the AppShardFrame type prefix, decode, have a header
// addressed to this shard with a BLS public key signature present, pass the
// engine's frame validator, and be no more than 20 seconds old according to
// frametime.AppFrameSince.
//
// Every exit path records its outcome ("accept", "ignore" or "reject") in
// frameValidationTotal, and the total time spent is observed in
// frameValidationDuration. The sending peer ID is not consulted.
func (e *AppConsensusEngine) validateFrameMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	timer := prometheus.NewTimer(
		frameValidationDuration.WithLabelValues(e.appAddressHex),
	)
	defer timer.ObserveDuration()

	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.AppShardFrameType:
		frame := &protobufs.AppShardFrame{}
		if err := frame.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal frame", zap.Error(err))
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		// The header is dereferenced below without a prior Validate call, so
		// guard against a peer-supplied frame that decoded without one.
		if frame.Header == nil {
			e.logger.Debug("frame validation missing header")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if !bytes.Equal(frame.Header.Address, e.appAddress) {
			e.logger.Debug("frame address incorrect")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			// We ignore this rather than reject because it might be correctly routing
			// but something we should ignore
			return p2p.ValidationResultIgnore
		}

		if frame.Header.PublicKeySignatureBls48581 == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue == nil {
			e.logger.Debug("frame validation missing signature")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		valid, err := e.frameValidator.Validate(frame)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if !valid {
			e.logger.Debug(
				"invalid app frame",
				zap.String("reason", "frame validator returned false"),
			)
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		// Valid but old frames are dropped without penalising the sender.
		if frametime.AppFrameSince(frame) > 20*time.Second {
			e.logger.Debug(
				"ignoring app frame",
				zap.String("reason", "frame too old"),
			)
			frameValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		frameValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	default:
		e.logger.Debug(
			"rejecting frame message",
			zap.String("reason", "unknown type prefix"),
			zap.Uint32("type", typePrefix),
		)
		frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateGlobalFrameMessage is the pubsub validator for global frames. The
// payload must carry the GlobalFrame type prefix, decode, have a header with
// a BLS public key signature present, pass the engine's global frame
// validator, and be no more than 20 seconds old according to
// frametime.GlobalFrameSince.
//
// Every exit path records its outcome ("accept", "ignore" or "reject") in
// globalFrameValidationTotal, and the total time spent is observed in
// globalFrameValidationDuration. The sending peer ID is not consulted.
func (e *AppConsensusEngine) validateGlobalFrameMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	timer := prometheus.NewTimer(globalFrameValidationDuration)
	defer timer.ObserveDuration()

	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug("message too short", zap.Int("data_length", len(message.Data)))
		globalFrameValidationTotal.WithLabelValues("reject").Inc()
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.GlobalFrameType:
		frame := &protobufs.GlobalFrame{}
		if err := frame.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal frame", zap.Error(err))
			globalFrameValidationTotal.WithLabelValues("reject").Inc()
			return p2p.ValidationResultReject
		}

		// The header is dereferenced below without a prior Validate call, so
		// guard against a peer-supplied frame that decoded without one.
		if frame.Header == nil {
			e.logger.Debug("frame validation missing header")
			globalFrameValidationTotal.WithLabelValues("reject").Inc()
			return p2p.ValidationResultReject
		}

		if frame.Header.PublicKeySignatureBls48581 == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue == nil {
			e.logger.Debug("frame validation missing signature")
			globalFrameValidationTotal.WithLabelValues("reject").Inc()
			return p2p.ValidationResultReject
		}

		valid, err := e.globalFrameValidator.Validate(frame)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			globalFrameValidationTotal.WithLabelValues("reject").Inc()
			return p2p.ValidationResultReject
		}

		if !valid {
			e.logger.Debug(
				"invalid global frame",
				zap.String("reason", "frame validator returned false"),
			)
			globalFrameValidationTotal.WithLabelValues("reject").Inc()
			return p2p.ValidationResultReject
		}

		// Valid but old frames are dropped without penalising the sender.
		if frametime.GlobalFrameSince(frame) > 20*time.Second {
			e.logger.Debug(
				"ignoring global frame",
				zap.String("reason", "frame too old"),
			)
			globalFrameValidationTotal.WithLabelValues("ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		globalFrameValidationTotal.WithLabelValues("accept").Inc()

	default:
		e.logger.Debug(
			"rejecting global frame message",
			zap.String("reason", "unknown type prefix"),
			zap.Uint32("type", typePrefix),
		)
		globalFrameValidationTotal.WithLabelValues("reject").Inc()
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateAlertMessage is the pubsub validator for global alerts. A
// GlobalAlert payload must decode, pass Validate, and carry an Ed448
// signature over the alert message that verifies against the engine's
// configured alert public key under the "GLOBAL_ALERT" domain.
//
// Payloads too short to hold the 4-byte type prefix, or that fail decoding,
// validation or signature verification, are rejected. Unknown type prefixes
// are ignored. The sending peer ID is not consulted.
func (e *AppConsensusEngine) validateAlertMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.GlobalAlertType:
		alert := &protobufs.GlobalAlert{}
		if err := alert.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal alert", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := alert.Validate(); err != nil {
			e.logger.Debug("alert validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		valid, err := e.keyManager.ValidateSignature(
			crypto.KeyTypeEd448,
			e.alertPublicKey,
			[]byte(alert.Message),
			alert.Signature,
			[]byte("GLOBAL_ALERT"),
		)
		if !valid || err != nil {
			// Surface the verifier's error, if any, instead of dropping it;
			// zap.Error is a no-op field when err is nil.
			e.logger.Debug("alert signature invalid", zap.Error(err))
			return p2p.ValidationResultReject
		}

	default:
		e.logger.Debug("received unknown type", zap.Uint32("type", typePrefix))
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validatePeerInfoMessage is the pubsub validator for peer info and key
// registry announcements. Both message kinds must decode and pass Validate,
// and are then screened by timestamp (all values in Unix milliseconds):
//
//   - PeerInfo: more than one minute old is rejected; more than one second
//     old, or more than five minutes in the future, is ignored.
//   - KeyRegistry: more than one minute old is rejected; more than one
//     second old, or more than five seconds in the future, is ignored.
//
// Payloads too short to hold the 4-byte type prefix are rejected and unknown
// type prefixes are ignored. The sending peer ID is not consulted.
func (e *AppConsensusEngine) validatePeerInfoMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	// The type prefix occupies the first four bytes.
	if len(payload) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		return p2p.ValidationResultReject
	}

	kind := binary.BigEndian.Uint32(payload[:4])

	switch kind {
	case protobufs.PeerInfoType:
		info := &protobufs.PeerInfo{}
		if err := info.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal peer info", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := info.Validate(); err != nil {
			e.logger.Debug("peer info validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		now := time.Now().UnixMilli()
		oneMinuteAgo := now - (1 * 60 * 1000)
		fiveMinutesLater := now + (5 * 60 * 1000)

		// Checked in order: hard staleness, soft staleness, then future skew.
		switch ts := info.Timestamp; {
		case ts < oneMinuteAgo:
			e.logger.Debug("peer info timestamp too old",
				zap.Int64("peer_timestamp", ts),
				zap.Int64("cutoff", oneMinuteAgo),
			)
			return p2p.ValidationResultReject
		case ts < now-1000:
			e.logger.Debug("peer info timestamp too old, ignoring",
				zap.Int64("peer_timestamp", ts),
			)
			return p2p.ValidationResultIgnore
		case ts > fiveMinutesLater:
			e.logger.Debug("peer info timestamp too far in future",
				zap.Int64("peer_timestamp", ts),
				zap.Int64("cutoff", fiveMinutesLater),
			)
			return p2p.ValidationResultIgnore
		}

	case protobufs.KeyRegistryType:
		registry := &protobufs.KeyRegistry{}
		if err := registry.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal key registry", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := registry.Validate(); err != nil {
			e.logger.Debug("key registry validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		now := time.Now().UnixMilli()

		// Checked in order: hard staleness, soft staleness, then future skew.
		switch updated := int64(registry.LastUpdated); {
		case updated < now-60000:
			e.logger.Debug("key registry timestamp too old, rejecting")
			return p2p.ValidationResultReject
		case updated < now-1000:
			e.logger.Debug("key registry timestamp too old")
			return p2p.ValidationResultIgnore
		case updated > now+5000:
			e.logger.Debug("key registry timestamp too far in future")
			return p2p.ValidationResultIgnore
		}

	default:
		e.logger.Debug("received unknown type", zap.Uint32("type", kind))
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateDispatchMessage is the pubsub validator for dispatch traffic:
// inbox messages and hub add/delete inbox messages. Each kind must decode and
// pass Validate. Inbox messages are additionally ignored when their
// timestamp (Unix milliseconds) is more than two seconds behind or five
// seconds ahead of the local clock.
//
// Payloads too short to hold the 4-byte type prefix, or that fail decoding or
// validation, are rejected. Unknown type prefixes are ignored. The sending
// peer ID is not consulted.
func (e *AppConsensusEngine) validateDispatchMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.InboxMessageType:
		envelope := &protobufs.InboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		// Sample the clock once so both bounds of the window are measured
		// against the same instant.
		now := uint64(time.Now().UnixMilli())
		if envelope.Timestamp < now-2000 || envelope.Timestamp > now+5000 {
			e.logger.Debug(
				"ignoring dispatch message",
				zap.String("reason", "timestamp out of window"),
				zap.Uint64("timestamp", envelope.Timestamp),
				zap.Uint64("now", now),
			)
			return p2p.ValidationResultIgnore
		}

	case protobufs.HubAddInboxType:
		envelope := &protobufs.HubAddInboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

	case protobufs.HubDeleteInboxType:
		envelope := &protobufs.HubDeleteInboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

	default:
		e.logger.Debug("received unknown type", zap.Uint32("type", typePrefix))
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}