ceremonyclient/node/consensus/app/message_validation.go
Cassandra Heart 53f7c2b5c9
v2.1.0.2 (#442)
* v2.1.0.2

* restore tweaks to simlibp2p

* fix: nil ref on size calc

* fix: panic should induce shutdown from event_distributor

* fix: friendlier initialization that requires less manual kickstarting for test/devnets

* fix: fewer available shards than provers should choose shard length

* fix: update stored worker registry, improve logging for debug mode

* fix: shut the fuck up, peer log

* qol: log value should be snake cased

* fix: non-archive snap sync issues

* fix: separate X448/Decaf448 signed keys, add onion key to registry

* fix: overflow arithmetic on frame number comparison

* fix: worker registration should be idempotent if inputs are same, otherwise permit updated records

* fix: remove global prover state from size calculation

* fix: divide by zero case

* fix: eager prover

* fix: broadcast listener default

* qol: diagnostic data for peer authenticator

* fix: master/worker connectivity issue in sparse networks

tight coupling of peer and workers can sometimes interfere if mesh is sparse, so give workers a pseudoidentity but publish messages with the proper peer key

* fix: reorder steps of join creation

* fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency)

* fix: add delegate to protobuf <-> reified join conversion

* fix: preempt prover from planning with no workers

* fix: use the unallocated workers to generate a proof

* qol: underflow causes join fail in first ten frames on test/devnets

* qol: small logging tweaks for easier log correlation in debug mode

* qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal

* qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring

* reuse shard descriptors for both join planning and confirm/reject decisions

* fix: add missing interface method and amend test blossomsub to use new peer id basis

* fix: only check allocations if they exist

* fix: pomw mint proof data needs to be hierarchically under global intrinsic domain

* staging temporary state under diagnostics

* fix: first phase of distributed lock refactoring

* fix: compute intrinsic locking

* fix: hypergraph intrinsic locking

* fix: token intrinsic locking

* fix: update execution engines to support new locking model

* fix: adjust tests with new execution shape

* fix: weave in lock/unlock semantics to liveness provider

* fix: lock fallthrough, add missing allocation update

* qol: additional logging for diagnostics, also testnet/devnet handling for confirmations

* fix: establish grace period on halt scenario to permit recovery

* fix: support test/devnet defaults for coverage scenarios

* fix: nil ref on consensus halts for non-archive nodes

* fix: remove unnecessary prefix from prover ref

* add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved

* fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly

* rollup of pre-liveness check lock interactions

* ahead of tests, get the protobuf/metrics-related changes out so teams can prepare

* add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved

* fix: blocker (3)

* Dev docs improvements (#445)

* Make install deps script more robust

* Improve testing instructions

* Worker node should stop upon OS SIGINT/SIGTERM signal (#447)

* move pebble close to Stop()

* move deferred Stop() to Start()

* add core id to worker stop log message

* create done os signal channel and stop worker upon message to it

---------

Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com>

---------

Co-authored-by: Daz <daz_the_corgi@proton.me>
Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
2025-10-23 01:03:06 -05:00

594 lines
17 KiB
Go

package app
import (
"bytes"
"encoding/binary"
"time"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
"source.quilibrium.com/quilibrium/monorepo/node/internal/frametime"
"source.quilibrium.com/quilibrium/monorepo/protobufs"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/p2p"
)
// validateConsensusMessage screens consensus-topic messages for this app
// shard. The first four bytes of the payload are a big-endian type prefix that
// selects one of four message kinds: app shard frame proposals, prover
// liveness checks, frame votes, and frame confirmations. Each kind has its own
// duration timer and its own accept/ignore/reject counter, labelled with the
// shard address. Payloads with any other prefix are rejected without touching
// a counter.
func (e *AppConsensusEngine) validateConsensusMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	// A payload shorter than the type prefix cannot be dispatched.
	if len(payload) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	switch binary.BigEndian.Uint32(payload[:4]) {
	case protobufs.AppShardFrameType:
		stopwatch := prometheus.NewTimer(
			proposalValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer stopwatch.ObserveDuration()

		tally := func(outcome string) {
			proposalValidationTotal.WithLabelValues(e.appAddressHex, outcome).Inc()
		}

		proposal := &protobufs.AppShardFrame{}
		if err := proposal.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal frame", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		if proposal.Header == nil {
			e.logger.Debug("frame has no header")
			tally("reject")
			return p2p.ValidationResultReject
		}

		// Frames addressed to another shard, or older than 20 seconds, are
		// ignored rather than rejected.
		if !bytes.Equal(proposal.Header.Address, e.appAddress) ||
			frametime.AppFrameSince(proposal) > 20*time.Second {
			tally("ignore")
			return p2p.ValidationResultIgnore
		}

		// A proposal that already carries a BLS signature block is rejected.
		if proposal.Header.PublicKeySignatureBls48581 != nil {
			e.logger.Debug("frame validation has signature")
			tally("reject")
			return p2p.ValidationResultReject
		}

		ok, err := e.frameValidator.Validate(proposal)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}
		if !ok {
			e.logger.Debug("invalid frame")
			tally("reject")
			return p2p.ValidationResultReject
		}

		tally("accept")

	case protobufs.ProverLivenessCheckType:
		stopwatch := prometheus.NewTimer(
			livenessCheckValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer stopwatch.ObserveDuration()

		tally := func(outcome string) {
			livenessCheckValidationTotal.WithLabelValues(
				e.appAddressHex,
				outcome,
			).Inc()
		}

		check := &protobufs.ProverLivenessCheck{}
		if err := check.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal liveness check", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		// Accept timestamps from 1000ms in the past up to 500ms in the future.
		nowMs := time.Now().UnixMilli()
		oldest, newest := nowMs-1000, nowMs+500
		if check.Timestamp < oldest || check.Timestamp > newest {
			tally("ignore")
			return p2p.ValidationResultIgnore
		}

		if err := check.Validate(); err != nil {
			e.logger.Debug("failed to validate liveness check", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		tally("accept")

	case protobufs.FrameVoteType:
		stopwatch := prometheus.NewTimer(
			voteValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer stopwatch.ObserveDuration()

		tally := func(outcome string) {
			voteValidationTotal.WithLabelValues(e.appAddressHex, outcome).Inc()
		}

		ballot := &protobufs.FrameVote{}
		if err := ballot.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal vote", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		// Votes must be timestamped within five seconds of the local clock.
		nowMs := time.Now().UnixMilli()
		oldest, newest := nowMs-5000, nowMs+5000
		if ballot.Timestamp < oldest || ballot.Timestamp > newest {
			tally("ignore")
			return p2p.ValidationResultIgnore
		}

		if err := ballot.Validate(); err != nil {
			e.logger.Debug("failed to validate vote", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		tally("accept")

	case protobufs.FrameConfirmationType:
		stopwatch := prometheus.NewTimer(
			confirmationValidationDuration.WithLabelValues(e.appAddressHex),
		)
		defer stopwatch.ObserveDuration()

		tally := func(outcome string) {
			confirmationValidationTotal.WithLabelValues(
				e.appAddressHex,
				outcome,
			).Inc()
		}

		conf := &protobufs.FrameConfirmation{}
		if err := conf.FromCanonicalBytes(payload); err != nil {
			e.logger.Debug("failed to unmarshal confirmation", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		// Confirmations use the same five second window as votes.
		nowMs := time.Now().UnixMilli()
		oldest, newest := nowMs-5000, nowMs+5000
		if conf.Timestamp < oldest || conf.Timestamp > newest {
			tally("ignore")
			return p2p.ValidationResultIgnore
		}

		if err := conf.Validate(); err != nil {
			e.logger.Debug("failed to validate confirmation", zap.Error(err))
			tally("reject")
			return p2p.ValidationResultReject
		}

		tally("accept")

	default:
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateProverMessage screens prover-topic messages. The only payload kind
// handled here is a MessageBundle, identified by the big-endian type prefix in
// the first four bytes. Payloads too short to hold the prefix, bundles that
// fail to decode, and bundles that fail their own Validate are rejected.
// Unknown prefixes and bundles timestamped more than five seconds away from
// the local clock are ignored.
func (e *AppConsensusEngine) validateProverMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	if len(payload) < 4 {
		e.logger.Error(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		return p2p.ValidationResultReject
	}

	kind := binary.BigEndian.Uint32(payload[:4])
	if kind != protobufs.MessageBundleType {
		e.logger.Debug("received unknown type", zap.Uint32("type", kind))
		return p2p.ValidationResultIgnore
	}

	// Prover messages arrive wrapped in a MessageBundle.
	bundle := &protobufs.MessageBundle{}
	if err := bundle.FromCanonicalBytes(payload); err != nil {
		e.logger.Debug("failed to unmarshal message bundle", zap.Error(err))
		return p2p.ValidationResultReject
	}

	if err := bundle.Validate(); err != nil {
		e.logger.Debug("invalid request", zap.Error(err))
		return p2p.ValidationResultReject
	}

	// The freshness check runs only after the bundle validated, so an invalid
	// bundle is rejected regardless of its timestamp.
	nowMs := time.Now().UnixMilli()
	oldest, newest := nowMs-5000, nowMs+5000
	if bundle.Timestamp < oldest || bundle.Timestamp > newest {
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateGlobalProverMessage screens global prover-topic messages. It applies
// these checks in order: the payload must hold a four byte big-endian type
// prefix, the prefix must be MessageBundleType, the bundle must decode and
// pass its own Validate, and its timestamp must lie within five seconds of the
// local clock. Short, undecodable, or invalid bundles are rejected. Unknown
// prefixes and out-of-window timestamps are ignored.
func (e *AppConsensusEngine) validateGlobalProverMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	raw := message.Data

	if len(raw) < 4 {
		e.logger.Error(
			"message too short",
			zap.Int("data_length", len(raw)),
		)
		return p2p.ValidationResultReject
	}

	prefix := binary.BigEndian.Uint32(raw[:4])
	if prefix != protobufs.MessageBundleType {
		e.logger.Debug("received unknown type", zap.Uint32("type", prefix))
		return p2p.ValidationResultIgnore
	}

	// Prover messages arrive wrapped in a MessageBundle.
	wrapped := &protobufs.MessageBundle{}
	if err := wrapped.FromCanonicalBytes(raw); err != nil {
		e.logger.Debug("failed to unmarshal message bundle", zap.Error(err))
		return p2p.ValidationResultReject
	}

	if err := wrapped.Validate(); err != nil {
		e.logger.Debug("invalid request", zap.Error(err))
		return p2p.ValidationResultReject
	}

	// Stale or future-dated bundles are ignored, not rejected.
	current := time.Now().UnixMilli()
	if wrapped.Timestamp < current-5000 || wrapped.Timestamp > current+5000 {
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateFrameMessage screens frame-topic messages for this app shard. Only
// AppShardFrame payloads are handled; any other type prefix is rejected.
//
// A frame is rejected when it fails to decode, has no header, lacks a complete
// BLS public key signature block, or fails e.frameValidator. It is ignored
// when it is addressed to a different shard or is older than 20 seconds. Every
// outcome on the AppShardFrame path, plus the too-short path, is counted in
// frameValidationTotal, and the whole call is timed by
// frameValidationDuration.
func (e *AppConsensusEngine) validateFrameMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	timer := prometheus.NewTimer(
		frameValidationDuration.WithLabelValues(e.appAddressHex),
	)
	defer timer.ObserveDuration()

	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.AppShardFrameType:
		frame := &protobufs.AppShardFrame{}
		if err := frame.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal frame", zap.Error(err))
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		// The header is dereferenced below; a frame that decodes without one
		// must be rejected here instead of panicking on peer-supplied input.
		if frame.Header == nil {
			e.logger.Debug("frame has no header")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if !bytes.Equal(frame.Header.Address, e.appAddress) {
			e.logger.Debug("frame address incorrect")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			// We ignore this rather than reject because it might be correctly
			// routing but something we should ignore
			return p2p.ValidationResultIgnore
		}

		if frame.Header.PublicKeySignatureBls48581 == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue == nil {
			e.logger.Debug("frame validation missing signature")
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		valid, err := e.frameValidator.Validate(frame)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			return p2p.ValidationResultReject
		}

		if !valid {
			frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
			e.logger.Debug("invalid frame")
			return p2p.ValidationResultReject
		}

		// Freshness is checked after validation, so an invalid stale frame is
		// still rejected. Count the ignore so the counter covers every outcome
		// on this path.
		if frametime.AppFrameSince(frame) > 20*time.Second {
			frameValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
			return p2p.ValidationResultIgnore
		}

		frameValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	default:
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateGlobalFrameMessage screens global frame-topic messages. Only
// GlobalFrame payloads are handled; any other type prefix is rejected.
//
// A frame is rejected when it fails to decode, has no header, lacks a complete
// BLS public key signature block, or fails e.globalFrameValidator. It is
// ignored when it is older than 20 seconds. Every outcome on the GlobalFrame
// path, plus the too-short path, is counted in globalFrameValidationTotal, and
// the whole call is timed by globalFrameValidationDuration.
func (e *AppConsensusEngine) validateGlobalFrameMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	timer := prometheus.NewTimer(globalFrameValidationDuration)
	defer timer.ObserveDuration()

	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Debug(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.GlobalFrameType:
		frame := &protobufs.GlobalFrame{}
		if err := frame.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal frame", zap.Error(err))
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		// The header is dereferenced below; a frame that decodes without one
		// must be rejected here instead of panicking on peer-supplied input.
		if frame.Header == nil {
			e.logger.Debug("frame has no header")
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		if frame.Header.PublicKeySignatureBls48581 == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey == nil ||
			frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue == nil {
			e.logger.Debug("frame validation missing signature")
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		valid, err := e.globalFrameValidator.Validate(frame)
		if err != nil {
			e.logger.Debug("frame validation error", zap.Error(err))
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		if !valid {
			e.logger.Debug("invalid frame")
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"reject",
			).Inc()
			return p2p.ValidationResultReject
		}

		// Freshness is checked after validation, so an invalid stale frame is
		// still rejected. Count the ignore so the counter covers every outcome
		// on this path.
		if frametime.GlobalFrameSince(frame) > 20*time.Second {
			globalFrameValidationTotal.WithLabelValues(
				e.appAddressHex,
				"ignore",
			).Inc()
			return p2p.ValidationResultIgnore
		}

		globalFrameValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()

	default:
		return p2p.ValidationResultReject
	}

	return p2p.ValidationResultAccept
}
// validateAlertMessage screens alert-topic messages. Only GlobalAlert payloads
// are handled; unknown type prefixes are ignored.
//
// An alert is rejected when the payload is too short to hold the four byte
// type prefix, when it fails to decode or fails its own Validate, or when its
// signature over alert.Message does not verify as Ed448 against
// e.alertPublicKey under the "GLOBAL_ALERT" domain.
func (e *AppConsensusEngine) validateAlertMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Error(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.GlobalAlertType:
		alert := &protobufs.GlobalAlert{}
		if err := alert.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal alert", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := alert.Validate(); err != nil {
			e.logger.Debug("alert validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		valid, err := e.keyManager.ValidateSignature(
			crypto.KeyTypeEd448,
			e.alertPublicKey,
			[]byte(alert.Message),
			alert.Signature,
			[]byte("GLOBAL_ALERT"),
		)
		if !valid || err != nil {
			// Log the error so a key manager failure can be told apart from a
			// signature that simply does not verify. zap.Error emits no field
			// when err is nil.
			e.logger.Debug("alert signature invalid", zap.Error(err))
			return p2p.ValidationResultReject
		}

	default:
		e.logger.Debug("received unknown type", zap.Uint32("type", typePrefix))
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validatePeerInfoMessage screens peer-info-topic messages. Only PeerInfo
// payloads are handled; unknown type prefixes are ignored. Payloads too short
// to hold the four byte prefix, and records that fail to decode or fail their
// own Validate, are rejected. Records timestamped more than one minute in the
// past or more than five minutes in the future are ignored.
func (e *AppConsensusEngine) validatePeerInfoMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	payload := message.Data

	if len(payload) < 4 {
		e.logger.Error(
			"message too short",
			zap.Int("data_length", len(payload)),
		)
		return p2p.ValidationResultReject
	}

	kind := binary.BigEndian.Uint32(payload[:4])
	if kind != protobufs.PeerInfoType {
		e.logger.Debug("received unknown type", zap.Uint32("type", kind))
		return p2p.ValidationResultIgnore
	}

	info := &protobufs.PeerInfo{}
	if err := info.FromCanonicalBytes(payload); err != nil {
		e.logger.Debug("failed to unmarshal peer info", zap.Error(err))
		return p2p.ValidationResultReject
	}

	if err := info.Validate(); err != nil {
		e.logger.Debug("peer info validation error", zap.Error(err))
		return p2p.ValidationResultReject
	}

	// Timestamp window, in Unix milliseconds: [now - 1min, now + 5min].
	// Anything outside the window is ignored, not rejected.
	nowMs := time.Now().UnixMilli()
	staleCutoff := nowMs - time.Minute.Milliseconds()
	futureCutoff := nowMs + (5 * time.Minute).Milliseconds()

	switch {
	case info.Timestamp < staleCutoff:
		e.logger.Debug("peer info timestamp too old",
			zap.Int64("peer_timestamp", info.Timestamp),
			zap.Int64("cutoff", staleCutoff),
		)
		return p2p.ValidationResultIgnore

	case info.Timestamp > futureCutoff:
		e.logger.Debug("peer info timestamp too far in future",
			zap.Int64("peer_timestamp", info.Timestamp),
			zap.Int64("cutoff", futureCutoff),
		)
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}
// validateDispatchMessage screens dispatch-topic messages. Three payload kinds
// are handled, selected by the big-endian type prefix in the first four bytes:
// InboxMessage, HubAddInboxMessage, and HubDeleteInboxMessage. Unknown
// prefixes are ignored.
//
// A payload is rejected when it is too short to hold the prefix, fails to
// decode, or fails its own Validate. Only InboxMessage has a freshness window
// here: it is ignored when its timestamp is more than 2000ms behind or more
// than 5000ms ahead of the local clock.
func (e *AppConsensusEngine) validateDispatchMessage(
	_ peer.ID,
	message *pb.Message,
) p2p.ValidationResult {
	// Check if data is long enough to contain type prefix
	if len(message.Data) < 4 {
		e.logger.Error(
			"message too short",
			zap.Int("data_length", len(message.Data)),
		)
		return p2p.ValidationResultReject
	}

	// Read type prefix from first 4 bytes
	typePrefix := binary.BigEndian.Uint32(message.Data[:4])

	switch typePrefix {
	case protobufs.InboxMessageType:
		envelope := &protobufs.InboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

		// Sample the clock once so both bounds of the window are measured from
		// the same instant.
		now := uint64(time.Now().UnixMilli())
		if envelope.Timestamp < now-2000 || envelope.Timestamp > now+5000 {
			return p2p.ValidationResultIgnore
		}

	case protobufs.HubAddInboxType:
		envelope := &protobufs.HubAddInboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

	case protobufs.HubDeleteInboxType:
		envelope := &protobufs.HubDeleteInboxMessage{}
		if err := envelope.FromCanonicalBytes(message.Data); err != nil {
			e.logger.Debug("failed to unmarshal envelope", zap.Error(err))
			return p2p.ValidationResultReject
		}

		if err := envelope.Validate(); err != nil {
			e.logger.Debug("envelope validation error", zap.Error(err))
			return p2p.ValidationResultReject
		}

	default:
		e.logger.Debug("received unknown type", zap.Uint32("type", typePrefix))
		return p2p.ValidationResultIgnore
	}

	return p2p.ValidationResultAccept
}