ceremonyclient/node/p2p/internal/peer_connector.go
Cassandra Heart 53f7c2b5c9
v2.1.0.2 (#442)
* v2.1.0.2

* restore tweaks to simlibp2p

* fix: nil ref on size calc

* fix: panic should induce shutdown from event_distributor

* fix: friendlier initialization that requires less manual kickstarting for test/devnets

* fix: fewer available shards than provers should choose shard length

* fix: update stored worker registry, improve logging for debug mode

* fix: silence overly chatty peer log

* qol: log value should be snake cased

* fix: non-archive snap sync issues

* fix: separate X448/Decaf448 signed keys, add onion key to registry

* fix: overflow arithmetic on frame number comparison (see the comparison sketch after this log)

* fix: worker registration should be idempotent if inputs are same, otherwise permit updated records

* fix: remove global prover state from size calculation

* fix: divide by zero case

* fix: eager prover

* fix: broadcast listener default

* qol: diagnostic data for peer authenticator

* fix: master/worker connectivity issue in sparse networks

Tight coupling of peers and workers can interfere when the mesh is sparse, so workers are given a pseudoidentity for mesh participation while messages are still published with the proper peer key (see the identity sketch after this log).

* fix: reorder steps of join creation

* fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency)

* fix: add delegate to protobuf <-> reified join conversion

* fix: preempt prover from planning with no workers

* fix: use the unallocated workers to generate a proof

* qol: fix underflow that causes join failures in the first ten frames on test/devnets

* qol: small logging tweaks for easier log correlation in debug mode

* qol: use a Fisher-Yates shuffle to ensure prover allocations are evenly distributed when scores are equal (see the shuffle sketch after this log)

* qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring

* reuse shard descriptors for both join planning and confirm/reject decisions

* fix: add missing interface method and amend test blossomsub to use new peer id basis

* fix: only check allocations if they exist

* fix: pomw mint proof data needs to be hierarchically under global intrinsic domain

* staging temporary state under diagnostics

* fix: first phase of distributed lock refactoring

* fix: compute intrinsic locking

* fix: hypergraph intrinsic locking

* fix: token intrinsic locking

* fix: update execution engines to support new locking model

* fix: adjust tests with new execution shape

* fix: weave in lock/unlock semantics to liveness provider

* fix: lock fallthrough, add missing allocation update

* qol: additional logging for diagnostics, also testnet/devnet handling for confirmations

* fix: establish grace period on halt scenario to permit recovery

* fix: support test/devnet defaults for coverage scenarios

* fix: nil ref on consensus halts for non-archive nodes

* fix: remove unnecessary prefix from prover ref

* add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved

* fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly

* rollup of pre-liveness check lock interactions

* ahead of tests, get the protobuf/metrics-related changes out so teams can prepare

* add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved

* fix: blocker (3)

* Dev docs improvements (#445)

* Make install deps script more robust

* Improve testing instructions

* Worker node should stop upon OS SIGINT/SIGTERM signal (#447)

* move pebble close to Stop()

* move deferred Stop() to Start()

* add core id to worker stop log message

* create done os signal channel and stop worker upon message to it

---------

Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com>

---------

Co-authored-by: Daz <daz_the_corgi@proton.me>
Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
2025-10-23 01:03:06 -05:00
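
The overflow fix above does not show its arithmetic, so as an illustration only: a wraparound-safe way to compare unsigned frame numbers is to test the forward modular distance against half the range. frameNewer and its half-range convention are assumptions for this sketch, not the repo's actual code.

// Hypothetical sketch: report whether frame a is newer than frame b, treating
// uint64 subtraction as modular so the comparison survives wraparound.
func frameNewer(a, b uint64) bool {
	// The forward distance from b to a is a-b (mod 2^64); if it is less
	// than half the range, a is ahead of b.
	return a != b && a-b < 1<<63
}

Under this convention frameNewer(5, 2) is true, and a counter that has just wrapped past zero still compares as newer than one sitting near the top of the range.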
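The sparse-mesh fix is the one item above that spells out its mechanism: workers participate in the mesh under a throwaway pseudoidentity, while anything they publish is signed with the node's stable peer key so receivers can still attribute it. A minimal sketch of that split, using plain ed25519 rather than the project's actual key types or wire format; all names here are illustrative:

package sketch

import (
	"crypto/ed25519"
	"crypto/rand"
)

// workerIdentity holds the two keys involved: a throwaway mesh key and the
// node's long-lived peer key.
type workerIdentity struct {
	meshPriv ed25519.PrivateKey // pseudoidentity: used only for mesh membership
	peerPriv ed25519.PrivateKey // stable peer key: signs everything published
}

// newWorkerIdentity gives the worker a fresh pseudoidentity while retaining
// the node's peer key for signing.
func newWorkerIdentity(peerPriv ed25519.PrivateKey) (*workerIdentity, error) {
	_, meshPriv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, err
	}
	return &workerIdentity{meshPriv: meshPriv, peerPriv: peerPriv}, nil
}

// sign produces a signature over payload with the stable peer key, so the
// message stays attributable to the node even though the mesh only ever
// sees the worker's pseudoidentity.
func (w *workerIdentity) sign(payload []byte) []byte {
	return ed25519.Sign(w.peerPriv, payload)
}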
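The Fisher-Yates item also deserves a concrete note: when several provers tie on score, a stable sort leaves their relative order biased by input order, and a uniform shuffle within each tied group removes that bias. A sketch of the idea, with the prover type and score field as placeholders:

package sketch

import "math/rand"

type prover struct {
	id    string
	score int
}

// shuffleTies assumes provers is already sorted by score and applies a
// Fisher-Yates shuffle within each run of equal scores, so allocation
// order is unbiased among tied provers.
func shuffleTies(provers []prover, rng *rand.Rand) {
	for i := 0; i < len(provers); {
		// Find the end of the run of provers sharing provers[i].score.
		j := i + 1
		for j < len(provers) && provers[j].score == provers[i].score {
			j++
		}
		// Standard Fisher-Yates over the tied group.
		group := provers[i:j]
		for k := len(group) - 1; k > 0; k-- {
			m := rng.Intn(k + 1)
			group[k], group[m] = group[m], group[k]
		}
		i = j
	}
}

Seeding the rng deterministically in tests keeps the shuffle reproducible while remaining uniform in production.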


package internal

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/network"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/core/peerstore"
	"github.com/libp2p/go-libp2p/p2p/protocol/identify"
	"go.uber.org/zap"
)
// PeerConnector is a connector to peers.
type PeerConnector interface {
	// Connect connects to peers.
	Connect(context.Context) error
}

// TODO(2.1.1+): metrics only; no debug logging unless it is configurable, as
// the logging is too noisy.
type peerConnector struct {
	ctx         context.Context
	logger      *zap.Logger
	host        host.Host
	idService   identify.IDService
	connectCh   chan (chan<- struct{})
	minPeers    int
	parallelism int
	source      PeerSource
	warned      bool
}
// Connect implements PeerConnector. It hands a request to the run loop and
// blocks until that connection round completes or either context is done.
func (pc *peerConnector) Connect(ctx context.Context) error {
	if pc.minPeers == 0 {
		// Nothing to do; warn once rather than on every call.
		if !pc.warned {
			pc.warned = true
			pc.logger.Warn(
				"skipped due to min_peers = 0, make sure this was intentional",
			)
		}
		return nil
	}
	done := make(chan struct{})
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-pc.ctx.Done():
		return pc.ctx.Err()
	case pc.connectCh <- done:
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-pc.ctx.Done():
		return pc.ctx.Err()
	case <-done:
		return nil
	}
}
// connectToPeer dials a single peer and waits for identify to complete,
// updating the shared counters as it goes.
func (pc *peerConnector) connectToPeer(
	ctx context.Context,
	logger *zap.Logger,
	p peer.AddrInfo,
	wg *sync.WaitGroup,
	duplicate, success, failure *uint32,
	inflight <-chan struct{},
) {
	defer func() {
		// Release the parallelism slot unless the whole round was cancelled.
		select {
		case <-ctx.Done():
		case <-inflight:
		}
	}()
	defer wg.Done()

	// Skip ourselves and peers we are already connected to.
	if p.ID == pc.host.ID() ||
		pc.host.Network().Connectedness(p.ID) == network.Connected ||
		pc.host.Network().Connectedness(p.ID) == network.Limited {
		atomic.AddUint32(duplicate, 1)
		return
	}

	pc.host.Peerstore().AddAddrs(p.ID, p.Addrs, peerstore.AddressTTL)
	conn, err := pc.host.Network().DialPeer(ctx, p.ID)
	if err != nil {
		atomic.AddUint32(failure, 1)
		return
	}

	// Only count the peer as a success once identify completes; give up (and
	// drop the connection) after half the identify timeout.
	select {
	case <-ctx.Done():
		return
	case <-time.After(identify.DefaultTimeout / 2):
		atomic.AddUint32(failure, 1)
		_ = conn.Close()
	case <-pc.idService.IdentifyWait(conn):
		atomic.AddUint32(success, 1)
	}
}
// connectToPeers consumes discovered peers from ch, dialing up to
// pc.parallelism of them concurrently until minPeers successes are reached,
// the channel drains, or the context is cancelled.
func (pc *peerConnector) connectToPeers(
	ctx context.Context,
	ch <-chan peer.AddrInfo,
	duplicate, success, failure *uint32,
) {
	inflight := make(chan struct{}, pc.parallelism)
	var wg sync.WaitGroup
	defer wg.Wait()
	for p := range ch {
		logger := pc.logger.With(zap.String("peer_id", p.ID.String()))
		if atomic.LoadUint32(success) >= uint32(pc.minPeers) {
			return
		}
		select {
		case <-ctx.Done():
			return
		case inflight <- struct{}{}:
		}
		wg.Add(1)
		go pc.connectToPeer(
			ctx,
			logger,
			p,
			&wg,
			duplicate,
			success,
			failure,
			inflight,
		)
	}
}
// connect runs a single connection round against the peer source.
func (pc *peerConnector) connect() {
	logger := pc.logger
	var success, failure, duplicate uint32
	ctx, cancel := context.WithCancel(pc.ctx)
	defer cancel()
	peerChan, err := pc.source.Peers(ctx)
	if err != nil {
		logger.Error("could not find peers", zap.Error(err))
		return
	}
	pc.connectToPeers(
		ctx,
		peerChan,
		&duplicate,
		&success,
		&failure,
	)
}

// run serializes connection rounds requested via connectCh.
func (pc *peerConnector) run() {
	for {
		select {
		case <-pc.ctx.Done():
			return
		case done := <-pc.connectCh:
			pc.connect()
			close(done)
		}
	}
}
// NewPeerConnector creates a new peer connector.
func NewPeerConnector(
	ctx context.Context,
	logger *zap.Logger,
	host host.Host,
	idService identify.IDService,
	minPeers, parallelism int,
	source PeerSource,
) PeerConnector {
	pc := &peerConnector{
		ctx:         ctx,
		logger:      logger,
		host:        host,
		idService:   idService,
		connectCh:   make(chan (chan<- struct{})),
		minPeers:    minPeers,
		parallelism: parallelism,
		source:      source,
	}
	go pc.run()
	return pc
}
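
// A minimal usage sketch (hypothetical wiring: h, idSvc, and bootstrapSource
// stand in for a libp2p host, its identify service, and a PeerSource built
// elsewhere; the peer and parallelism counts are arbitrary):
//
//	connector := NewPeerConnector(ctx, logger, h, idSvc, 8, 16, bootstrapSource)
//	if err := connector.Connect(ctx); err != nil {
//		logger.Warn("connection round failed", zap.Error(err))
//	}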
// chainedPeerConnector runs a sequence of connectors in order.
type chainedPeerConnector struct {
	ctx        context.Context
	connectors []PeerConnector
	connectCh  chan (chan<- struct{})
}

// Connect implements PeerConnector.
func (cpc *chainedPeerConnector) Connect(ctx context.Context) error {
	done := make(chan struct{})
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-cpc.ctx.Done():
		return cpc.ctx.Err()
	case cpc.connectCh <- done:
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-cpc.ctx.Done():
		return cpc.ctx.Err()
	case <-done:
		return nil
	}
}

func (cpc *chainedPeerConnector) run() {
	for {
		select {
		case <-cpc.ctx.Done():
			return
		case done := <-cpc.connectCh:
			for _, pc := range cpc.connectors {
				_ = pc.Connect(cpc.ctx)
			}
			close(done)
		}
	}
}

// NewChainedPeerConnector creates a new chained peer connector.
func NewChainedPeerConnector(ctx context.Context, connectors ...PeerConnector) PeerConnector {
	cpc := &chainedPeerConnector{
		ctx:        ctx,
		connectors: connectors,
		connectCh:  make(chan (chan<- struct{})),
	}
	go cpc.run()
	return cpc
}
// conditionalPeerConnector gates an inner connector behind a
// PeerConnectorCondition.
type conditionalPeerConnector struct {
	ctx       context.Context
	condition PeerConnectorCondition
	connector PeerConnector
	connectCh chan (chan<- struct{})
}

func (cpc *conditionalPeerConnector) run() {
	for {
		select {
		case <-cpc.ctx.Done():
			return
		case done := <-cpc.connectCh:
			if cpc.condition.Should() {
				_ = cpc.connector.Connect(cpc.ctx)
			}
			close(done)
		}
	}
}

// Connect implements PeerConnector.
func (cpc *conditionalPeerConnector) Connect(ctx context.Context) error {
	done := make(chan struct{})
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-cpc.ctx.Done():
		return cpc.ctx.Err()
	case cpc.connectCh <- done:
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-cpc.ctx.Done():
		return cpc.ctx.Err()
	case <-done:
		return nil
	}
}

// NewConditionalPeerConnector creates a new conditional peer connector.
func NewConditionalPeerConnector(
	ctx context.Context,
	condition PeerConnectorCondition,
	connector PeerConnector,
) PeerConnector {
	cpc := &conditionalPeerConnector{
		ctx:       ctx,
		condition: condition,
		connector: connector,
		connectCh: make(chan (chan<- struct{})),
	}
	go cpc.run()
	return cpc
}
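
// The connectors compose. A hypothetical arrangement (bootstrapConnector,
// dhtConnector, and lowPeerCount are placeholders for values built elsewhere)
// that always tries bootstrap peers but only falls back to the DHT when the
// peer count is low:
//
//	connector := NewChainedPeerConnector(
//		ctx,
//		bootstrapConnector,
//		NewConditionalPeerConnector(ctx, lowPeerCount, dhtConnector),
//	)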